diff options
Diffstat (limited to 'tools/bpf/bpftool/feature.c')
-rw-r--r-- | tools/bpf/bpftool/feature.c | 314 |
1 files changed, 238 insertions, 76 deletions
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 88718ee6a438..768bf77df886 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -6,6 +6,9 @@ #include <string.h> #include <unistd.h> #include <net/if.h> +#ifdef USE_LIBCAP +#include <sys/capability.h> +#endif #include <sys/utsname.h> #include <sys/vfs.h> @@ -35,6 +38,11 @@ static const char * const helper_name[] = { #undef BPF_HELPER_MAKE_ENTRY +static bool full_mode; +#ifdef USE_LIBCAP +static bool run_as_unprivileged; +#endif + /* Miscellaneous utility functions */ static bool check_procfs(void) @@ -72,13 +80,12 @@ print_bool_feature(const char *feat_name, const char *plain_name, printf("%s is %savailable\n", plain_name, res ? "" : "NOT "); } -static void print_kernel_option(const char *name, const char *value) +static void print_kernel_option(const char *name, const char *value, + const char *define_prefix) { char *endptr; int res; - /* No support for C-style ouptut */ - if (json_output) { if (!value) { jsonw_null_field(json_wtr, name); @@ -90,6 +97,12 @@ static void print_kernel_option(const char *name, const char *value) jsonw_int_field(json_wtr, name, res); else jsonw_string_field(json_wtr, name, value); + } else if (define_prefix) { + if (value) + printf("#define %s%s %s\n", define_prefix, + name, value); + else + printf("/* %s%s is not set */\n", define_prefix, name); } else { if (value) printf("%s is set to %s\n", name, value); @@ -307,77 +320,84 @@ static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n, return false; } -static void probe_kernel_image_config(void) +static void probe_kernel_image_config(const char *define_prefix) { - static const char * const options[] = { + static const struct { + const char * const name; + bool macro_dump; + } options[] = { /* Enable BPF */ - "CONFIG_BPF", + { "CONFIG_BPF", }, /* Enable bpf() syscall */ - "CONFIG_BPF_SYSCALL", + { "CONFIG_BPF_SYSCALL", }, /* Does selected architecture support eBPF JIT compiler */ - "CONFIG_HAVE_EBPF_JIT", + { "CONFIG_HAVE_EBPF_JIT", }, /* Compile eBPF JIT compiler */ - "CONFIG_BPF_JIT", + { "CONFIG_BPF_JIT", }, /* Avoid compiling eBPF interpreter (use JIT only) */ - "CONFIG_BPF_JIT_ALWAYS_ON", + { "CONFIG_BPF_JIT_ALWAYS_ON", }, /* cgroups */ - "CONFIG_CGROUPS", + { "CONFIG_CGROUPS", }, /* BPF programs attached to cgroups */ - "CONFIG_CGROUP_BPF", + { "CONFIG_CGROUP_BPF", }, /* bpf_get_cgroup_classid() helper */ - "CONFIG_CGROUP_NET_CLASSID", + { "CONFIG_CGROUP_NET_CLASSID", }, /* bpf_skb_{,ancestor_}cgroup_id() helpers */ - "CONFIG_SOCK_CGROUP_DATA", + { "CONFIG_SOCK_CGROUP_DATA", }, /* Tracing: attach BPF to kprobes, tracepoints, etc. */ - "CONFIG_BPF_EVENTS", + { "CONFIG_BPF_EVENTS", }, /* Kprobes */ - "CONFIG_KPROBE_EVENTS", + { "CONFIG_KPROBE_EVENTS", }, /* Uprobes */ - "CONFIG_UPROBE_EVENTS", + { "CONFIG_UPROBE_EVENTS", }, /* Tracepoints */ - "CONFIG_TRACING", + { "CONFIG_TRACING", }, /* Syscall tracepoints */ - "CONFIG_FTRACE_SYSCALLS", + { "CONFIG_FTRACE_SYSCALLS", }, /* bpf_override_return() helper support for selected arch */ - "CONFIG_FUNCTION_ERROR_INJECTION", + { "CONFIG_FUNCTION_ERROR_INJECTION", }, /* bpf_override_return() helper */ - "CONFIG_BPF_KPROBE_OVERRIDE", + { "CONFIG_BPF_KPROBE_OVERRIDE", }, /* Network */ - "CONFIG_NET", + { "CONFIG_NET", }, /* AF_XDP sockets */ - "CONFIG_XDP_SOCKETS", + { "CONFIG_XDP_SOCKETS", }, /* BPF_PROG_TYPE_LWT_* and related helpers */ - "CONFIG_LWTUNNEL_BPF", + { "CONFIG_LWTUNNEL_BPF", }, /* BPF_PROG_TYPE_SCHED_ACT, TC (traffic control) actions */ - "CONFIG_NET_ACT_BPF", + { "CONFIG_NET_ACT_BPF", }, /* BPF_PROG_TYPE_SCHED_CLS, TC filters */ - "CONFIG_NET_CLS_BPF", + { "CONFIG_NET_CLS_BPF", }, /* TC clsact qdisc */ - "CONFIG_NET_CLS_ACT", + { "CONFIG_NET_CLS_ACT", }, /* Ingress filtering with TC */ - "CONFIG_NET_SCH_INGRESS", + { "CONFIG_NET_SCH_INGRESS", }, /* bpf_skb_get_xfrm_state() helper */ - "CONFIG_XFRM", + { "CONFIG_XFRM", }, /* bpf_get_route_realm() helper */ - "CONFIG_IP_ROUTE_CLASSID", + { "CONFIG_IP_ROUTE_CLASSID", }, /* BPF_PROG_TYPE_LWT_SEG6_LOCAL and related helpers */ - "CONFIG_IPV6_SEG6_BPF", + { "CONFIG_IPV6_SEG6_BPF", }, /* BPF_PROG_TYPE_LIRC_MODE2 and related helpers */ - "CONFIG_BPF_LIRC_MODE2", + { "CONFIG_BPF_LIRC_MODE2", }, /* BPF stream parser and BPF socket maps */ - "CONFIG_BPF_STREAM_PARSER", + { "CONFIG_BPF_STREAM_PARSER", }, /* xt_bpf module for passing BPF programs to netfilter */ - "CONFIG_NETFILTER_XT_MATCH_BPF", + { "CONFIG_NETFILTER_XT_MATCH_BPF", }, /* bpfilter back-end for iptables */ - "CONFIG_BPFILTER", + { "CONFIG_BPFILTER", }, /* bpftilter module with "user mode helper" */ - "CONFIG_BPFILTER_UMH", + { "CONFIG_BPFILTER_UMH", }, /* test_bpf module for BPF tests */ - "CONFIG_TEST_BPF", + { "CONFIG_TEST_BPF", }, + + /* Misc configs useful in BPF C programs */ + /* jiffies <-> sec conversion for bpf_jiffies64() helper */ + { "CONFIG_HZ", true, } }; char *values[ARRAY_SIZE(options)] = { }; struct utsname utsn; @@ -419,7 +439,8 @@ static void probe_kernel_image_config(void) while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) { for (i = 0; i < ARRAY_SIZE(options); i++) { - if (values[i] || strcmp(buf, options[i])) + if ((define_prefix && !options[i].macro_dump) || + values[i] || strcmp(buf, options[i].name)) continue; values[i] = strdup(value); @@ -431,7 +452,9 @@ end_parse: gzclose(file); for (i = 0; i < ARRAY_SIZE(options); i++) { - print_kernel_option(options[i], values[i]); + if (define_prefix && !options[i].macro_dump) + continue; + print_kernel_option(options[i].name, values[i], define_prefix); free(values[i]); } } @@ -471,6 +494,13 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, } res = bpf_probe_prog_type(prog_type, ifindex); +#ifdef USE_LIBCAP + /* Probe may succeed even if program load fails, for unprivileged users + * check that we did not fail because of insufficient permissions + */ + if (run_as_unprivileged && errno == EPERM) + res = false; +#endif supported_types[prog_type] |= res; @@ -499,6 +529,10 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix, res = bpf_probe_map_type(map_type, ifindex); + /* Probe result depends on the success of map creation, no additional + * check required for unprivileged users + */ + maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1; if (strlen(map_type_name[map_type]) > maxlen) { p_info("map type name too long"); @@ -518,12 +552,19 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, const char *define_prefix, unsigned int id, const char *ptype_name, __u32 ifindex) { - bool res; + bool res = false; - if (!supported_type) - res = false; - else + if (supported_type) { res = bpf_probe_helper(id, prog_type, ifindex); +#ifdef USE_LIBCAP + /* Probe may succeed even if program load fails, for + * unprivileged users check that we did not fail because of + * insufficient permissions + */ + if (run_as_unprivileged && errno == EPERM) + res = false; +#endif + } if (json_output) { if (res) @@ -540,8 +581,7 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, static void probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, - const char *define_prefix, bool full_mode, - __u32 ifindex) + const char *define_prefix, __u32 ifindex) { const char *ptype_name = prog_type_name[prog_type]; char feat_name[128]; @@ -607,23 +647,22 @@ section_system_config(enum probe_component target, const char *define_prefix) switch (target) { case COMPONENT_KERNEL: case COMPONENT_UNSPEC: - if (define_prefix) - break; - print_start_section("system_config", "Scanning system configuration...", - NULL, /* define_comment never used here */ - NULL); /* define_prefix always NULL here */ - if (check_procfs()) { - probe_unprivileged_disabled(); - probe_jit_enable(); - probe_jit_harden(); - probe_jit_kallsyms(); - probe_jit_limit(); - } else { - p_info("/* procfs not mounted, skipping related probes */"); + "/*** Misc kernel config items ***/", + define_prefix); + if (!define_prefix) { + if (check_procfs()) { + probe_unprivileged_disabled(); + probe_jit_enable(); + probe_jit_harden(); + probe_jit_kallsyms(); + probe_jit_limit(); + } else { + p_info("/* procfs not mounted, skipping related probes */"); + } } - probe_kernel_image_config(); + probe_kernel_image_config(define_prefix); print_end_section(); break; default: @@ -678,8 +717,7 @@ static void section_map_types(const char *define_prefix, __u32 ifindex) } static void -section_helpers(bool *supported_types, const char *define_prefix, - bool full_mode, __u32 ifindex) +section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) { unsigned int i; @@ -704,8 +742,8 @@ section_helpers(bool *supported_types, const char *define_prefix, define_prefix, define_prefix, define_prefix, define_prefix); for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) - probe_helpers_for_progtype(i, supported_types[i], - define_prefix, full_mode, ifindex); + probe_helpers_for_progtype(i, supported_types[i], define_prefix, + ifindex); print_end_section(); } @@ -720,23 +758,133 @@ static void section_misc(const char *define_prefix, __u32 ifindex) print_end_section(); } +#ifdef USE_LIBCAP +#define capability(c) { c, false, #c } +#define capability_msg(a, i) a[i].set ? "" : a[i].name, a[i].set ? "" : ", " +#endif + +static int handle_perms(void) +{ +#ifdef USE_LIBCAP + struct { + cap_value_t cap; + bool set; + char name[14]; /* strlen("CAP_SYS_ADMIN") */ + } bpf_caps[] = { + capability(CAP_SYS_ADMIN), +#ifdef CAP_BPF + capability(CAP_BPF), + capability(CAP_NET_ADMIN), + capability(CAP_PERFMON), +#endif + }; + cap_value_t cap_list[ARRAY_SIZE(bpf_caps)]; + unsigned int i, nb_bpf_caps = 0; + bool cap_sys_admin_only = true; + cap_flag_value_t val; + int res = -1; + cap_t caps; + + caps = cap_get_proc(); + if (!caps) { + p_err("failed to get capabilities for process: %s", + strerror(errno)); + return -1; + } + +#ifdef CAP_BPF + if (CAP_IS_SUPPORTED(CAP_BPF)) + cap_sys_admin_only = false; +#endif + + for (i = 0; i < ARRAY_SIZE(bpf_caps); i++) { + const char *cap_name = bpf_caps[i].name; + cap_value_t cap = bpf_caps[i].cap; + + if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val)) { + p_err("bug: failed to retrieve %s status: %s", cap_name, + strerror(errno)); + goto exit_free; + } + + if (val == CAP_SET) { + bpf_caps[i].set = true; + cap_list[nb_bpf_caps++] = cap; + } + + if (cap_sys_admin_only) + /* System does not know about CAP_BPF, meaning that + * CAP_SYS_ADMIN is the only capability required. We + * just checked it, break. + */ + break; + } + + if ((run_as_unprivileged && !nb_bpf_caps) || + (!run_as_unprivileged && nb_bpf_caps == ARRAY_SIZE(bpf_caps)) || + (!run_as_unprivileged && cap_sys_admin_only && nb_bpf_caps)) { + /* We are all good, exit now */ + res = 0; + goto exit_free; + } + + if (!run_as_unprivileged) { + if (cap_sys_admin_only) + p_err("missing %s, required for full feature probing; run as root or use 'unprivileged'", + bpf_caps[0].name); + else + p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'", + capability_msg(bpf_caps, 0), + capability_msg(bpf_caps, 1), + capability_msg(bpf_caps, 2), + capability_msg(bpf_caps, 3)); + goto exit_free; + } + + /* if (run_as_unprivileged && nb_bpf_caps > 0), drop capabilities. */ + if (cap_set_flag(caps, CAP_EFFECTIVE, nb_bpf_caps, cap_list, + CAP_CLEAR)) { + p_err("bug: failed to clear capabilities: %s", strerror(errno)); + goto exit_free; + } + + if (cap_set_proc(caps)) { + p_err("failed to drop capabilities: %s", strerror(errno)); + goto exit_free; + } + + res = 0; + +exit_free: + if (cap_free(caps) && !res) { + p_err("failed to clear storage object for capabilities: %s", + strerror(errno)); + res = -1; + } + + return res; +#else + /* Detection assumes user has specific privileges. + * We do not use libpcap so let's approximate, and restrict usage to + * root user only. + */ + if (geteuid()) { + p_err("full feature probing requires root privileges"); + return -1; + } + + return 0; +#endif /* USE_LIBCAP */ +} + static int do_probe(int argc, char **argv) { enum probe_component target = COMPONENT_UNSPEC; const char *define_prefix = NULL; bool supported_types[128] = {}; - bool full_mode = false; __u32 ifindex = 0; char *ifname; - /* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN). - * Let's approximate, and restrict usage to root user only. - */ - if (geteuid()) { - p_err("please run this command as root user"); - return -1; - } - set_max_rlimit(); while (argc) { @@ -785,6 +933,14 @@ static int do_probe(int argc, char **argv) if (!REQ_ARGS(1)) return -1; define_prefix = GET_ARG(); + } else if (is_prefix(*argv, "unprivileged")) { +#ifdef USE_LIBCAP + run_as_unprivileged = true; + NEXT_ARG(); +#else + p_err("unprivileged run not supported, recompile bpftool with libcap"); + return -1; +#endif } else { p_err("expected no more arguments, 'kernel', 'dev', 'macros' or 'prefix', got: '%s'?", *argv); @@ -792,6 +948,12 @@ static int do_probe(int argc, char **argv) } } + /* Full feature detection requires specific privileges. + * Let's approximate, and warn if user is not root. + */ + if (handle_perms()) + return -1; + if (json_output) { define_prefix = NULL; jsonw_start_object(json_wtr); @@ -803,7 +965,7 @@ static int do_probe(int argc, char **argv) goto exit_close_json; section_program_types(supported_types, define_prefix, ifindex); section_map_types(define_prefix, ifindex); - section_helpers(supported_types, define_prefix, full_mode, ifindex); + section_helpers(supported_types, define_prefix, ifindex); section_misc(define_prefix, ifindex); exit_close_json: @@ -822,12 +984,12 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s %s probe [COMPONENT] [full] [macros [prefix PREFIX]]\n" - " %s %s help\n" + "Usage: %1$s %2$s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n" + " %1$s %2$s help\n" "\n" " COMPONENT := { kernel | dev NAME }\n" "", - bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2]); return 0; } |