From ad6330ac2c5a38e5573cb6ae8ff75288bfd96325 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 12:38:16 -0300 Subject: tools headers UAPI: Sync prctl.h with the kernel sources To get the changes in: 1c101da8b971a366 ("arm64: mte: Allow user control of the tag check mode via prctl()") af5ce95282dc99d0 ("arm64: mte: Allow user control of the generated random tags via prctl()") Which don't cause any change in tooling, only addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/prctl.h' differs from latest version at 'include/uapi/linux/prctl.h' diff -u tools/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Cc: Adrian Hunter Cc: Catalin Marinas Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 07b4f8131e36..7f0827705c9a 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -233,6 +233,15 @@ struct prctl_mm_map { #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* MTE tag check fault modes */ +# define PR_MTE_TCF_SHIFT 1 +# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +/* MTE tag inclusion mask */ +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 -- cgit v1.2.3-70-g09d2 From d0448d6a249b6fc4518181b214d3403dfe2c8075 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:12:52 -0300 Subject: tools headers UAPI: Update fscrypt.h copy To get the changes from: c7f0207b613033c5 ("fscrypt: make "#define fscrypt_policy" user-only") That don't cause any changes in tools/perf, only addresses this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fscrypt.h' differs from latest version at 'include/uapi/linux/fscrypt.h' diff -u tools/include/uapi/linux/fscrypt.h include/uapi/linux/fscrypt.h Cc: Adrian Hunter Cc: Eric Biggers Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fscrypt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7875709ccfeb..e5de60336938 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -45,7 +45,6 @@ struct fscrypt_policy_v1 { __u8 flags; __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; }; -#define fscrypt_policy fscrypt_policy_v1 /* * Process-subscribed "logon" key description prefix and payload format. @@ -156,9 +155,9 @@ struct fscrypt_get_key_status_arg { __u32 __out_reserved[13]; }; -#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) +#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) -#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */ #define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg) #define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) @@ -170,6 +169,7 @@ struct fscrypt_get_key_status_arg { /* old names; don't add anything new here! */ #ifndef __KERNEL__ +#define fscrypt_policy fscrypt_policy_v1 #define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE #define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4 #define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8 -- cgit v1.2.3-70-g09d2 From 97a3863b170e38a8eefc07a72d418a81fd225216 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:36:41 -0300 Subject: tools UAPI: Update copy of linux/mman.h from the kernel sources e47168f3d1b14af5 ("powerpc/8xx: Support 16k hugepages with 4k pages") That don't cause any changes in tooling, just addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/mman.h' differs from latest version at 'include/uapi/linux/mman.h' diff -u tools/include/uapi/linux/mman.h include/uapi/linux/mman.h Cc: Adrian Hunter Cc: Christophe Leroy Cc: Ian Rogers Cc: Jiri Olsa Cc: Michael Ellerman Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mman.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index 923cc162609c..f55bc680b5b0 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -27,6 +27,7 @@ #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK +#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB #define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB #define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB #define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB -- cgit v1.2.3-70-g09d2 From aa04899a13078e4181146212555a1bbaa387d2c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:42:55 -0300 Subject: tools kvm headers: Update KVM headers from the kernel sources Some should cause changes in tooling, like the one adding LAST_EXCP, but the way it is structured end up not making that happen. The new SVM_EXIT_INVPCID should get used by arch/x86/util/kvm-stat.c, in the svm_exit_reasons table. The tools/perf/trace/beauty part has scripts to catch changes and automagically create tables, like tools/perf/trace/beauty/kvm_ioctl.sh, but changes are needed to make tools/perf/arch/x86/util/kvm-stat.c catch those automatically. These were handled by the existing scripts: $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2020-11-03 08:43:52.910728608 -0300 +++ after 2020-11-03 08:44:04.273959984 -0300 @@ -89,6 +89,7 @@ [0xbf] = "SET_NESTED_STATE", [0xc0] = "CLEAR_DIRTY_LOG", [0xc1] = "GET_SUPPORTED_HV_CPUID", + [0xc6] = "X86_SET_MSR_FILTER", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > before $ cp include/uapi/linux/vhost.h tools/include/uapi/linux/vhost.h $ $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > after $ diff -u before after --- before 2020-11-03 08:45:55.522225198 -0300 +++ after 2020-11-03 08:46:12.881578666 -0300 @@ -37,4 +37,5 @@ [0x71] = "VDPA_GET_STATUS", [0x73] = "VDPA_GET_CONFIG", [0x76] = "VDPA_GET_VRING_NUM", + [0x78] = "VDPA_GET_IOVA_RANGE", }; $ This addresses these perf build warnings: Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h' diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/sie.h' differs from latest version at 'arch/s390/include/uapi/asm/sie.h' diff -u tools/arch/s390/include/uapi/asm/sie.h arch/s390/include/uapi/asm/sie.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h' diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/svm.h' differs from latest version at 'arch/x86/include/uapi/asm/svm.h' diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h' diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h Cc: Adrian Hunter Cc: Alexander Yarygin Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Cornelia Huck Cc: David Ahern Cc: Ian Rogers Cc: Jiri Olsa Cc: Joerg Roedel Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/kvm.h | 25 +++++++++++++++++++++++++ tools/arch/s390/include/uapi/asm/sie.h | 2 +- tools/arch/x86/include/uapi/asm/kvm.h | 20 ++++++++++++++++++++ tools/arch/x86/include/uapi/asm/svm.h | 13 +++++++++++++ tools/include/uapi/linux/kvm.h | 19 +++++++++++++++++++ tools/include/uapi/linux/vhost.h | 4 ++++ 6 files changed, 82 insertions(+), 1 deletion(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index ba85bb23f060..1c17c3a24411 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -159,6 +159,21 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* + * PMU filter structure. Describe a range of events with a particular + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. + */ +struct kvm_pmu_event_filter { + __u16 base_event; + __u16 nevents; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + + __u8 action; + __u8 pad[3]; +}; + /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { struct { @@ -242,6 +257,15 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 + +/* + * Only two states can be presented by the host kernel: + * - NOT_REQUIRED: the guest doesn't need to do anything + * - NOT_AVAIL: the guest isn't mitigated (it can still use SSBS if available) + * + * All the other values are deprecated. The host still accepts all + * values (they are ABI), but will narrow them to the above two. + */ #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 @@ -329,6 +353,7 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h index 6ca1e68d7103..ede318653c87 100644 --- a/tools/arch/s390/include/uapi/asm/sie.h +++ b/tools/arch/s390/include/uapi/asm/sie.h @@ -29,7 +29,7 @@ { 0x13, "SIGP conditional emergency signal" }, \ { 0x15, "SIGP sense running" }, \ { 0x16, "SIGP set multithreading"}, \ - { 0x17, "SIGP store additional status ait address"} + { 0x17, "SIGP store additional status at address"} #define icpt_prog_codes \ { 0x0001, "Prog Operation" }, \ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 0780f97c1850..89e5f3d1bba8 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -192,6 +192,26 @@ struct kvm_msr_list { __u32 indices[0]; }; +/* Maximum size of any access bitmap in bytes */ +#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600 + +/* for KVM_X86_SET_MSR_FILTER */ +struct kvm_msr_filter_range { +#define KVM_MSR_FILTER_READ (1 << 0) +#define KVM_MSR_FILTER_WRITE (1 << 1) + __u32 flags; + __u32 nmsrs; /* number of msrs in bitmap */ + __u32 base; /* MSR index the bitmap starts at */ + __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */ +}; + +#define KVM_MSR_FILTER_MAX_RANGES 16 +struct kvm_msr_filter { +#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) +#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) + __u32 flags; + struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; +}; struct kvm_cpuid_entry { __u32 function; diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 2e8a30f06c74..f1d8307454e0 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -29,6 +29,7 @@ #define SVM_EXIT_WRITE_DR6 0x036 #define SVM_EXIT_WRITE_DR7 0x037 #define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_LAST_EXCP 0x05f #define SVM_EXIT_INTR 0x060 #define SVM_EXIT_NMI 0x061 #define SVM_EXIT_SMI 0x062 @@ -76,10 +77,21 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +/* SEV-ES software-defined VMGEXIT events */ +#define SVM_VMGEXIT_MMIO_READ 0x80000001 +#define SVM_VMGEXIT_MMIO_WRITE 0x80000002 +#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003 +#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004 +#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 +#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 +#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 +#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff + #define SVM_EXIT_ERR -1 #define SVM_EXIT_REASONS \ @@ -171,6 +183,7 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7d8eced6f459..ca41220b40b8 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -248,6 +248,8 @@ struct kvm_hyperv_exit { #define KVM_EXIT_IOAPIC_EOI 26 #define KVM_EXIT_HYPERV 27 #define KVM_EXIT_ARM_NISV 28 +#define KVM_EXIT_X86_RDMSR 29 +#define KVM_EXIT_X86_WRMSR 30 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -413,6 +415,17 @@ struct kvm_run { __u64 esr_iss; __u64 fault_ipa; } arm_nisv; + /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */ + struct { + __u8 error; /* user -> kernel */ + __u8 pad[7]; +#define KVM_MSR_EXIT_REASON_INVAL (1 << 0) +#define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) +#define KVM_MSR_EXIT_REASON_FILTER (1 << 2) + __u32 reason; /* kernel -> user */ + __u32 index; /* kernel -> user */ + __u64 data; /* kernel <-> user */ + } msr; /* Fix the size of the union. */ char padding[256]; }; @@ -1037,6 +1050,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SMALLER_MAXPHYADDR 185 #define KVM_CAP_S390_DIAG318 186 #define KVM_CAP_STEAL_TIME 187 +#define KVM_CAP_X86_USER_SPACE_MSR 188 +#define KVM_CAP_X86_MSR_FILTER 189 +#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #ifdef KVM_CAP_IRQ_ROUTING @@ -1538,6 +1554,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) +/* Available with KVM_CAP_X86_MSR_FILTER */ +#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 75232185324a..c998860d7bbc 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -146,4 +146,8 @@ /* Set event fd for config interrupt*/ #define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) + +/* Get the valid iova range */ +#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ + struct vhost_vdpa_iova_range) #endif -- cgit v1.2.3-70-g09d2 From a9e27f5f9827eab25b76155fddcc22ddeeed58d2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 08:49:59 -0300 Subject: tools headers UAPI: Update tools's copy of linux/perf_event.h The diff is just tabs versus spaces, trivial. This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3e5dcdd48a49..b95d3c485d27 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1196,7 +1196,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ /* 1 free */ -#define PERF_MEM_SNOOPX_SHIFT 38 +#define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ -- cgit v1.2.3-70-g09d2 From 42cc0e70a21faa8e7d7ea8713a3f9cd64bd3f60a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 08:52:11 -0300 Subject: tools include UAPI: Update linux/mount.h copy To pick the changes from: dab741e0e02bd3c4 ("Add a "nosymfollow" mount option.") That ends up adding support for the new MS_NOSYMFOLLOW mount flag: $ tools/perf/trace/beauty/mount_flags.sh > before $ cp include/uapi/linux/mount.h tools/include/uapi/linux/mount.h $ tools/perf/trace/beauty/mount_flags.sh > after $ diff -u before after --- before 2020-11-03 08:51:28.117997454 -0300 +++ after 2020-11-03 08:51:38.992218869 -0300 @@ -7,6 +7,7 @@ [32 ? (ilog2(32) + 1) : 0] = "REMOUNT", [64 ? (ilog2(64) + 1) : 0] = "MANDLOCK", [128 ? (ilog2(128) + 1) : 0] = "DIRSYNC", + [256 ? (ilog2(256) + 1) : 0] = "NOSYMFOLLOW", [1024 ? (ilog2(1024) + 1) : 0] = "NOATIME", [2048 ? (ilog2(2048) + 1) : 0] = "NODIRATIME", [4096 ? (ilog2(4096) + 1) : 0] = "BIND", $ So now one can use it in --filter expressions for tracepoints. This silences this perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/linux/mount.h' differs from latest version at 'include/uapi/linux/mount.h' diff -u tools/include/uapi/linux/mount.h include/uapi/linux/mount.h Cc: Adrian Hunter Cc: Al Viro Cc: Ian Rogers Cc: Jiri Olsa Cc: Mattias Nissler Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index 96a0240f23fe..dd8306ea336c 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -16,6 +16,7 @@ #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ #define MS_NOATIME 1024 /* Do not update access times. */ #define MS_NODIRATIME 2048 /* Do not update directory access times */ #define MS_BIND 4096 -- cgit v1.2.3-70-g09d2 From 4cf1bc1f10452065a29d576fc5693fc4fab5b919 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:40 +0000 Subject: bpf: Implement task local storage Similar to bpf_local_storage for sockets and inodes add local storage for task_struct. The life-cycle of storage is managed with the life-cycle of the task_struct. i.e. the storage is destroyed along with the owning task with a callback to the bpf_task_storage_free from the task_free LSM hook. The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the security blob which are now stackable and can co-exist with other LSMs. The userspace map operations can be done by using a pid fd as a key passed to the lookup, update and delete operations. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-3-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 23 +++ include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 39 +++++ kernel/bpf/Makefile | 1 + kernel/bpf/bpf_lsm.c | 4 + kernel/bpf/bpf_task_storage.c | 315 +++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 3 +- kernel/bpf/verifier.c | 10 ++ security/bpf/hooks.c | 2 + tools/include/uapi/linux/bpf.h | 39 +++++ 10 files changed, 436 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/bpf_task_storage.c (limited to 'tools/include/uapi/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index aaacb6aafc87..73226181b744 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -7,6 +7,7 @@ #ifndef _LINUX_BPF_LSM_H #define _LINUX_BPF_LSM_H +#include #include #include @@ -35,9 +36,21 @@ static inline struct bpf_storage_blob *bpf_inode( return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; } +static inline struct bpf_storage_blob *bpf_task( + const struct task_struct *task) +{ + if (unlikely(!task->security)) + return NULL; + + return task->security + bpf_lsm_blob_sizes.lbs_task; +} + extern const struct bpf_func_proto bpf_inode_storage_get_proto; extern const struct bpf_func_proto bpf_inode_storage_delete_proto; +extern const struct bpf_func_proto bpf_task_storage_get_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_proto; void bpf_inode_storage_free(struct inode *inode); +void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_LSM */ @@ -53,10 +66,20 @@ static inline struct bpf_storage_blob *bpf_inode( return NULL; } +static inline struct bpf_storage_blob *bpf_task( + const struct task_struct *task) +{ + return NULL; +} + static inline void bpf_inode_storage_free(struct inode *inode) { } +static inline void bpf_task_storage_free(struct task_struct *task) +{ +} + #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2e6f568377f1..99f7fd657d87 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -109,6 +109,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) #endif #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e6ceac3f7d62..f4037b2161a6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -157,6 +157,7 @@ enum bpf_map_type { BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, + BPF_MAP_TYPE_TASK_STORAGE, }; /* Note that tracing related programs such as @@ -3742,6 +3743,42 @@ union bpf_attr { * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. + * + * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *task*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *task* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this + * helper enforces the key must be an task_struct and the map must also + * be a **BPF_MAP_TYPE_TASK_STORAGE**. + * + * Underneath, the value is stored locally at *task* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *task*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task) + * Description + * Delete a bpf_local_storage from a *task*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3900,6 +3937,8 @@ union bpf_attr { FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ FN(redirect_peer), \ + FN(task_storage_get), \ + FN(task_storage_delete), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index bdc8cd1b6767..f0b93ced5a7f 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o +obj-${CONFIG_BPF_LSM} += bpf_task_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_SYSCALL) += btf.o diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index cd8a617f2109..e92c51bebb47 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -63,6 +63,10 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_spin_lock_proto; case BPF_FUNC_spin_unlock: return &bpf_spin_unlock_proto; + case BPF_FUNC_task_storage_get: + return &bpf_task_storage_get_proto; + case BPF_FUNC_task_storage_delete: + return &bpf_task_storage_delete_proto; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c new file mode 100644 index 000000000000..39a45fba4fb0 --- /dev/null +++ b/kernel/bpf/bpf_task_storage.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Facebook + * Copyright 2020 Google LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_BPF_STORAGE_CACHE(task_cache); + +static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) +{ + struct task_struct *task = owner; + struct bpf_storage_blob *bsb; + + bsb = bpf_task(task); + if (!bsb) + return NULL; + return &bsb->storage; +} + +static struct bpf_local_storage_data * +task_storage_lookup(struct task_struct *task, struct bpf_map *map, + bool cacheit_lockit) +{ + struct bpf_local_storage *task_storage; + struct bpf_local_storage_map *smap; + struct bpf_storage_blob *bsb; + + bsb = bpf_task(task); + if (!bsb) + return NULL; + + task_storage = rcu_dereference(bsb->storage); + if (!task_storage) + return NULL; + + smap = (struct bpf_local_storage_map *)map; + return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit); +} + +void bpf_task_storage_free(struct task_struct *task) +{ + struct bpf_local_storage_elem *selem; + struct bpf_local_storage *local_storage; + bool free_task_storage = false; + struct bpf_storage_blob *bsb; + struct hlist_node *n; + + bsb = bpf_task(task); + if (!bsb) + return; + + rcu_read_lock(); + + local_storage = rcu_dereference(bsb->storage); + if (!local_storage) { + rcu_read_unlock(); + return; + } + + /* Neither the bpf_prog nor the bpf-map's syscall + * could be modifying the local_storage->list now. + * Thus, no elem can be added-to or deleted-from the + * local_storage->list by the bpf_prog or by the bpf-map's syscall. + * + * It is racing with bpf_local_storage_map_free() alone + * when unlinking elem from the local_storage->list and + * the map's bucket->list. + */ + raw_spin_lock_bh(&local_storage->lock); + hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { + /* Always unlink from map before unlinking from + * local_storage. + */ + bpf_selem_unlink_map(selem); + free_task_storage = bpf_selem_unlink_storage_nolock( + local_storage, selem, false); + } + raw_spin_unlock_bh(&local_storage->lock); + rcu_read_unlock(); + + /* free_task_storage should always be true as long as + * local_storage->list was non-empty. + */ + if (free_task_storage) + kfree_rcu(local_storage, rcu); +} + +static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_local_storage_data *sdata; + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return ERR_CAST(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + sdata = task_storage_lookup(task, map, true); + put_pid(pid); + return sdata ? sdata->data : NULL; +out: + put_pid(pid); + return ERR_PTR(err); +} + +static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_local_storage_data *sdata; + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + sdata = bpf_local_storage_update( + task, (struct bpf_local_storage_map *)map, value, map_flags); + + err = PTR_ERR_OR_ZERO(sdata); +out: + put_pid(pid); + return err; +} + +static int task_storage_delete(struct task_struct *task, struct bpf_map *map) +{ + struct bpf_local_storage_data *sdata; + + sdata = task_storage_lookup(task, map, false); + if (!sdata) + return -ENOENT; + + bpf_selem_unlink(SELEM(sdata)); + + return 0; +} + +static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) +{ + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + err = task_storage_delete(task, map); +out: + put_pid(pid); + return err; +} + +BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, + task, void *, value, u64, flags) +{ + struct bpf_local_storage_data *sdata; + + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + + /* explicitly check that the task_storage_ptr is not + * NULL as task_storage_lookup returns NULL in this case and + * bpf_local_storage_update expects the owner to have a + * valid storage pointer. + */ + if (!task_storage_ptr(task)) + return (unsigned long)NULL; + + sdata = task_storage_lookup(task, map, true); + if (sdata) + return (unsigned long)sdata->data; + + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { + sdata = bpf_local_storage_update( + task, (struct bpf_local_storage_map *)map, value, + BPF_NOEXIST); + return IS_ERR(sdata) ? (unsigned long)NULL : + (unsigned long)sdata->data; + } + + return (unsigned long)NULL; +} + +BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, + task) +{ + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + return task_storage_delete(task, map); +} + +static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -ENOTSUPP; +} + +static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) +{ + struct bpf_local_storage_map *smap; + + smap = bpf_local_storage_map_alloc(attr); + if (IS_ERR(smap)) + return ERR_CAST(smap); + + smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache); + return &smap->map; +} + +static void task_storage_map_free(struct bpf_map *map) +{ + struct bpf_local_storage_map *smap; + + smap = (struct bpf_local_storage_map *)map; + bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx); + bpf_local_storage_map_free(smap); +} + +static int task_storage_map_btf_id; +const struct bpf_map_ops task_storage_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = bpf_local_storage_map_alloc_check, + .map_alloc = task_storage_map_alloc, + .map_free = task_storage_map_free, + .map_get_next_key = notsupp_get_next_key, + .map_lookup_elem = bpf_pid_task_storage_lookup_elem, + .map_update_elem = bpf_pid_task_storage_update_elem, + .map_delete_elem = bpf_pid_task_storage_delete_elem, + .map_check_btf = bpf_local_storage_map_check_btf, + .map_btf_name = "bpf_local_storage_map", + .map_btf_id = &task_storage_map_btf_id, + .map_owner_storage_ptr = task_storage_ptr, +}; + +BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct) + +const struct bpf_func_proto bpf_task_storage_get_proto = { + .func = bpf_task_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_task_storage_btf_ids[0], + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + +const struct bpf_func_proto bpf_task_storage_delete_proto = { + .func = bpf_task_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_task_storage_btf_ids[0], +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8f50c9c19f1b..f3fe9f53f93c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -773,7 +773,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, map->map_type != BPF_MAP_TYPE_ARRAY && map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && map->map_type != BPF_MAP_TYPE_SK_STORAGE && - map->map_type != BPF_MAP_TYPE_INODE_STORAGE) + map->map_type != BPF_MAP_TYPE_INODE_STORAGE && + map->map_type != BPF_MAP_TYPE_TASK_STORAGE) return -ENOTSUPP; if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > map->value_size) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f863aa84d0a2..00960f6a83ec 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4469,6 +4469,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_inode_storage_delete) goto error; break; + case BPF_MAP_TYPE_TASK_STORAGE: + if (func_id != BPF_FUNC_task_storage_get && + func_id != BPF_FUNC_task_storage_delete) + goto error; + break; default: break; } @@ -4547,6 +4552,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) goto error; break; + case BPF_FUNC_task_storage_get: + case BPF_FUNC_task_storage_delete: + if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) + goto error; + break; default: break; } diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c index 788667d582ae..e5971fa74fd7 100644 --- a/security/bpf/hooks.c +++ b/security/bpf/hooks.c @@ -12,6 +12,7 @@ static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = { #include #undef LSM_HOOK LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free), + LSM_HOOK_INIT(task_free, bpf_task_storage_free), }; static int __init bpf_lsm_init(void) @@ -23,6 +24,7 @@ static int __init bpf_lsm_init(void) struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = { .lbs_inode = sizeof(struct bpf_storage_blob), + .lbs_task = sizeof(struct bpf_storage_blob), }; DEFINE_LSM(bpf) = { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e6ceac3f7d62..f4037b2161a6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -157,6 +157,7 @@ enum bpf_map_type { BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, + BPF_MAP_TYPE_TASK_STORAGE, }; /* Note that tracing related programs such as @@ -3742,6 +3743,42 @@ union bpf_attr { * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. + * + * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *task*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *task* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this + * helper enforces the key must be an task_struct and the map must also + * be a **BPF_MAP_TYPE_TASK_STORAGE**. + * + * Underneath, the value is stored locally at *task* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *task*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task) + * Description + * Delete a bpf_local_storage from a *task*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3900,6 +3937,8 @@ union bpf_attr { FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ FN(redirect_peer), \ + FN(task_storage_get), \ + FN(task_storage_delete), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3-70-g09d2 From 3ca1032ab7ab010eccb107aa515598788f7d93bb Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:43 +0000 Subject: bpf: Implement get_current_task_btf and RET_PTR_TO_BTF_ID The currently available bpf_get_current_task returns an unsigned integer which can be used along with BPF_CORE_READ to read data from the task_struct but still cannot be used as an input argument to a helper that accepts an ARG_PTR_TO_BTF_ID of type task_struct. In order to implement this helper a new return type, RET_PTR_TO_BTF_ID, is added. This is similar to RET_PTR_TO_BTF_ID_OR_NULL but does not require checking the nullness of returned pointer. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-6-kpsingh@chromium.org --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 9 +++++++++ kernel/bpf/verifier.c | 7 +++++-- kernel/trace/bpf_trace.c | 16 ++++++++++++++++ tools/include/uapi/linux/bpf.h | 9 +++++++++ 5 files changed, 40 insertions(+), 2 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2fffd30e13ac..73d5381a5d5c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -310,6 +310,7 @@ enum bpf_return_type { RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ + RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f4037b2161a6..9879d6793e90 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3779,6 +3779,14 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * struct task_struct *bpf_get_current_task_btf(void) + * Description + * Return a BTF pointer to the "current" task. + * This pointer can also be used in helpers that accept an + * *ARG_PTR_TO_BTF_ID* of type *task_struct*. + * Return + * Pointer to the current task. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3939,6 +3947,7 @@ union bpf_attr { FN(redirect_peer), \ FN(task_storage_get), \ FN(task_storage_delete), \ + FN(get_current_task_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 00960f6a83ec..10da26e55130 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5186,11 +5186,14 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL; regs[BPF_REG_0].btf_id = meta.ret_btf_id; } - } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { + } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL || + fn->ret_type == RET_PTR_TO_BTF_ID) { int ret_btf_id; mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; + regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ? + PTR_TO_BTF_ID : + PTR_TO_BTF_ID_OR_NULL; ret_btf_id = *fn->ret_btf_id; if (ret_btf_id == 0) { verbose(env, "invalid return type %d of func %s#%d\n", diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 4517c8b66518..e4515b0f62a8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1022,6 +1022,20 @@ const struct bpf_func_proto bpf_get_current_task_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_0(bpf_get_current_task_btf) +{ + return (unsigned long) current; +} + +BTF_ID_LIST_SINGLE(bpf_get_current_btf_ids, struct, task_struct) + +static const struct bpf_func_proto bpf_get_current_task_btf_proto = { + .func = bpf_get_current_task_btf, + .gpl_only = true, + .ret_type = RET_PTR_TO_BTF_ID, + .ret_btf_id = &bpf_get_current_btf_ids[0], +}; + BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) { struct bpf_array *array = container_of(map, struct bpf_array, map); @@ -1265,6 +1279,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_pid_tgid_proto; case BPF_FUNC_get_current_task: return &bpf_get_current_task_proto; + case BPF_FUNC_get_current_task_btf: + return &bpf_get_current_task_btf_proto; case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; case BPF_FUNC_get_current_comm: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f4037b2161a6..9879d6793e90 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3779,6 +3779,14 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * struct task_struct *bpf_get_current_task_btf(void) + * Description + * Return a BTF pointer to the "current" task. + * This pointer can also be used in helpers that accept an + * *ARG_PTR_TO_BTF_ID* of type *task_struct*. + * Return + * Pointer to the current task. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3939,6 +3947,7 @@ union bpf_attr { FN(redirect_peer), \ FN(task_storage_get), \ FN(task_storage_delete), \ + FN(get_current_task_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3-70-g09d2 From 5329722057d41aebc31e391907a501feaa42f7d9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 9 Nov 2020 17:19:29 -0800 Subject: bpf: Assign ID to vmlinux BTF and return extra info for BTF in GET_OBJ_INFO Allocate ID for vmlinux BTF. This makes it visible when iterating over all BTF objects in the system. To allow distinguishing vmlinux BTF (and later kernel module BTF) from user-provided BTFs, expose extra kernel_btf flag, as well as BTF name ("vmlinux" for vmlinux BTF, will equal to module's name for module BTF). We might want to later allow specifying BTF name for user-provided BTFs as well, if that makes sense. But currently this is reserved only for in-kernel BTFs. Having in-kernel BTFs exposed IDs will allow to extend BPF APIs that require in-kernel BTF type with ability to specify BTF types from kernel modules, not just vmlinux BTF. This will be implemented in a follow up patch set for fentry/fexit/fmod_ret/lsm/etc. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201110011932.3201430-3-andrii@kernel.org --- include/uapi/linux/bpf.h | 3 +++ kernel/bpf/btf.c | 43 +++++++++++++++++++++++++++++++++++++++--- tools/include/uapi/linux/bpf.h | 3 +++ 3 files changed, 46 insertions(+), 3 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9879d6793e90..162999b12790 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4466,6 +4466,9 @@ struct bpf_btf_info { __aligned_u64 btf; __u32 btf_size; __u32 id; + __aligned_u64 name; + __u32 name_len; + __u32 kernel_btf; } __attribute__((aligned(8))); struct bpf_link_info { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 727c1c27053f..856585db7aa7 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -214,6 +214,8 @@ struct btf { struct btf *base_btf; u32 start_id; /* first type ID in this BTF (0 for base BTF) */ u32 start_str_off; /* first string offset (0 for base BTF) */ + char name[MODULE_NAME_LEN]; + bool kernel_btf; }; enum verifier_phase { @@ -4429,6 +4431,8 @@ struct btf *btf_parse_vmlinux(void) btf->data = __start_BTF; btf->data_size = __stop_BTF - __start_BTF; + btf->kernel_btf = true; + snprintf(btf->name, sizeof(btf->name), "vmlinux"); err = btf_parse_hdr(env); if (err) @@ -4454,8 +4458,13 @@ struct btf *btf_parse_vmlinux(void) bpf_struct_ops_init(btf, log); - btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); + + err = btf_alloc_id(btf); + if (err) + goto errout; + + btf_verifier_env_free(env); return btf; errout: @@ -5553,7 +5562,9 @@ int btf_get_info_by_fd(const struct btf *btf, struct bpf_btf_info info; u32 info_copy, btf_copy; void __user *ubtf; - u32 uinfo_len; + char __user *uname; + u32 uinfo_len, uname_len, name_len; + int ret = 0; uinfo = u64_to_user_ptr(attr->info.info); uinfo_len = attr->info.info_len; @@ -5570,11 +5581,37 @@ int btf_get_info_by_fd(const struct btf *btf, return -EFAULT; info.btf_size = btf->data_size; + info.kernel_btf = btf->kernel_btf; + + uname = u64_to_user_ptr(info.name); + uname_len = info.name_len; + if (!uname ^ !uname_len) + return -EINVAL; + + name_len = strlen(btf->name); + info.name_len = name_len; + + if (uname) { + if (uname_len >= name_len + 1) { + if (copy_to_user(uname, btf->name, name_len + 1)) + return -EFAULT; + } else { + char zero = '\0'; + + if (copy_to_user(uname, btf->name, uname_len - 1)) + return -EFAULT; + if (put_user(zero, uname + uname_len - 1)) + return -EFAULT; + /* let user-space know about too short buffer */ + ret = -ENOSPC; + } + } + if (copy_to_user(uinfo, &info, info_copy) || put_user(info_copy, &uattr->info.info_len)) return -EFAULT; - return 0; + return ret; } int btf_get_fd_by_id(u32 id) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9879d6793e90..162999b12790 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4466,6 +4466,9 @@ struct bpf_btf_info { __aligned_u64 btf; __u32 btf_size; __u32 id; + __aligned_u64 name; + __u32 name_len; + __u32 kernel_btf; } __attribute__((aligned(8))); struct bpf_link_info { -- cgit v1.2.3-70-g09d2 From 3f6719c7b62f0327c9091e26d0da10e65668229e Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 17 Nov 2020 23:29:28 +0000 Subject: bpf: Add bpf_bprm_opts_set helper The helper allows modification of certain bits on the linux_binprm struct starting with the secureexec bit which can be updated using the BPF_F_BPRM_SECUREEXEC flag. secureexec can be set by the LSM for privilege gaining executions to set the AT_SECURE auxv for glibc. When set, the dynamic linker disables the use of certain environment variables (like LD_PRELOAD). Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201117232929.2156341-1-kpsingh@chromium.org --- include/uapi/linux/bpf.h | 16 ++++++++++++++++ kernel/bpf/bpf_lsm.c | 26 ++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 16 ++++++++++++++++ 4 files changed, 60 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 162999b12790..a52299b80b9d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3787,6 +3787,16 @@ union bpf_attr { * *ARG_PTR_TO_BTF_ID* of type *task_struct*. * Return * Pointer to the current task. + * + * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags) + * Description + * Set or clear certain options on *bprm*: + * + * **BPF_F_BPRM_SECUREEXEC** Set the secureexec bit + * which sets the **AT_SECURE** auxv for glibc. The bit + * is cleared if the flag is not specified. + * Return + * **-EINVAL** if invalid *flags* are passed, zero otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3948,6 +3958,7 @@ union bpf_attr { FN(task_storage_get), \ FN(task_storage_delete), \ FN(get_current_task_btf), \ + FN(bprm_opts_set), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4119,6 +4130,11 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_IP, }; +/* Flags for bpf_bprm_opts_set helper */ +enum { + BPF_F_BPRM_SECUREEXEC = (1ULL << 0), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 553107f4706a..b4f27a874092 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,29 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, return 0; } +/* Mask for all the currently supported BPRM option flags */ +#define BPF_F_BRPM_OPTS_MASK BPF_F_BPRM_SECUREEXEC + +BPF_CALL_2(bpf_bprm_opts_set, struct linux_binprm *, bprm, u64, flags) +{ + if (flags & ~BPF_F_BRPM_OPTS_MASK) + return -EINVAL; + + bprm->secureexec = (flags & BPF_F_BPRM_SECUREEXEC); + return 0; +} + +BTF_ID_LIST_SINGLE(bpf_bprm_opts_set_btf_ids, struct, linux_binprm) + +const static struct bpf_func_proto bpf_bprm_opts_set_proto = { + .func = bpf_bprm_opts_set, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_bprm_opts_set_btf_ids[0], + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -71,6 +95,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_task_storage_get_proto; case BPF_FUNC_task_storage_delete: return &bpf_task_storage_delete_proto; + case BPF_FUNC_bprm_opts_set: + return &bpf_bprm_opts_set_proto; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 31484377b8b1..c5bc947a70ad 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -418,6 +418,7 @@ class PrinterHelpers(Printer): 'struct bpf_tcp_sock', 'struct bpf_tunnel_key', 'struct bpf_xfrm_state', + 'struct linux_binprm', 'struct pt_regs', 'struct sk_reuseport_md', 'struct sockaddr', @@ -465,6 +466,7 @@ class PrinterHelpers(Printer): 'struct bpf_tcp_sock', 'struct bpf_tunnel_key', 'struct bpf_xfrm_state', + 'struct linux_binprm', 'struct pt_regs', 'struct sk_reuseport_md', 'struct sockaddr', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 162999b12790..a52299b80b9d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3787,6 +3787,16 @@ union bpf_attr { * *ARG_PTR_TO_BTF_ID* of type *task_struct*. * Return * Pointer to the current task. + * + * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags) + * Description + * Set or clear certain options on *bprm*: + * + * **BPF_F_BPRM_SECUREEXEC** Set the secureexec bit + * which sets the **AT_SECURE** auxv for glibc. The bit + * is cleared if the flag is not specified. + * Return + * **-EINVAL** if invalid *flags* are passed, zero otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3948,6 +3958,7 @@ union bpf_attr { FN(task_storage_get), \ FN(task_storage_delete), \ FN(get_current_task_btf), \ + FN(bprm_opts_set), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4119,6 +4130,11 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_IP, }; +/* Flags for bpf_bprm_opts_set helper */ +enum { + BPF_F_BPRM_SECUREEXEC = (1ULL << 0), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ -- cgit v1.2.3-70-g09d2 From d055126180564a57fe533728a4e93d0cb53d49b3 Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Tue, 17 Nov 2020 18:45:49 +0000 Subject: bpf: Add bpf_ktime_get_coarse_ns helper The helper uses CLOCK_MONOTONIC_COARSE source of time that is less accurate but more performant. We have a BPF CGROUP_SKB firewall that supports event logging through bpf_perf_event_output(). Each event has a timestamp and currently we use bpf_ktime_get_ns() for it. Use of bpf_ktime_get_coarse_ns() saves ~15-20 ns in time required for event logging. bpf_ktime_get_ns(): EgressLogByRemoteEndpoint 113.82ns 8.79M bpf_ktime_get_coarse_ns(): EgressLogByRemoteEndpoint 95.40ns 10.48M Signed-off-by: Dmitrii Banshchikov Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201117184549.257280-1-me@ubique.spb.ru --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 11 +++++++++++ kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 13 +++++++++++++ kernel/trace/bpf_trace.c | 2 ++ tools/include/uapi/linux/bpf.h | 11 +++++++++++ 6 files changed, 39 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 581b2a2e78eb..e1bcb6d7345c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1842,6 +1842,7 @@ extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; +extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a52299b80b9d..3ca6146f001a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3797,6 +3797,16 @@ union bpf_attr { * is cleared if the flag is not specified. * Return * **-EINVAL** if invalid *flags* are passed, zero otherwise. + * + * u64 bpf_ktime_get_coarse_ns(void) + * Description + * Return a coarse-grained version of the time elapsed since + * system boot, in nanoseconds. Does not include time the system + * was suspended. + * + * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) + * Return + * Current *ktime*. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3959,6 +3969,7 @@ union bpf_attr { FN(task_storage_delete), \ FN(get_current_task_btf), \ FN(bprm_opts_set), \ + FN(ktime_get_coarse_ns), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 55454d2278b1..ff55cbcfbab4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2211,6 +2211,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak; +const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak; const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 25520f5eeaf6..2c395deae279 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -167,6 +167,17 @@ const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_0(bpf_ktime_get_coarse_ns) +{ + return ktime_get_coarse_ns(); +} + +const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = { + .func = bpf_ktime_get_coarse_ns, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + BPF_CALL_0(bpf_get_current_pid_tgid) { struct task_struct *task = current; @@ -685,6 +696,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: return &bpf_ktime_get_boot_ns_proto; + case BPF_FUNC_ktime_get_coarse_ns: + return &bpf_ktime_get_coarse_ns_proto; case BPF_FUNC_ringbuf_output: return &bpf_ringbuf_output_proto; case BPF_FUNC_ringbuf_reserve: diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 02986c7b90eb..d255bc9b2bfa 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1280,6 +1280,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: return &bpf_ktime_get_boot_ns_proto; + case BPF_FUNC_ktime_get_coarse_ns: + return &bpf_ktime_get_coarse_ns_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_get_current_pid_tgid: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a52299b80b9d..3ca6146f001a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3797,6 +3797,16 @@ union bpf_attr { * is cleared if the flag is not specified. * Return * **-EINVAL** if invalid *flags* are passed, zero otherwise. + * + * u64 bpf_ktime_get_coarse_ns(void) + * Description + * Return a coarse-grained version of the time elapsed since + * system boot, in nanoseconds. Does not include time the system + * was suspended. + * + * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) + * Return + * Current *ktime*. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3959,6 +3969,7 @@ union bpf_attr { FN(task_storage_delete), \ FN(get_current_task_btf), \ FN(bprm_opts_set), \ + FN(ktime_get_coarse_ns), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3-70-g09d2 From 27672f0d280a3f286a410a8db2004f46ace72a17 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 24 Nov 2020 15:12:09 +0000 Subject: bpf: Add a BPF helper for getting the IMA hash of an inode Provide a wrapper function to get the IMA hash of an inode. This helper is useful in fingerprinting files (e.g executables on execution) and using these fingerprints in detections like an executable unlinking itself. Since the ima_inode_hash can sleep, it's only allowed for sleepable LSM hooks. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201124151210.1081188-3-kpsingh@chromium.org --- include/uapi/linux/bpf.h | 11 +++++++++++ kernel/bpf/bpf_lsm.c | 26 ++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 11 +++++++++++ 4 files changed, 50 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3ca6146f001a..c3458ec1f30a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3807,6 +3807,16 @@ union bpf_attr { * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) * Return * Current *ktime*. + * + * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size) + * Description + * Returns the stored IMA hash of the *inode* (if it's avaialable). + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst* + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if + * invalid arguments are passed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3970,6 +3980,7 @@ union bpf_attr { FN(get_current_task_btf), \ FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ + FN(ima_inode_hash), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index b4f27a874092..70e5e0b6d69d 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -15,6 +15,7 @@ #include #include #include +#include /* For every LSM hook that allows attachment of BPF programs, declare a nop * function where a BPF program can be attached. @@ -75,6 +76,29 @@ const static struct bpf_func_proto bpf_bprm_opts_set_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_3(bpf_ima_inode_hash, struct inode *, inode, void *, dst, u32, size) +{ + return ima_inode_hash(inode, dst, size); +} + +static bool bpf_ima_inode_hash_allowed(const struct bpf_prog *prog) +{ + return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id); +} + +BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode) + +const static struct bpf_func_proto bpf_ima_inode_hash_proto = { + .func = bpf_ima_inode_hash, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_ima_inode_hash_btf_ids[0], + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, + .allowed = bpf_ima_inode_hash_allowed, +}; + static const struct bpf_func_proto * bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -97,6 +121,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_task_storage_delete_proto; case BPF_FUNC_bprm_opts_set: return &bpf_bprm_opts_set_proto; + case BPF_FUNC_ima_inode_hash: + return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index c5bc947a70ad..8b829748d488 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -436,6 +436,7 @@ class PrinterHelpers(Printer): 'struct xdp_md', 'struct path', 'struct btf_ptr', + 'struct inode', ] known_types = { '...', @@ -480,6 +481,7 @@ class PrinterHelpers(Printer): 'struct task_struct', 'struct path', 'struct btf_ptr', + 'struct inode', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3ca6146f001a..c3458ec1f30a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3807,6 +3807,16 @@ union bpf_attr { * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) * Return * Current *ktime*. + * + * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size) + * Description + * Returns the stored IMA hash of the *inode* (if it's avaialable). + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst* + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if + * invalid arguments are passed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3970,6 +3980,7 @@ union bpf_attr { FN(get_current_task_btf), \ FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ + FN(ima_inode_hash), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3-70-g09d2 From d4bff72c8401e6f56194ecf455db70ebc22929e2 Mon Sep 17 00:00:00 2001 From: Thomas Karlsson Date: Wed, 2 Dec 2020 19:49:58 +0100 Subject: macvlan: Support for high multicast packet rate Background: Broadcast and multicast packages are enqueued for later processing. This queue was previously hardcoded to 1000. This proved insufficient for handling very high packet rates. This resulted in packet drops for multicast. While at the same time unicast worked fine. The change: This patch make the queue length adjustable to accommodate for environments with very high multicast packet rate. But still keeps the default value of 1000 unless specified. The queue length is specified as a request per macvlan using the IFLA_MACVLAN_BC_QUEUE_LEN parameter. The actual used queue length will then be the maximum of any macvlan connected to the same port. The actual used queue length for the port can be retrieved (read only) by the IFLA_MACVLAN_BC_QUEUE_LEN_USED parameter for verification. This will be followed up by a patch to iproute2 in order to adjust the parameter from userspace. Signed-off-by: Thomas Karlsson Link: https://lore.kernel.org/r/dd4673b2-7eab-edda-6815-85c67ce87f63@paneda.se Signed-off-by: Jakub Kicinski --- drivers/net/macvlan.c | 40 ++++++++++++++++++++++++++++++++++++-- include/linux/if_macvlan.h | 1 + include/uapi/linux/if_link.h | 2 ++ tools/include/uapi/linux/if_link.h | 2 ++ 4 files changed, 43 insertions(+), 2 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d9b6c44a5911..fb51329f8964 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -35,7 +35,7 @@ #define MACVLAN_HASH_BITS 8 #define MACVLAN_HASH_SIZE (1<cb[0])) static void macvlan_port_destroy(struct net_device *dev); +static void update_port_bc_queue_len(struct macvlan_port *port); static inline bool macvlan_passthru(const struct macvlan_port *port) { @@ -354,7 +356,7 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port, MACVLAN_SKB_CB(nskb)->src = src; spin_lock(&port->bc_queue.lock); - if (skb_queue_len(&port->bc_queue) < MACVLAN_BC_QUEUE_LEN) { + if (skb_queue_len(&port->bc_queue) < port->bc_queue_len_used) { if (src) dev_hold(src->dev); __skb_queue_tail(&port->bc_queue, nskb); @@ -1218,6 +1220,7 @@ static int macvlan_port_create(struct net_device *dev) for (i = 0; i < MACVLAN_HASH_SIZE; i++) INIT_HLIST_HEAD(&port->vlan_source_hash[i]); + port->bc_queue_len_used = 0; skb_queue_head_init(&port->bc_queue); INIT_WORK(&port->bc_work, macvlan_process_broadcast); @@ -1486,6 +1489,10 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, goto destroy_macvlan_port; } + vlan->bc_queue_len_req = MACVLAN_DEFAULT_BC_QUEUE_LEN; + if (data && data[IFLA_MACVLAN_BC_QUEUE_LEN]) + vlan->bc_queue_len_req = nla_get_u32(data[IFLA_MACVLAN_BC_QUEUE_LEN]); + err = register_netdevice(dev); if (err < 0) goto destroy_macvlan_port; @@ -1496,6 +1503,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, goto unregister_netdev; list_add_tail_rcu(&vlan->list, &port->vlans); + update_port_bc_queue_len(vlan->port); netif_stacked_transfer_operstate(lowerdev, dev); linkwatch_fire_event(dev); @@ -1529,6 +1537,7 @@ void macvlan_dellink(struct net_device *dev, struct list_head *head) if (vlan->mode == MACVLAN_MODE_SOURCE) macvlan_flush_sources(vlan->port, vlan); list_del_rcu(&vlan->list); + update_port_bc_queue_len(vlan->port); unregister_netdevice_queue(dev, head); netdev_upper_dev_unlink(vlan->lowerdev, dev); } @@ -1572,6 +1581,12 @@ static int macvlan_changelink(struct net_device *dev, } vlan->flags = flags; } + + if (data && data[IFLA_MACVLAN_BC_QUEUE_LEN]) { + vlan->bc_queue_len_req = nla_get_u32(data[IFLA_MACVLAN_BC_QUEUE_LEN]); + update_port_bc_queue_len(vlan->port); + } + if (set_mode) vlan->mode = mode; if (data && data[IFLA_MACVLAN_MACADDR_MODE]) { @@ -1602,6 +1617,8 @@ static size_t macvlan_get_size(const struct net_device *dev) + nla_total_size(2) /* IFLA_MACVLAN_FLAGS */ + nla_total_size(4) /* IFLA_MACVLAN_MACADDR_COUNT */ + macvlan_get_size_mac(vlan) /* IFLA_MACVLAN_MACADDR */ + + nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN */ + + nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN_USED */ ); } @@ -1625,6 +1642,7 @@ static int macvlan_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); + struct macvlan_port *port = vlan->port; int i; struct nlattr *nest; @@ -1645,6 +1663,10 @@ static int macvlan_fill_info(struct sk_buff *skb, } nla_nest_end(skb, nest); } + if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN, vlan->bc_queue_len_req)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN_USED, port->bc_queue_len_used)) + goto nla_put_failure; return 0; nla_put_failure: @@ -1658,6 +1680,8 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { [IFLA_MACVLAN_MACADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_MACVLAN_MACADDR_DATA] = { .type = NLA_NESTED }, [IFLA_MACVLAN_MACADDR_COUNT] = { .type = NLA_U32 }, + [IFLA_MACVLAN_BC_QUEUE_LEN] = { .type = NLA_U32 }, + [IFLA_MACVLAN_BC_QUEUE_LEN_USED] = { .type = NLA_REJECT }, }; int macvlan_link_register(struct rtnl_link_ops *ops) @@ -1688,6 +1712,18 @@ static struct rtnl_link_ops macvlan_link_ops = { .priv_size = sizeof(struct macvlan_dev), }; +static void update_port_bc_queue_len(struct macvlan_port *port) +{ + u32 max_bc_queue_len_req = 0; + struct macvlan_dev *vlan; + + list_for_each_entry(vlan, &port->vlans, list) { + if (vlan->bc_queue_len_req > max_bc_queue_len_req) + max_bc_queue_len_req = vlan->bc_queue_len_req; + } + port->bc_queue_len_used = max_bc_queue_len_req; +} + static int macvlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index a367ead4bf4b..96556c64c95d 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -30,6 +30,7 @@ struct macvlan_dev { enum macvlan_mode mode; u16 flags; unsigned int macaddr_count; + u32 bc_queue_len_req; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index c4b23f06f69e..874cc12a34d9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -588,6 +588,8 @@ enum { IFLA_MACVLAN_MACADDR, IFLA_MACVLAN_MACADDR_DATA, IFLA_MACVLAN_MACADDR_COUNT, + IFLA_MACVLAN_BC_QUEUE_LEN, + IFLA_MACVLAN_BC_QUEUE_LEN_USED, __IFLA_MACVLAN_MAX, }; diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 781e482dc499..d208b2af697f 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -409,6 +409,8 @@ enum { IFLA_MACVLAN_MACADDR, IFLA_MACVLAN_MACADDR_DATA, IFLA_MACVLAN_MACADDR_COUNT, + IFLA_MACVLAN_BC_QUEUE_LEN, + IFLA_MACVLAN_BC_QUEUE_LEN_USED, __IFLA_MACVLAN_MAX, }; -- cgit v1.2.3-70-g09d2 From 290248a5b7d829871b3ea3c62578613a580a1744 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:30 -0800 Subject: bpf: Allow to specify kernel module BTFs when attaching BPF programs Add ability for user-space programs to specify non-vmlinux BTF when attaching BTF-powered BPF programs: raw_tp, fentry/fexit/fmod_ret, LSM, etc. For this, attach_prog_fd (now with the alias name attach_btf_obj_fd) should specify FD of a module or vmlinux BTF object. For backwards compatibility reasons, 0 denotes vmlinux BTF. Only kernel BTF (vmlinux or module) can be specified. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-11-andrii@kernel.org --- include/linux/btf.h | 1 + include/uapi/linux/bpf.h | 7 +++- kernel/bpf/btf.c | 5 +++ kernel/bpf/syscall.c | 82 ++++++++++++++++++++++++++---------------- tools/include/uapi/linux/bpf.h | 7 +++- 5 files changed, 69 insertions(+), 33 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index fb608e4de076..4c200f5d242b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -90,6 +90,7 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj, int btf_get_fd_by_id(u32 id); u32 btf_obj_id(const struct btf *btf); +bool btf_is_kernel(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c3458ec1f30a..1233f14f659f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -557,7 +557,12 @@ union bpf_attr { __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ - __u32 attach_prog_fd; /* 0 to attach to vmlinux */ + union { + /* valid prog_fd to attach to bpf prog */ + __u32 attach_prog_fd; + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7a19bf5bfe97..8d6bdb4f4d61 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5738,6 +5738,11 @@ u32 btf_obj_id(const struct btf *btf) return btf->id; } +bool btf_is_kernel(const struct btf *btf) +{ + return btf->kernel_btf; +} + static int btf_id_cmp_func(const void *a, const void *b) { const int *pa = a, *pb = b; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 184204169949..0cd3cc2af9c1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1926,12 +1926,16 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) static int bpf_prog_load_check_attach(enum bpf_prog_type prog_type, enum bpf_attach_type expected_attach_type, - u32 btf_id, u32 prog_fd) + struct btf *attach_btf, u32 btf_id, + struct bpf_prog *dst_prog) { if (btf_id) { if (btf_id > BTF_MAX_TYPE) return -EINVAL; + if (!attach_btf && !dst_prog) + return -EINVAL; + switch (prog_type) { case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_LSM: @@ -1943,7 +1947,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, } } - if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING && + if (attach_btf && (!btf_id || dst_prog)) + return -EINVAL; + + if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && prog_type != BPF_PROG_TYPE_EXT) return -EINVAL; @@ -2060,7 +2067,8 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) { enum bpf_prog_type type = attr->prog_type; - struct bpf_prog *prog; + struct bpf_prog *prog, *dst_prog = NULL; + struct btf *attach_btf = NULL; int err; char license[128]; bool is_gpl; @@ -2102,44 +2110,56 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (is_perfmon_prog_type(type) && !perfmon_capable()) return -EPERM; + /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog + * or btf, we need to check which one it is + */ + if (attr->attach_prog_fd) { + dst_prog = bpf_prog_get(attr->attach_prog_fd); + if (IS_ERR(dst_prog)) { + dst_prog = NULL; + attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); + if (IS_ERR(attach_btf)) + return -EINVAL; + if (!btf_is_kernel(attach_btf)) { + btf_put(attach_btf); + return -EINVAL; + } + } + } else if (attr->attach_btf_id) { + /* fall back to vmlinux BTF, if BTF type ID is specified */ + attach_btf = bpf_get_btf_vmlinux(); + if (IS_ERR(attach_btf)) + return PTR_ERR(attach_btf); + if (!attach_btf) + return -EINVAL; + btf_get(attach_btf); + } + bpf_prog_load_fixup_attach_type(attr); if (bpf_prog_load_check_attach(type, attr->expected_attach_type, - attr->attach_btf_id, - attr->attach_prog_fd)) + attach_btf, attr->attach_btf_id, + dst_prog)) { + if (dst_prog) + bpf_prog_put(dst_prog); + if (attach_btf) + btf_put(attach_btf); return -EINVAL; + } /* plain bpf_prog allocation */ prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); - if (!prog) + if (!prog) { + if (dst_prog) + bpf_prog_put(dst_prog); + if (attach_btf) + btf_put(attach_btf); return -ENOMEM; + } prog->expected_attach_type = attr->expected_attach_type; + prog->aux->attach_btf = attach_btf; prog->aux->attach_btf_id = attr->attach_btf_id; - - if (attr->attach_btf_id && !attr->attach_prog_fd) { - struct btf *btf; - - btf = bpf_get_btf_vmlinux(); - if (IS_ERR(btf)) - return PTR_ERR(btf); - if (!btf) - return -EINVAL; - - btf_get(btf); - prog->aux->attach_btf = btf; - } - - if (attr->attach_prog_fd) { - struct bpf_prog *dst_prog; - - dst_prog = bpf_prog_get(attr->attach_prog_fd); - if (IS_ERR(dst_prog)) { - err = PTR_ERR(dst_prog); - goto free_prog; - } - prog->aux->dst_prog = dst_prog; - } - + prog->aux->dst_prog = dst_prog; prog->aux->offload_requested = !!attr->prog_ifindex; prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c3458ec1f30a..1233f14f659f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -557,7 +557,12 @@ union bpf_attr { __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ - __u32 attach_prog_fd; /* 0 to attach to vmlinux */ + union { + /* valid prog_fd to attach to bpf prog */ + __u32 attach_prog_fd; + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3-70-g09d2 From 4f19cab76136e800a3f04d8c9aa4d8e770e3d3d8 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:05 +0100 Subject: bpf: Add a bpf_sock_from_file helper While eBPF programs can check whether a file is a socket by file->f_op == &socket_file_ops, they cannot convert the void private_data pointer to a struct socket BTF pointer. In order to do this a new helper wrapping sock_from_file is added. This is useful to tracing programs but also other program types inheriting this set of helpers such as iterators or LSM programs. Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: KP Singh Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201204113609.1850150-2-revest@google.com --- include/uapi/linux/bpf.h | 9 +++++++++ kernel/trace/bpf_trace.c | 20 ++++++++++++++++++++ scripts/bpf_helpers_doc.py | 4 ++++ tools/include/uapi/linux/bpf.h | 9 +++++++++ 4 files changed, 42 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1233f14f659f..30b477a26482 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3822,6 +3822,14 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if * invalid arguments are passed. + * + * struct socket *bpf_sock_from_file(struct file *file) + * Description + * If the given file represents a socket, returns the associated + * socket. + * Return + * A pointer to a struct socket on success or NULL if the file is + * not a socket. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3986,6 +3994,7 @@ union bpf_attr { FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ + FN(sock_from_file), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index cb9d7478ef0c..0cf0a6331482 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1270,6 +1270,24 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_1(bpf_sock_from_file, struct file *, file) +{ + return (unsigned long) sock_from_file(file); +} + +BTF_ID_LIST(bpf_sock_from_file_btf_ids) +BTF_ID(struct, socket) +BTF_ID(struct, file) + +static const struct bpf_func_proto bpf_sock_from_file_proto = { + .func = bpf_sock_from_file, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .ret_btf_id = &bpf_sock_from_file_btf_ids[0], + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_sock_from_file_btf_ids[1], +}; + const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1366,6 +1384,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_per_cpu_ptr_proto; case BPF_FUNC_bpf_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; + case BPF_FUNC_sock_from_file: + return &bpf_sock_from_file_proto; default: return NULL; } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 8b829748d488..867ada23281c 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -437,6 +437,8 @@ class PrinterHelpers(Printer): 'struct path', 'struct btf_ptr', 'struct inode', + 'struct socket', + 'struct file', ] known_types = { '...', @@ -482,6 +484,8 @@ class PrinterHelpers(Printer): 'struct path', 'struct btf_ptr', 'struct inode', + 'struct socket', + 'struct file', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1233f14f659f..30b477a26482 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3822,6 +3822,14 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if * invalid arguments are passed. + * + * struct socket *bpf_sock_from_file(struct file *file) + * Description + * If the given file represents a socket, returns the associated + * socket. + * Return + * A pointer to a struct socket on success or NULL if the file is + * not a socket. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3986,6 +3994,7 @@ union bpf_attr { FN(bprm_opts_set), \ FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ + FN(sock_from_file), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3-70-g09d2 From b7906b70a2337e445b8dca3ce7ba8976b6ebd07d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Dec 2020 22:36:25 +0100 Subject: bpf: Fix enum names for bpf_this_cpu_ptr() and bpf_per_cpu_ptr() helpers Remove bpf_ prefix, which causes these helpers to be reported in verifier dump as bpf_bpf_this_cpu_ptr() and bpf_bpf_per_cpu_ptr(), respectively. Lets fix it as long as it is still possible before UAPI freezes on these helpers. Fixes: eaa6bcb71ef6 ("bpf: Introduce bpf_per_cpu_ptr()") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Linus Torvalds --- include/uapi/linux/bpf.h | 4 ++-- kernel/bpf/helpers.c | 4 ++-- kernel/trace/bpf_trace.c | 4 ++-- tools/include/uapi/linux/bpf.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e6ceac3f7d62..556216dc9703 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3897,8 +3897,8 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ - FN(bpf_per_cpu_ptr), \ - FN(bpf_this_cpu_ptr), \ + FN(per_cpu_ptr), \ + FN(this_cpu_ptr), \ FN(redirect_peer), \ /* */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 25520f5eeaf6..deda1185237b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -717,9 +717,9 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_snprintf_btf_proto; case BPF_FUNC_jiffies64: return &bpf_jiffies64_proto; - case BPF_FUNC_bpf_per_cpu_ptr: + case BPF_FUNC_per_cpu_ptr: return &bpf_per_cpu_ptr_proto; - case BPF_FUNC_bpf_this_cpu_ptr: + case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; default: break; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 048c655315f1..a125ea5e04cd 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1337,9 +1337,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; - case BPF_FUNC_bpf_per_cpu_ptr: + case BPF_FUNC_per_cpu_ptr: return &bpf_per_cpu_ptr_proto; - case BPF_FUNC_bpf_this_cpu_ptr: + case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; default: return NULL; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e6ceac3f7d62..556216dc9703 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3897,8 +3897,8 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ - FN(bpf_per_cpu_ptr), \ - FN(bpf_this_cpu_ptr), \ + FN(per_cpu_ptr), \ + FN(this_cpu_ptr), \ FN(redirect_peer), \ /* */ -- cgit v1.2.3-70-g09d2 From 47d982202f8cfaac6f208c9109fa15cb6a0181f7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 09:27:52 -0800 Subject: tools headers UAPI: Update tools's copy of linux/perf_event.h To get the changes in: commit 8d97e71811aa ("perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE") commit 995f088efebe ("perf/core: Add support for PERF_SAMPLE_CODE_PAGE_SIZE") This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Signed-off-by: Kan Liang Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201130172803.2676-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b95d3c485d27..b15e3447cd9f 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -143,8 +143,10 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, PERF_SAMPLE_CGROUP = 1U << 21, + PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, + PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, - PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -896,6 +898,8 @@ enum perf_event_type { * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR * { u64 size; * char data[size]; } && PERF_SAMPLE_AUX + * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE + * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE * }; */ PERF_RECORD_SAMPLE = 9, -- cgit v1.2.3-70-g09d2 From 1c28a05d1a972594164efc7fcffda416c5d6ab02 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Dec 2020 09:13:16 -0300 Subject: tools headers UAPI: Sync linux/stat.h with the kernel sources To pick the changes in: 72d1249e2ffdbc34 ("uapi: fix statx attribute value overlap for DAX & MOUNT_ROOT") That don't cause any change in tooling, just addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/stat.h' differs from latest version at 'include/uapi/linux/stat.h' diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h Cc: Adrian Hunter Cc: Eric Sandeen Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/stat.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 82cc58fe9368..1500a0f58041 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -171,9 +171,12 @@ struct statx { * be of use to ordinary userspace programs such as GUIs or ls rather than * specialised tools. * - * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags * semantically. Where possible, the numerical value is picked to correspond - * also. + * also. Note that the DAX attribute indicates that the file is in the CPU + * direct access state. It does not correspond to the per-inode flag that + * some filesystems support. + * */ #define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ #define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ @@ -183,7 +186,7 @@ struct statx { #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ -#define STATX_ATTR_DAX 0x00002000 /* [I] File is DAX */ +#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ #endif /* _UAPI_LINUX_STAT_H */ -- cgit v1.2.3-70-g09d2 From 7ddcdea5b54492f54700f427f58690cf1e187e5e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:55:01 -0300 Subject: tools headers UAPI: Sync linux/const.h with the kernel headers To pick up the changes in: a85cbe6159ffc973 ("uapi: move constants from to ") That causes no changes in tooling, just addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/const.h' differs from latest version at 'include/uapi/linux/const.h' diff -u tools/include/uapi/linux/const.h include/uapi/linux/const.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Petr Vorel Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/const.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index 5ed721ad5b19..af2a44c08683 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -28,4 +28,9 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) + +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + #endif /* _UAPI_LINUX_CONST_H */ -- cgit v1.2.3-70-g09d2 From 4a443a51776ca9847942523cf987a330894d3a31 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 14:58:51 -0300 Subject: tools headers UAPI: Sync linux/fscrypt.h with the kernel sources To pick the changes from: 3ceb6543e9cf6ed8 ("fscrypt: remove kernel-internal constants from UAPI header") That don't result in any changes in tooling, just addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fscrypt.h' differs from latest version at 'include/uapi/linux/fscrypt.h' diff -u tools/include/uapi/linux/fscrypt.h include/uapi/linux/fscrypt.h Cc: Adrian Hunter Cc: Eric Biggers Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fscrypt.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index e5de60336938..9f4428be3e36 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -20,7 +20,6 @@ #define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32 0x10 -#define FSCRYPT_POLICY_FLAGS_VALID 0x1F /* Encryption algorithms */ #define FSCRYPT_MODE_AES_256_XTS 1 @@ -28,7 +27,7 @@ #define FSCRYPT_MODE_AES_128_CBC 5 #define FSCRYPT_MODE_AES_128_CTS 6 #define FSCRYPT_MODE_ADIANTUM 9 -#define __FSCRYPT_MODE_MAX 9 +/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */ /* * Legacy policy version; ad-hoc KDF and no key verification. @@ -177,7 +176,7 @@ struct fscrypt_get_key_status_arg { #define FS_POLICY_FLAGS_PAD_32 FSCRYPT_POLICY_FLAGS_PAD_32 #define FS_POLICY_FLAGS_PAD_MASK FSCRYPT_POLICY_FLAGS_PAD_MASK #define FS_POLICY_FLAG_DIRECT_KEY FSCRYPT_POLICY_FLAG_DIRECT_KEY -#define FS_POLICY_FLAGS_VALID FSCRYPT_POLICY_FLAGS_VALID +#define FS_POLICY_FLAGS_VALID 0x07 /* contains old flags only */ #define FS_ENCRYPTION_MODE_INVALID 0 /* never used */ #define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS #define FS_ENCRYPTION_MODE_AES_256_GCM 2 /* never used */ -- cgit v1.2.3-70-g09d2 From d6dbfceec5dd41becbe8c47c402240925d31036a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Dec 2020 15:01:08 -0300 Subject: tools headers UAPI: Sync linux/prctl.h with the kernel sources To pick a new prctl introduced in: 1446e1df9eb183fd ("kernel: Implement selective syscall userspace redirection") That results in: $ tools/perf/trace/beauty/prctl_option.sh > before $ cp include/uapi/linux/prctl.h tools/include/uapi/linux/prctl.h $ tools/perf/trace/beauty/prctl_option.sh > after $ diff -u before after --- before 2020-12-17 15:00:42.012537367 -0300 +++ after 2020-12-17 15:00:49.832699463 -0300 @@ -53,6 +53,7 @@ [56] = "GET_TAGGED_ADDR_CTRL", [57] = "SET_IO_FLUSHER", [58] = "GET_IO_FLUSHER", + [59] = "SET_SYSCALL_USER_DISPATCH", }; static const char *prctl_set_mm_options[] = { [1] = "START_CODE", $ Now users can do: # perf trace -e syscalls:sys_enter_prctl --filter "option==SET_SYSCALL_USER_DISPATCH" ^C# # trace -v -e syscalls:sys_enter_prctl --filter "option==SET_SYSCALL_USER_DISPATCH" New filter for syscalls:sys_enter_prctl: (option==0x3b) && (common_pid != 5519 && common_pid != 3404) ^C# And also when prctl appears in a session, its options will be translated to the string. Cc: Adrian Hunter Cc: Gabriel Krisman Bertazi Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 7f0827705c9a..90deb41c8a34 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -247,4 +247,9 @@ struct prctl_mm_map { #define PR_SET_IO_FLUSHER 57 #define PR_GET_IO_FLUSHER 58 +/* Dispatch syscalls to a userspace handler */ +#define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3-70-g09d2 From 288807fc3a5f19ed77cb8c25342323bbe58a75a1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Dec 2020 09:20:52 -0300 Subject: tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: fb04a1eddb1a65b6 ("KVM: X86: Implement ring-based dirty memory tracking") That result in these change in tooling: $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ cp arch/x86/include/uapi/asm/kvm.h tools/arch/x86/include/uapi/asm/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2020-12-21 11:55:45.229737066 -0300 +++ after 2020-12-21 11:55:56.379983393 -0300 @@ -90,6 +90,7 @@ [0xc0] = "CLEAR_DIRTY_LOG", [0xc1] = "GET_SUPPORTED_HV_CPUID", [0xc6] = "X86_SET_MSR_FILTER", + [0xc7] = "RESET_DIRTY_RINGS", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ Now one can use that string in filters when tracing ioctls, etc. And silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h' diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Peter Xu Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 1 + tools/include/uapi/linux/kvm.h | 56 ++++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 89e5f3d1bba8..8e76d3701db3 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -12,6 +12,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define DE_VECTOR 0 #define DB_VECTOR 1 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index ca41220b40b8..886802b8ffba 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -250,6 +250,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_ARM_NISV 28 #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 +#define KVM_EXIT_DIRTY_RING_FULL 31 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -1053,6 +1054,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_SYS_HYPERV_CPUID 191 +#define KVM_CAP_DIRTY_LOG_RING 192 #ifdef KVM_CAP_IRQ_ROUTING @@ -1511,7 +1514,7 @@ struct kvm_enc_region { /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) -/* Available with KVM_CAP_HYPERV_CPUID */ +/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) /* Available with KVM_CAP_ARM_SVE */ @@ -1557,6 +1560,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_X86_MSR_FILTER */ #define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) +/* Available with KVM_CAP_DIRTY_LOG_RING */ +#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1710,4 +1716,52 @@ struct kvm_hyperv_eventfd { #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) +/* + * Arch needs to define the macro after implementing the dirty ring + * feature. KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the + * starting page offset of the dirty ring structures. + */ +#ifndef KVM_DIRTY_LOG_PAGE_OFFSET +#define KVM_DIRTY_LOG_PAGE_OFFSET 0 +#endif + +/* + * KVM dirty GFN flags, defined as: + * + * |---------------+---------------+--------------| + * | bit 1 (reset) | bit 0 (dirty) | Status | + * |---------------+---------------+--------------| + * | 0 | 0 | Invalid GFN | + * | 0 | 1 | Dirty GFN | + * | 1 | X | GFN to reset | + * |---------------+---------------+--------------| + * + * Lifecycle of a dirty GFN goes like: + * + * dirtied harvested reset + * 00 -----------> 01 -------------> 1X -------+ + * ^ | + * | | + * +------------------------------------------+ + * + * The userspace program is only responsible for the 01->1X state + * conversion after harvesting an entry. Also, it must not skip any + * dirty bits, so that dirty bits are always harvested in sequence. + */ +#define KVM_DIRTY_GFN_F_DIRTY BIT(0) +#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_MASK 0x3 + +/* + * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of + * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn. The + * size of the gfn buffer is decided by the first argument when + * enabling KVM_CAP_DIRTY_LOG_RING. + */ +struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; + __u64 offset; +}; + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3-70-g09d2