diff options
Diffstat (limited to 'tools')
284 files changed, 11568 insertions, 3069 deletions
diff --git a/tools/Makefile b/tools/Makefile index c8a90d01dd8e..221e1ce78b06 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -19,6 +19,7 @@ help: @echo ' kvm_stat - top-like utility for displaying kvm statistics' @echo ' leds - LEDs tools' @echo ' lguest - a minimal 32-bit x86 hypervisor' + @echo ' liblockdep - user-space wrapper for kernel locking-validator' @echo ' net - misc networking tools' @echo ' perf - Linux performance measurement and analysis tool' @echo ' selftests - various kernel selftests' @@ -89,7 +90,7 @@ freefall: FORCE kvm_stat: FORCE $(call descend,kvm/$@) -all: acpi cgroup cpupower gpio hv firewire lguest \ +all: acpi cgroup cpupower gpio hv firewire lguest liblockdep \ perf selftests turbostat usb \ virtio vm net x86_energy_perf_policy \ tmon freefall objtool kvm_stat @@ -103,6 +104,9 @@ cpupower_install: cgroup_install firewire_install gpio_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install objtool_install: $(call descend,$(@:_install=),install) +liblockdep_install: + $(call descend,lib/lockdep,install) + selftests_install: $(call descend,testing/$(@:_install=),install) @@ -119,7 +123,7 @@ kvm_stat_install: $(call descend,kvm/$(@:_install=),install) install: acpi_install cgroup_install cpupower_install gpio_install \ - hv_install firewire_install lguest_install \ + hv_install firewire_install lguest_install liblockdep_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install net_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 6ebd3e6a1fd1..5e3c673fa3f4 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -27,6 +27,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -114,6 +116,8 @@ struct kvm_debug_exit_arch { }; struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { @@ -192,13 +196,17 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 -#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_ITS_SAVE_TABLES 1 +#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 +#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index c2860358ae3e..70eea2ecc663 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -39,6 +39,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -143,6 +145,8 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW (1 << 17) struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { @@ -212,13 +216,17 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 -#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_ITS_SAVE_TABLES 1 +#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 +#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 4edbe4bb0e8b..07fbeb927834 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -29,6 +29,9 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_GUEST_DEBUG +/* Not always available, but if it is, this is the correct offset. */ +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + struct kvm_regs { __u64 pc; __u64 cr; diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 7f4fd65e9208..3dd2a1d308dd 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -26,6 +26,8 @@ #define KVM_DEV_FLIC_ADAPTER_REGISTER 6 #define KVM_DEV_FLIC_ADAPTER_MODIFY 7 #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 +#define KVM_DEV_FLIC_AISM 9 +#define KVM_DEV_FLIC_AIRQ_INJECT 10 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. @@ -41,7 +43,14 @@ struct kvm_s390_io_adapter { __u8 isc; __u8 maskable; __u8 swap; - __u8 pad; + __u8 flags; +}; + +#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01 + +struct kvm_s390_ais_req { + __u8 isc; + __u16 mode; }; #define KVM_S390_IO_ADAPTER_MASK 1 @@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine { #define KVM_S390_VM_CPU_FEAT_CMMA 10 #define KVM_S390_VM_CPU_FEAT_PFMFI 11 #define KVM_S390_VM_CPU_FEAT_SIGPIF 12 +#define KVM_S390_VM_CPU_FEAT_KSS 13 struct kvm_s390_vm_cpu_feat { __u64 feat[16]; }; @@ -198,6 +208,10 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) +#define KVM_SYNC_GSCB (1UL << 9) +/* length and alignment of the sdnx as a power of two */ +#define SDNXC 8 +#define SDNXL (1UL << SDNXC) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -218,8 +232,16 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 padding1[52]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ + __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + union { + __u8 sdnx[SDNXL]; /* state description annex */ + struct { + __u64 reserved1[2]; + __u64 gscb[4]; + }; + }; }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 0fe00446f9ca..2701e5f8145b 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -202,6 +202,8 @@ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 85599ad4d024..5dff775af7cd 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -36,6 +36,12 @@ # define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31)) #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */ +#ifdef CONFIG_X86_5LEVEL +# define DISABLE_LA57 0 +#else +# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -55,7 +61,7 @@ #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) +#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57) #define DISABLED_MASK17 0 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index fac9a5c0abe9..d91ba04dd007 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -53,6 +53,12 @@ # define NEED_MOVBE 0 #endif +#ifdef CONFIG_X86_5LEVEL +# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) +#else +# define NEED_LA57 0 +#endif + #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -98,7 +104,7 @@ #define REQUIRED_MASK13 0 #define REQUIRED_MASK14 0 #define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 0 +#define REQUIRED_MASK16 (NEED_LA57) #define REQUIRED_MASK17 0 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 739c0c594022..c2824d02ba37 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -9,6 +9,9 @@ #include <linux/types.h> #include <linux/ioctl.h> +#define KVM_PIO_PAGE_OFFSET 1 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 + #define DE_VECTOR 0 #define DB_VECTOR 1 #define BP_VECTOR 3 diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index 14458658e988..690a2dcf4078 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -76,7 +76,11 @@ #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_RDRAND 57 #define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_VMFUNC 59 +#define EXIT_REASON_ENCLS 60 +#define EXIT_REASON_RDSEED 61 #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 @@ -90,6 +94,7 @@ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ { EXIT_REASON_CPUID, "CPUID" }, \ { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVD, "INVD" }, \ { EXIT_REASON_INVLPG, "INVLPG" }, \ { EXIT_REASON_RDPMC, "RDPMC" }, \ { EXIT_REASON_RDTSC, "RDTSC" }, \ @@ -108,6 +113,8 @@ { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ { EXIT_REASON_MSR_READ, "MSR_READ" }, \ { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ + { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \ { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \ { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ @@ -115,20 +122,24 @@ { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ - { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ - { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ + { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ + { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ + { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ { EXIT_REASON_INVEPT, "INVEPT" }, \ + { EXIT_REASON_RDTSCP, "RDTSCP" }, \ { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \ + { EXIT_REASON_INVVPID, "INVVPID" }, \ { EXIT_REASON_WBINVD, "WBINVD" }, \ + { EXIT_REASON_XSETBV, "XSETBV" }, \ { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ - { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ - { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ - { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \ - { EXIT_REASON_INVD, "INVD" }, \ - { EXIT_REASON_INVVPID, "INVVPID" }, \ + { EXIT_REASON_RDRAND, "RDRAND" }, \ { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_VMFUNC, "VMFUNC" }, \ + { EXIT_REASON_ENCLS, "ENCLS" }, \ + { EXIT_REASON_RDSEED, "RDSEED" }, \ + { EXIT_REASON_PML_FULL, "PML_FULL" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ { EXIT_REASON_XRSTORS, "XRSTORS" } diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c index ebc6dceddb58..7598361ef1f1 100644 --- a/tools/build/feature/test-bpf.c +++ b/tools/build/feature/test-bpf.c @@ -29,6 +29,7 @@ int main(void) attr.log_size = 0; attr.log_level = 0; attr.kern_version = 0; + attr.prog_flags = 0; /* * Test existence of __NR_bpf and BPF_PROG_LOAD. diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh index d85968cb1bf2..89b25068cd98 100755 --- a/tools/hv/bondvf.sh +++ b/tools/hv/bondvf.sh @@ -102,15 +102,30 @@ function create_bond_cfg_redhat { } function del_eth_cfg_ubuntu { - local fn=$cfgdir/interfaces + local mainfn=$cfgdir/interfaces + local fnlist=( $mainfn ) + + local dirlist=(`awk '/^[ \t]*source/{print $2}' $mainfn`) + + local i + for i in "${dirlist[@]}" + do + fnlist+=(`ls $i 2>/dev/null`) + done + local tmpfl=$(mktemp) local nic_start='^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+'$1 local nic_end='^[ \t]*(auto|iface|mapping|allow-.*|source)' - awk "/$nic_end/{x=0} x{next} /$nic_start/{x=1;next} 1" $fn >$tmpfl + local fn + for fn in "${fnlist[@]}" + do + awk "/$nic_end/{x=0} x{next} /$nic_start/{x=1;next} 1" \ + $fn >$tmpfl - cp $tmpfl $fn + cp $tmpfl $fn + done rm $tmpfl } @@ -119,7 +134,6 @@ function create_eth_cfg_ubuntu { local fn=$cfgdir/interfaces del_eth_cfg_ubuntu $1 - echo $'\n'auto $1 >>$fn echo iface $1 inet manual >>$fn echo bond-master $2 >>$fn @@ -128,7 +142,10 @@ function create_eth_cfg_ubuntu { function create_eth_cfg_pri_ubuntu { local fn=$cfgdir/interfaces - create_eth_cfg_ubuntu $1 $2 + del_eth_cfg_ubuntu $1 + echo $'\n'allow-hotplug $1 >>$fn + echo iface $1 inet manual >>$fn + echo bond-master $2 >>$fn echo bond-primary $1 >>$fn } @@ -153,7 +170,11 @@ function create_eth_cfg_suse { } function create_eth_cfg_pri_suse { - create_eth_cfg_suse $1 + local fn=$cfgdir/ifcfg-$1 + + rm -f $fn + echo BOOTPROTO=none >>$fn + echo STARTMODE=hotplug >>$fn } function create_bond_cfg_suse { diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index f1758fcbc37d..88b20e007c05 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -39,6 +39,7 @@ #include <fcntl.h> #include <dirent.h> #include <net/if.h> +#include <limits.h> #include <getopt.h> /* @@ -97,6 +98,8 @@ static struct utsname uts_buf; #define KVP_SCRIPTS_PATH "/usr/libexec/hypervkvpd/" #endif +#define KVP_NET_DIR "/sys/class/net/" + #define MAX_FILE_NAME 100 #define ENTRIES_PER_BLOCK 50 @@ -596,26 +599,21 @@ static char *kvp_get_if_name(char *guid) DIR *dir; struct dirent *entry; FILE *file; - char *p, *q, *x; + char *p, *x; char *if_name = NULL; char buf[256]; - char *kvp_net_dir = "/sys/class/net/"; - char dev_id[256]; + char dev_id[PATH_MAX]; - dir = opendir(kvp_net_dir); + dir = opendir(KVP_NET_DIR); if (dir == NULL) return NULL; - snprintf(dev_id, sizeof(dev_id), "%s", kvp_net_dir); - q = dev_id + strlen(kvp_net_dir); - while ((entry = readdir(dir)) != NULL) { /* * Set the state for the next pass. */ - *q = '\0'; - strcat(dev_id, entry->d_name); - strcat(dev_id, "/device/device_id"); + snprintf(dev_id, sizeof(dev_id), "%s%s/device/device_id", + KVP_NET_DIR, entry->d_name); file = fopen(dev_id, "r"); if (file == NULL) @@ -653,12 +651,12 @@ static char *kvp_if_name_to_mac(char *if_name) FILE *file; char *p, *x; char buf[256]; - char addr_file[256]; + char addr_file[PATH_MAX]; unsigned int i; char *mac_addr = NULL; - snprintf(addr_file, sizeof(addr_file), "%s%s%s", "/sys/class/net/", - if_name, "/address"); + snprintf(addr_file, sizeof(addr_file), "%s%s%s", KVP_NET_DIR, + if_name, "/address"); file = fopen(addr_file, "r"); if (file == NULL) @@ -688,28 +686,22 @@ static char *kvp_mac_to_if_name(char *mac) DIR *dir; struct dirent *entry; FILE *file; - char *p, *q, *x; + char *p, *x; char *if_name = NULL; char buf[256]; - char *kvp_net_dir = "/sys/class/net/"; - char dev_id[256]; + char dev_id[PATH_MAX]; unsigned int i; - dir = opendir(kvp_net_dir); + dir = opendir(KVP_NET_DIR); if (dir == NULL) return NULL; - snprintf(dev_id, sizeof(dev_id), "%s", kvp_net_dir); - q = dev_id + strlen(kvp_net_dir); - while ((entry = readdir(dir)) != NULL) { /* * Set the state for the next pass. */ - *q = '\0'; - - strcat(dev_id, entry->d_name); - strcat(dev_id, "/address"); + snprintf(dev_id, sizeof(dev_id), "%s%s/address", KVP_NET_DIR, + entry->d_name); file = fopen(dev_id, "r"); if (file == NULL) @@ -1218,9 +1210,9 @@ static int process_ip_string(FILE *f, char *ip_string, int type) static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) { int error = 0; - char if_file[128]; + char if_file[PATH_MAX]; FILE *file; - char cmd[512]; + char cmd[PATH_MAX]; char *mac_addr; /* diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c index e0829809c897..7ba54195934c 100644 --- a/tools/hv/hv_vss_daemon.c +++ b/tools/hv/hv_vss_daemon.c @@ -261,7 +261,9 @@ int main(int argc, char *argv[]) if (len != sizeof(struct hv_vss_msg)) { syslog(LOG_ERR, "write failed; error: %d %s", errno, strerror(errno)); - exit(EXIT_FAILURE); + + if (op == VSS_OP_FREEZE) + vss_operate(VSS_OP_THAW); } } diff --git a/tools/iio/Makefile b/tools/iio/Makefile index 5446d625e17d..8f08e03a9a5e 100644 --- a/tools/iio/Makefile +++ b/tools/iio/Makefile @@ -1,5 +1,5 @@ CC = $(CROSS_COMPILE)gcc -CFLAGS += -Wall -g -D_GNU_SOURCE +CFLAGS += -Wall -g -D_GNU_SOURCE -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include BINDIR=usr/bin INSTALL_PROGRAM=install -m 755 -p diff --git a/tools/iio/iio_utils.h b/tools/iio/iio_utils.h index 780f2014f8fa..8b379da26e35 100644 --- a/tools/iio/iio_utils.h +++ b/tools/iio/iio_utils.h @@ -13,7 +13,7 @@ #include <stdint.h> /* Made up value to limit allocation sizes */ -#define IIO_MAX_NAME_LENGTH 30 +#define IIO_MAX_NAME_LENGTH 64 #define FORMAT_SCAN_ELEMENTS_DIR "%s/scan_elements" #define FORMAT_TYPE_FILE "%s_type" diff --git a/tools/include/asm/sections.h b/tools/include/asm/sections.h new file mode 100644 index 000000000000..a80643d7a7f1 --- /dev/null +++ b/tools/include/asm/sections.h @@ -0,0 +1,4 @@ +#ifndef __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H +#define __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H + +#endif /* __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H */ diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h index 1aecad369af5..969db1981868 100644 --- a/tools/include/linux/bitops.h +++ b/tools/include/linux/bitops.h @@ -61,4 +61,14 @@ static inline unsigned fls_long(unsigned long l) return fls64(l); } +/** + * rol32 - rotate a 32-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + #endif diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 825d44f89a29..bd39b2090ad1 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -19,3 +19,13 @@ /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) + +#define noinline __attribute__((noinline)) + +#define __packed __attribute__((packed)) + +#define __noreturn __attribute__((noreturn)) + +#define __aligned(x) __attribute__((aligned(x))) +#define __printf(a, b) __attribute__((format(printf, a, b))) +#define __scanf(a, b) __attribute__((format(scanf, a, b))) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 23299d7e7160..d7a5604c38d7 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -17,6 +17,10 @@ # define __always_inline inline __attribute__((always_inline)) #endif +#ifndef noinline +#define noinline +#endif + /* Are two types/vars the same type (ignoring qualifiers)? */ #ifndef __same_type # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) @@ -45,6 +49,10 @@ # define __maybe_unused __attribute__((unused)) #endif +#ifndef __used +# define __used __attribute__((__unused__)) +#endif + #ifndef __packed # define __packed __attribute__((__packed__)) #endif @@ -65,6 +73,14 @@ # define unlikely(x) __builtin_expect(!!(x), 0) #endif +#ifndef __init +# define __init +#endif + +#ifndef noinline +# define noinline +#endif + #define uninitialized_var(x) x = *(&(x)) #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) diff --git a/tools/lib/lockdep/uinclude/linux/debug_locks.h b/tools/include/linux/debug_locks.h index f38eb64df794..61cc7f501168 100644 --- a/tools/lib/lockdep/uinclude/linux/debug_locks.h +++ b/tools/include/linux/debug_locks.h @@ -3,8 +3,9 @@ #include <stddef.h> #include <linux/compiler.h> +#include <asm/bug.h> -#define DEBUG_LOCKS_WARN_ON(x) (x) +#define DEBUG_LOCKS_WARN_ON(x) WARN_ON(x) extern bool debug_locks; extern bool debug_locks_silent; diff --git a/tools/include/linux/delay.h b/tools/include/linux/delay.h new file mode 100644 index 000000000000..55aa4173af1f --- /dev/null +++ b/tools/include/linux/delay.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_INCLUDE_LINUX_DELAY_H +#define _TOOLS_INCLUDE_LINUX_DELAY_H + +#endif /* _TOOLS_INCLUDE_LINUX_DELAY_H */ diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h index bdc3dd8131d4..abf0478a8fb2 100644 --- a/tools/include/linux/err.h +++ b/tools/include/linux/err.h @@ -46,4 +46,9 @@ static inline bool __must_check IS_ERR(__force const void *ptr) return IS_ERR_VALUE((unsigned long)ptr); } +static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr) +{ + return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr); +} + #endif /* _LINUX_ERR_H */ diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 390d7c9685fd..4ce25d43e8e3 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -208,6 +208,16 @@ .off = OFF, \ .imm = IMM }) +/* Unconditional jumps, goto pc + off16 */ + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ diff --git a/tools/include/linux/ftrace.h b/tools/include/linux/ftrace.h new file mode 100644 index 000000000000..949f541ce11e --- /dev/null +++ b/tools/include/linux/ftrace.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_INCLUDE_LINUX_FTRACE_H +#define _TOOLS_INCLUDE_LINUX_FTRACE_H + +#endif /* _TOOLS_INCLUDE_LINUX_FTRACE_H */ diff --git a/tools/include/linux/gfp.h b/tools/include/linux/gfp.h new file mode 100644 index 000000000000..22030756fbc0 --- /dev/null +++ b/tools/include/linux/gfp.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_INCLUDE_LINUX_GFP_H +#define _TOOLS_INCLUDE_LINUX_GFP_H + +#endif /* _TOOLS_INCLUDE_LINUX_GFP_H */ diff --git a/tools/lib/lockdep/uinclude/linux/hardirq.h b/tools/include/linux/hardirq.h index c8f3f8f58729..c8f3f8f58729 100644 --- a/tools/lib/lockdep/uinclude/linux/hardirq.h +++ b/tools/include/linux/hardirq.h diff --git a/tools/include/linux/interrupt.h b/tools/include/linux/interrupt.h new file mode 100644 index 000000000000..6be25bbdca9e --- /dev/null +++ b/tools/include/linux/interrupt.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_INCLUDE_LINUX_INTERRUPT_H +#define _TOOLS_INCLUDE_LINUX_INTERRUPT_H + +#endif /* _TOOLS_INCLUDE_LINUX_INTERRUPT_H */ diff --git a/tools/lib/lockdep/uinclude/linux/irqflags.h b/tools/include/linux/irqflags.h index 6cc296f0fad0..df77669cfe1c 100644 --- a/tools/lib/lockdep/uinclude/linux/irqflags.h +++ b/tools/include/linux/irqflags.h @@ -17,19 +17,19 @@ #define raw_local_irq_disable() do { } while (0) #define raw_local_irq_enable() do { } while (0) #define raw_local_irq_save(flags) ((flags) = 0) -#define raw_local_irq_restore(flags) do { } while (0) +#define raw_local_irq_restore(flags) ((void)(flags)) #define raw_local_save_flags(flags) ((flags) = 0) -#define raw_irqs_disabled_flags(flags) do { } while (0) +#define raw_irqs_disabled_flags(flags) ((void)(flags)) #define raw_irqs_disabled() 0 #define raw_safe_halt() #define local_irq_enable() do { } while (0) #define local_irq_disable() do { } while (0) #define local_irq_save(flags) ((flags) = 0) -#define local_irq_restore(flags) do { } while (0) +#define local_irq_restore(flags) ((void)(flags)) #define local_save_flags(flags) ((flags) = 0) #define irqs_disabled() (1) -#define irqs_disabled_flags(flags) (0) +#define irqs_disabled_flags(flags) ((void)(flags), 0) #define safe_halt() do { } while (0) #define trace_lock_release(x, y) diff --git a/tools/include/linux/jhash.h b/tools/include/linux/jhash.h new file mode 100644 index 000000000000..348c6f47e4cc --- /dev/null +++ b/tools/include/linux/jhash.h @@ -0,0 +1,175 @@ +#ifndef _LINUX_JHASH_H +#define _LINUX_JHASH_H + +/* jhash.h: Jenkins hash support. + * + * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup3.c, by Bob Jenkins, May 2006, Public Domain. + * + * These are functions for producing 32-bit hashes for hash table lookup. + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() + * are externally useful functions. Routines to test the hash are included + * if SELF_TEST is defined. You can use this free for any purpose. It's in + * the public domain. It has no warranty. + * + * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) + * + * I've modified Bob's hash to be useful in the Linux kernel, and + * any bugs present are my fault. + * Jozsef + */ +#include <linux/bitops.h> +#include <linux/unaligned/packed_struct.h> + +/* Best hash sizes are of power of two */ +#define jhash_size(n) ((u32)1<<(n)) +/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */ +#define jhash_mask(n) (jhash_size(n)-1) + +/* __jhash_mix -- mix 3 32-bit values reversibly. */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +/* An arbitrary initial parameter */ +#define JHASH_INITVAL 0xdeadbeef + +/* jhash - hash an arbitrary key + * @k: sequence of bytes as key + * @length: the length of the key + * @initval: the previous hash, or an arbitray value + * + * The generic version, hashes an arbitrary sequence of bytes. + * No alignment or length assumptions are made about the input key. + * + * Returns the hash value of the key. The result depends on endianness. + */ +static inline u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const u8 *k = key; + + /* Set up the internal state */ + a = b = c = JHASH_INITVAL + length + initval; + + /* All but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) { + a += __get_unaligned_cpu32(k); + b += __get_unaligned_cpu32(k + 4); + c += __get_unaligned_cpu32(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + /* Last block: affect all 32 bits of (c) */ + /* All the case statements fall through */ + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +/* jhash2 - hash an array of u32's + * @k: the key which must be an array of u32's + * @length: the number of u32's in the key + * @initval: the previous hash, or an arbitray value + * + * Returns the hash value of the key. + */ +static inline u32 jhash2(const u32 *k, u32 length, u32 initval) +{ + u32 a, b, c; + + /* Set up the internal state */ + a = b = c = JHASH_INITVAL + (length<<2) + initval; + + /* Handle most of the key */ + while (length > 3) { + a += k[0]; + b += k[1]; + c += k[2]; + __jhash_mix(a, b, c); + length -= 3; + k += 3; + } + + /* Handle the last 3 u32's: all the case statements fall through */ + switch (length) { + case 3: c += k[2]; + case 2: b += k[1]; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + + +/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */ +static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + + __jhash_final(a, b, c); + + return c; +} + +static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +{ + return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2)); +} + +static inline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +static inline u32 jhash_1word(u32 a, u32 initval) +{ + return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2)); +} + +#endif /* _LINUX_JHASH_H */ diff --git a/tools/lib/lockdep/uinclude/linux/kallsyms.h b/tools/include/linux/kallsyms.h index b0f2dbdf1a15..582cc1e5f3a4 100644 --- a/tools/lib/lockdep/uinclude/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -3,6 +3,7 @@ #include <linux/kernel.h> #include <stdio.h> +#include <unistd.h> #define KSYM_NAME_LEN 128 @@ -24,7 +25,7 @@ static inline void print_ip_sym(unsigned long ip) name = backtrace_symbols((void **)&ip, 1); - printf("%s\n", *name); + dprintf(STDOUT_FILENO, "%s\n", *name); free(name); } diff --git a/tools/lib/lockdep/uinclude/linux/kern_levels.h b/tools/include/linux/kern_levels.h index 3b9bade28698..3b9bade28698 100644 --- a/tools/lib/lockdep/uinclude/linux/kern_levels.h +++ b/tools/include/linux/kern_levels.h diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 73ccc48126bb..77d2e94ca5df 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -5,6 +5,8 @@ #include <stddef.h> #include <assert.h> #include <linux/compiler.h> +#include <endian.h> +#include <byteswap.h> #ifndef UINT_MAX #define UINT_MAX (~0U) @@ -32,6 +34,7 @@ (type *)((char *)__mptr - offsetof(type, member)); }) #endif +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) #ifndef max @@ -67,12 +70,33 @@ #endif #endif -/* - * Both need more care to handle endianness - * (Don't use bitmap_copy_le() for now) - */ -#define cpu_to_le64(x) (x) -#define cpu_to_le32(x) (x) +#if __BYTE_ORDER == __BIG_ENDIAN +#define cpu_to_le16 bswap_16 +#define cpu_to_le32 bswap_32 +#define cpu_to_le64 bswap_64 +#define le16_to_cpu bswap_16 +#define le32_to_cpu bswap_32 +#define le64_to_cpu bswap_64 +#define cpu_to_be16 +#define cpu_to_be32 +#define cpu_to_be64 +#define be16_to_cpu +#define be32_to_cpu +#define be64_to_cpu +#else +#define cpu_to_le16 +#define cpu_to_le32 +#define cpu_to_le64 +#define le16_to_cpu +#define le32_to_cpu +#define le64_to_cpu +#define cpu_to_be16 bswap_16 +#define cpu_to_be32 bswap_32 +#define cpu_to_be64 bswap_64 +#define be16_to_cpu bswap_16 +#define be32_to_cpu bswap_32 +#define be64_to_cpu bswap_64 +#endif int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); int scnprintf(char * buf, size_t size, const char * fmt, ...); @@ -89,4 +113,7 @@ int scnprintf(char * buf, size_t size, const char * fmt, ...); #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) #define round_down(x, y) ((x) & ~__round_mask(x, y)) +#define current_gfp_context(k) 0 +#define synchronize_sched() + #endif diff --git a/tools/lib/lockdep/uinclude/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h index 94d598bc6abe..94d598bc6abe 100644 --- a/tools/lib/lockdep/uinclude/linux/kmemcheck.h +++ b/tools/include/linux/kmemcheck.h diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h new file mode 100644 index 000000000000..bc763d500262 --- /dev/null +++ b/tools/include/linux/linkage.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_INCLUDE_LINUX_LINKAGE_H +#define _TOOLS_INCLUDE_LINUX_LINKAGE_H + +#endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */ diff --git a/tools/lib/lockdep/uinclude/linux/lockdep.h b/tools/include/linux/lockdep.h index c808c7d02d21..8da3e8effafa 100644 --- a/tools/lib/lockdep/uinclude/linux/lockdep.h +++ b/tools/include/linux/lockdep.h @@ -7,8 +7,15 @@ #include <limits.h> #include <linux/utsname.h> #include <linux/compiler.h> +#include <linux/export.h> +#include <linux/kern_levels.h> +#include <linux/err.h> +#include <linux/rcu.h> +#include <linux/list.h> +#include <linux/hardirq.h> +#include <unistd.h> -#define MAX_LOCK_DEPTH 2000UL +#define MAX_LOCK_DEPTH 63UL #define asmlinkage #define __visible @@ -29,31 +36,32 @@ extern struct task_struct *__curr(void); #define current (__curr()) -#define debug_locks_off() 1 +static inline int debug_locks_off(void) +{ + return 1; +} + #define task_pid_nr(tsk) ((tsk)->pid) #define KSYM_NAME_LEN 128 -#define printk printf +#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__) +#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#define pr_warn pr_err #define list_del_rcu list_del #define atomic_t unsigned long #define atomic_inc(x) ((*(x))++) -static struct new_utsname *init_utsname(void) -{ - static struct new_utsname n = (struct new_utsname) { - .release = "liblockdep", - .version = LIBLOCKDEP_VERSION, - }; - - return &n; -} - #define print_tainted() "" #define static_obj(x) 1 #define debug_show_all_locks() extern void debug_check_no_locks_held(void); +static __used bool __is_kernel_percpu_address(unsigned long addr, void *can_addr) +{ + return false; +} + #endif diff --git a/tools/lib/lockdep/uinclude/linux/module.h b/tools/include/linux/module.h index 09c7a7be8ccc..07055db296f3 100644 --- a/tools/lib/lockdep/uinclude/linux/module.h +++ b/tools/include/linux/module.h @@ -3,4 +3,9 @@ #define module_param(name, type, perm) +static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) +{ + return false; +} + #endif diff --git a/tools/include/linux/mutex.h b/tools/include/linux/mutex.h new file mode 100644 index 000000000000..a8180d25f2fc --- /dev/null +++ b/tools/include/linux/mutex.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_INCLUDE_LINUX_MUTEX_H +#define _TOOLS_INCLUDE_LINUX_MUTEX_H + +#endif /* _TOOLS_INCLUDE_LINUX_MUTEX_H */ diff --git a/tools/include/linux/proc_fs.h b/tools/include/linux/proc_fs.h new file mode 100644 index 000000000000..8b3b03b64fda --- /dev/null +++ b/tools/include/linux/proc_fs.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_INCLUDE_LINUX_PROC_FS_H +#define _TOOLS_INCLUDE_LINUX_PROC_FS_H + +#endif /* _TOOLS_INCLUDE_LINUX_PROC_FS_H */ diff --git a/tools/lib/lockdep/uinclude/linux/rcu.h b/tools/include/linux/rcu.h index 042ee8e463c9..5080649dad04 100644 --- a/tools/lib/lockdep/uinclude/linux/rcu.h +++ b/tools/include/linux/rcu.h @@ -18,4 +18,7 @@ static inline bool rcu_is_watching(void) return false; } +#define rcu_assign_pointer(p, v) ((p) = (v)) +#define RCU_INIT_POINTER(p, v) p=(v) + #endif diff --git a/tools/include/linux/sched/clock.h b/tools/include/linux/sched/clock.h new file mode 100644 index 000000000000..5837d17c4182 --- /dev/null +++ b/tools/include/linux/sched/clock.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_PERF_LINUX_SCHED_CLOCK_H +#define _TOOLS_PERF_LINUX_SCHED_CLOCK_H + +#endif /* _TOOLS_PERF_LINUX_SCHED_CLOCK_H */ diff --git a/tools/include/linux/sched/mm.h b/tools/include/linux/sched/mm.h new file mode 100644 index 000000000000..c8d9f19c1f35 --- /dev/null +++ b/tools/include/linux/sched/mm.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_PERF_LINUX_SCHED_MM_H +#define _TOOLS_PERF_LINUX_SCHED_MM_H + +#endif /* _TOOLS_PERF_LINUX_SCHED_MM_H */ diff --git a/tools/include/linux/sched/task.h b/tools/include/linux/sched/task.h new file mode 100644 index 000000000000..a97890eca110 --- /dev/null +++ b/tools/include/linux/sched/task.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_PERF_LINUX_SCHED_TASK_H +#define _TOOLS_PERF_LINUX_SCHED_TASK_H + +#endif /* _TOOLS_PERF_LINUX_SCHED_TASK_H */ diff --git a/tools/include/linux/seq_file.h b/tools/include/linux/seq_file.h new file mode 100644 index 000000000000..102fd9217f1f --- /dev/null +++ b/tools/include/linux/seq_file.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_INCLUDE_LINUX_SEQ_FILE_H +#define _TOOLS_INCLUDE_LINUX_SEQ_FILE_H + +#endif /* _TOOLS_INCLUDE_LINUX_SEQ_FILE_H */ diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index 58397dcb19d6..417cda4f793f 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -1,5 +1,31 @@ +#ifndef __LINUX_SPINLOCK_H_ +#define __LINUX_SPINLOCK_H_ + +#include <pthread.h> +#include <stdbool.h> + #define spinlock_t pthread_mutex_t #define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER; #define spin_lock_irqsave(x, f) (void)f, pthread_mutex_lock(x) #define spin_unlock_irqrestore(x, f) (void)f, pthread_mutex_unlock(x) + +#define arch_spinlock_t pthread_mutex_t +#define __ARCH_SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER + +static inline void arch_spin_lock(arch_spinlock_t *mutex) +{ + pthread_mutex_lock(mutex); +} + +static inline void arch_spin_unlock(arch_spinlock_t *mutex) +{ + pthread_mutex_unlock(mutex); +} + +static inline bool arch_spin_is_locked(arch_spinlock_t *mutex) +{ + return true; +} + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/stacktrace.h b/tools/include/linux/stacktrace.h index 39aecc6b19d1..39aecc6b19d1 100644 --- a/tools/lib/lockdep/uinclude/linux/stacktrace.h +++ b/tools/include/linux/stacktrace.h diff --git a/tools/include/linux/unaligned/packed_struct.h b/tools/include/linux/unaligned/packed_struct.h new file mode 100644 index 000000000000..c0d817de4df2 --- /dev/null +++ b/tools/include/linux/unaligned/packed_struct.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H +#define _LINUX_UNALIGNED_PACKED_STRUCT_H + +#include <linux/kernel.h> + +struct __una_u16 { u16 x; } __packed; +struct __una_u32 { u32 x; } __packed; +struct __una_u64 { u64 x; } __packed; + +static inline u16 __get_unaligned_cpu16(const void *p) +{ + const struct __una_u16 *ptr = (const struct __una_u16 *)p; + return ptr->x; +} + +static inline u32 __get_unaligned_cpu32(const void *p) +{ + const struct __una_u32 *ptr = (const struct __una_u32 *)p; + return ptr->x; +} + +static inline u64 __get_unaligned_cpu64(const void *p) +{ + const struct __una_u64 *ptr = (const struct __una_u64 *)p; + return ptr->x; +} + +static inline void __put_unaligned_cpu16(u16 val, void *p) +{ + struct __una_u16 *ptr = (struct __una_u16 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu32(u32 val, void *p) +{ + struct __una_u32 *ptr = (struct __una_u32 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu64(u64 val, void *p) +{ + struct __una_u64 *ptr = (struct __una_u64 *)p; + ptr->x = val; +} + +#endif /* _LINUX_UNALIGNED_PACKED_STRUCT_H */ diff --git a/tools/include/trace/events/lock.h b/tools/include/trace/events/lock.h new file mode 100644 index 000000000000..5b15fd5ee1af --- /dev/null +++ b/tools/include/trace/events/lock.h @@ -0,0 +1,4 @@ +#ifndef _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H +#define _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H + +#endif /* _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e553529929f6..ce2988be4f0e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -82,6 +82,11 @@ enum bpf_cmd { BPF_PROG_ATTACH, BPF_PROG_DETACH, BPF_PROG_TEST_RUN, + BPF_PROG_GET_NEXT_ID, + BPF_MAP_GET_NEXT_ID, + BPF_PROG_GET_FD_BY_ID, + BPF_MAP_GET_FD_BY_ID, + BPF_OBJ_GET_INFO_BY_FD, }; enum bpf_map_type { @@ -115,12 +120,14 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_IN, BPF_PROG_TYPE_LWT_OUT, BPF_PROG_TYPE_LWT_XMIT, + BPF_PROG_TYPE_SOCK_OPS, }; enum bpf_attach_type { BPF_CGROUP_INET_INGRESS, BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_SOCK_OPS, __MAX_BPF_ATTACH_TYPE }; @@ -132,6 +139,13 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel + * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, + * and NET_IP_ALIGN defined to 2. + */ +#define BPF_F_STRICT_ALIGNMENT (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -177,6 +191,7 @@ union bpf_attr { __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 prog_flags; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -201,6 +216,21 @@ union bpf_attr { __u32 repeat; __u32 duration; } test; + + struct { /* anonymous struct used by BPF_*_GET_*_ID */ + union { + __u32 start_id; + __u32 prog_id; + __u32 map_id; + }; + __u32 next_id; + }; + + struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ + __u32 bpf_fd; + __u32 info_len; + __aligned_u64 info; + } info; } __attribute__((aligned(8))); /* BPF helper function descriptions: @@ -305,8 +335,11 @@ union bpf_attr { * @flags: room for future extensions * Return: 0 on success or negative error * - * u64 bpf_perf_event_read(&map, index) - * Return: Number events read or error code + * u64 bpf_perf_event_read(map, flags) + * read perf event counter value + * @map: pointer to perf_event_array map + * @flags: index of event in the map or bitmask flags + * Return: value of perf event counter read or error code * * int bpf_redirect(ifindex, flags) * redirect to another netdev @@ -320,11 +353,11 @@ union bpf_attr { * @skb: pointer to skb * Return: realm if != 0 * - * int bpf_perf_event_output(ctx, map, index, data, size) + * int bpf_perf_event_output(ctx, map, flags, data, size) * output perf raw sample * @ctx: struct pt_regs* * @map: pointer to perf_event_array map - * @index: index of event in the map + * @flags: index of event in the map or bitmask flags * @data: data on stack to be output as raw data * @size: size of data * Return: 0 on success or negative error @@ -481,8 +514,31 @@ union bpf_attr { * u32 bpf_get_socket_uid(skb) * Get the owner uid of the socket stored inside sk_buff. * @skb: pointer to skb - * Return: uid of the socket owner on success or 0 if the socket pointer - * inside sk_buff is NULL + * Return: uid of the socket owner on success or overflowuid if failed. + * + * u32 bpf_set_hash(skb, hash) + * Set full skb->hash. + * @skb: pointer to skb + * @hash: hash to set + * + * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) + * Calls setsockopt. Not all opts are available, only those with + * integer optvals plus TCP_CONGESTION. + * Supported levels: SOL_SOCKET and IPROTO_TCP + * @bpf_socket: pointer to bpf_socket + * @level: SOL_SOCKET or IPROTO_TCP + * @optname: option name + * @optval: pointer to option value + * @optlen: length of optval in byes + * Return: 0 or negative error + * + * int bpf_skb_adjust_room(skb, len_diff, mode, flags) + * Grow or shrink room in sk_buff. + * @skb: pointer to skb + * @len_diff: (signed) amount of room to grow/shrink + * @mode: operation mode (enum bpf_adj_room_mode) + * @flags: reserved for future use + * Return: 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -532,7 +588,10 @@ union bpf_attr { FN(xdp_adjust_head), \ FN(probe_read_str), \ FN(get_socket_cookie), \ - FN(get_socket_uid), + FN(get_socket_uid), \ + FN(set_hash), \ + FN(setsockopt), \ + FN(skb_adjust_room), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -582,6 +641,11 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Mode for BPF_FUNC_skb_adjust_room helper. */ +enum bpf_adj_room_mode { + BPF_ADJ_ROOM_NET_OPTS, +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -663,4 +727,75 @@ struct xdp_md { __u32 data_end; }; +#define BPF_TAG_SIZE 8 + +struct bpf_prog_info { + __u32 type; + __u32 id; + __u8 tag[BPF_TAG_SIZE]; + __u32 jited_prog_len; + __u32 xlated_prog_len; + __aligned_u64 jited_prog_insns; + __aligned_u64 xlated_prog_insns; +} __attribute__((aligned(8))); + +struct bpf_map_info { + __u32 type; + __u32 id; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 map_flags; +} __attribute__((aligned(8))); + +/* User bpf_sock_ops struct to access socket values and specify request ops + * and their replies. + * New fields can only be added at the end of this structure + */ +struct bpf_sock_ops { + __u32 op; + union { + __u32 reply; + __u32 replylong[4]; + }; + __u32 family; + __u32 remote_ip4; + __u32 local_ip4; + __u32 remote_ip6[4]; + __u32 local_ip6[4]; + __u32 remote_port; + __u32 local_port; +}; + +/* List of known BPF sock_ops operators. + * New entries can only be added at the end + */ +enum { + BPF_SOCK_OPS_VOID, + BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or + * -1 if default value should be used + */ + BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized + * window (in packets) or -1 if default + * value should be used + */ + BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an + * active connection is initialized + */ + BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an + * active connection is + * established + */ + BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a + * passive connection is + * established + */ + BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control + * needs ECN + */ +}; + +#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ +#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index d538897b8e08..17b10304c393 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -48,17 +48,13 @@ * tv_sec holds the number of seconds before (negative) or after (positive) * 00:00:00 1st January 1970 UTC. * - * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is - * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time. - * - * Note that if both tv_sec and tv_nsec are non-zero, then the two values must - * either be both positive or both negative. + * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time. * * __reserved is held in case we need a yet finer resolution. */ struct statx_timestamp { __s64 tv_sec; - __s32 tv_nsec; + __u32 tv_nsec; __s32 __reserved; }; diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 809c7721cd24..a7ecf8f469f4 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -387,6 +387,22 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep) return err; } +int filename__write_int(const char *filename, int value) +{ + int fd = open(filename, O_WRONLY), err = -1; + char buf[64]; + + if (fd < 0) + return err; + + sprintf(buf, "%d", value); + if (write(fd, buf, sizeof(buf)) == sizeof(buf)) + err = 0; + + close(fd); + return err; +} + int procfs__read_str(const char *entry, char **buf, size_t *sizep) { char path[PATH_MAX]; @@ -480,3 +496,17 @@ int sysctl__read_int(const char *sysctl, int *value) return filename__read_int(path, value); } + +int sysfs__write_int(const char *entry, int value) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX) + return -1; + + return filename__write_int(path, value); +} diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 956c21127d1e..45605348461e 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -31,6 +31,8 @@ int filename__read_int(const char *filename, int *value); int filename__read_ull(const char *filename, unsigned long long *value); int filename__read_str(const char *filename, char **buf, size_t *sizep); +int filename__write_int(const char *filename, int value); + int procfs__read_str(const char *entry, char **buf, size_t *sizep); int sysctl__read_int(const char *sysctl, int *value); @@ -38,4 +40,6 @@ int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long long *value); int sysfs__read_str(const char *entry, char **buf, size_t *sizep); int sysfs__read_bool(const char *entry, bool *value); + +int sysfs__write_int(const char *entry, int value); #endif /* __API_FS__ */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 4fe444b8092e..7e0405e1651d 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -117,6 +117,28 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } +int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, int strict_alignment, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.prog_type = type; + attr.insn_cnt = (__u32)insns_cnt; + attr.insns = ptr_to_u64(insns); + attr.license = ptr_to_u64(license); + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + attr.log_level = 2; + log_buf[0] = 0; + attr.kern_version = kern_version; + attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0; + + return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +} + int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags) { @@ -235,3 +257,71 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, *duration = attr.test.duration; return ret; } + +int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) +{ + union bpf_attr attr; + int err; + + bzero(&attr, sizeof(attr)); + attr.start_id = start_id; + + err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)); + if (!err) + *next_id = attr.next_id; + + return err; +} + +int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) +{ + union bpf_attr attr; + int err; + + bzero(&attr, sizeof(attr)); + attr.start_id = start_id; + + err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr)); + if (!err) + *next_id = attr.next_id; + + return err; +} + +int bpf_prog_get_fd_by_id(__u32 id) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.prog_id = id; + + return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); +} + +int bpf_map_get_fd_by_id(__u32 id) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.map_id = id; + + return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); +} + +int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) +{ + union bpf_attr attr; + int err; + + bzero(&attr, sizeof(attr)); + bzero(info, *info_len); + attr.info.bpf_fd = prog_fd; + attr.info.info_len = *info_len; + attr.info.info = ptr_to_u64(info); + + err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); + if (!err) + *info_len = attr.info.info_len; + + return err; +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index edb4daeff7a5..16de44a14b48 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -35,6 +35,10 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz); +int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, int strict_alignment, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz); int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); @@ -50,5 +54,10 @@ int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); +int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); +int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); +int bpf_prog_get_fd_by_id(__u32 id); +int bpf_map_get_fd_by_id(__u32 id); +int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); #endif diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index 3bc0ef9f8923..ed9ace59d112 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -79,6 +79,7 @@ INCLUDES = -I. -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES) # Set compile option CFLAGS if not set elsewhere CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g CFLAGS += -fPIC +CFLAGS += -Wall override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) @@ -100,7 +101,7 @@ include $(srctree)/tools/build/Makefile.include do_compile_shared_library = \ ($(print_shared_lib_compile) \ - $(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -sf $@ liblockdep.so)) + $(CC) $(LDFLAGS) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='$(@F)';$(shell ln -sf $(@F) $(@D)/liblockdep.so)) do_build_static_lib = \ ($(print_static_lib_build) \ @@ -118,10 +119,10 @@ all_cmd: $(CMD_TARGETS) $(LIB_IN): force $(Q)$(MAKE) $(build)=liblockdep -liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN) +$(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN) $(Q)$(do_compile_shared_library) -liblockdep.a: $(LIB_IN) +$(OUTPUT)liblockdep.a: $(LIB_IN) $(Q)$(do_build_static_lib) tags: force @@ -149,7 +150,7 @@ install_lib: all_cmd install: install_lib clean: - $(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d .*.cmd + $(RM) $(OUTPUT)*.o *~ $(TARGETS) $(OUTPUT)*.a $(OUTPUT)*liblockdep*.so* $(VERSION_FILES) $(OUTPUT).*.d $(OUTPUT).*.cmd $(RM) tags TAGS PHONY += force diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c index a0a2e3a266af..ced6d7443cea 100644 --- a/tools/lib/lockdep/lockdep.c +++ b/tools/lib/lockdep/lockdep.c @@ -1,8 +1,27 @@ #include <linux/lockdep.h> +#include <stdlib.h> /* Trivial API wrappers, we don't (yet) have RCU in user-space: */ #define hlist_for_each_entry_rcu hlist_for_each_entry #define hlist_add_head_rcu hlist_add_head #define hlist_del_rcu hlist_del +#define list_for_each_entry_rcu list_for_each_entry +#define list_add_tail_rcu list_add_tail + +u32 prandom_u32(void) +{ + /* Used only by lock_pin_lock() which is dead code */ + abort(); +} + +static struct new_utsname *init_utsname(void) +{ + static struct new_utsname n = (struct new_utsname) { + .release = "liblockdep", + .version = LIBLOCKDEP_VERSION, + }; + + return &n; +} #include "../../../kernel/locking/lockdep.c" diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index 52844847569c..6a2d3c5d4e92 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -4,6 +4,7 @@ #include <dlfcn.h> #include <stdlib.h> #include <sysexits.h> +#include <unistd.h> #include "include/liblockdep/mutex.h" #include "../../include/linux/rbtree.h" @@ -122,8 +123,6 @@ static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent) #define LIBLOCKDEP_STATIC_ENTRIES 1024 #endif -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES]; static int __locks_nr; @@ -149,7 +148,7 @@ static struct lock_lookup *alloc_lock(void) int idx = __locks_nr++; if (idx >= ARRAY_SIZE(__locks)) { - fprintf(stderr, + dprintf(STDERR_FILENO, "LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n"); exit(EX_UNAVAILABLE); } diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c index f7f43033c8b7..297c304571f8 100644 --- a/tools/lib/lockdep/rbtree.c +++ b/tools/lib/lockdep/rbtree.c @@ -1 +1 @@ -#include "../../../lib/rbtree.c" +#include "../../lib/rbtree.c" diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh index 1069d96248c1..f9b94098fc98 100755 --- a/tools/lib/lockdep/run_tests.sh +++ b/tools/lib/lockdep/run_tests.sh @@ -4,9 +4,9 @@ make &> /dev/null for i in `ls tests/*.c`; do testname=$(basename "$i" .c) - gcc -o tests/$testname -pthread -lpthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null + gcc -o tests/$testname -pthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null echo -ne "$testname... " - if [ $(timeout 1 ./tests/$testname | wc -l) -gt 0 ]; then + if [ $(timeout 1 ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then echo "PASSED!" else echo "FAILED!" @@ -18,9 +18,9 @@ done for i in `ls tests/*.c`; do testname=$(basename "$i" .c) - gcc -o tests/$testname -pthread -lpthread -Iinclude $i &> /dev/null + gcc -o tests/$testname -pthread -Iinclude $i &> /dev/null echo -ne "(PRELOAD) $testname... " - if [ $(timeout 1 ./lockdep ./tests/$testname | wc -l) -gt 0 ]; then + if [ $(timeout 1 ./lockdep ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then echo "PASSED!" else echo "FAILED!" diff --git a/tools/lib/lockdep/uinclude/asm/hash.h b/tools/lib/lockdep/uinclude/asm/hash.h deleted file mode 100644 index d82b170bb216..000000000000 --- a/tools/lib/lockdep/uinclude/asm/hash.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_GENERIC_HASH_H -#define __ASM_GENERIC_HASH_H - -/* Stub */ - -#endif /* __ASM_GENERIC_HASH_H */ diff --git a/tools/lib/lockdep/uinclude/asm/hweight.h b/tools/lib/lockdep/uinclude/asm/hweight.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/asm/hweight.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/lib/lockdep/uinclude/asm/sections.h b/tools/lib/lockdep/uinclude/asm/sections.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/asm/sections.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/lib/lockdep/uinclude/linux/bitops.h b/tools/lib/lockdep/uinclude/linux/bitops.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/linux/bitops.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/lib/lockdep/uinclude/linux/compiler.h b/tools/lib/lockdep/uinclude/linux/compiler.h deleted file mode 100644 index fd3e56a83fc2..000000000000 --- a/tools/lib/lockdep/uinclude/linux/compiler.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _LIBLOCKDEP_LINUX_COMPILER_H_ -#define _LIBLOCKDEP_LINUX_COMPILER_H_ - -#define __used __attribute__((__unused__)) -#define unlikely -#define READ_ONCE(x) (x) -#define WRITE_ONCE(x, val) x=(val) -#define RCU_INIT_POINTER(p, v) p=(v) - -#endif diff --git a/tools/lib/lockdep/uinclude/linux/delay.h b/tools/lib/lockdep/uinclude/linux/delay.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/linux/delay.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/lib/lockdep/uinclude/linux/ftrace.h b/tools/lib/lockdep/uinclude/linux/ftrace.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/linux/ftrace.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/lib/lockdep/uinclude/linux/gfp.h b/tools/lib/lockdep/uinclude/linux/gfp.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/linux/gfp.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/lib/lockdep/uinclude/linux/hash.h b/tools/lib/lockdep/uinclude/linux/hash.h deleted file mode 100644 index 0f8479858dc0..000000000000 --- a/tools/lib/lockdep/uinclude/linux/hash.h +++ /dev/null @@ -1 +0,0 @@ -#include "../../../include/linux/hash.h" diff --git a/tools/lib/lockdep/uinclude/linux/interrupt.h b/tools/lib/lockdep/uinclude/linux/interrupt.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/linux/interrupt.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h deleted file mode 100644 index 276c7a8b2ed1..000000000000 --- a/tools/lib/lockdep/uinclude/linux/kernel.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _LIBLOCKDEP_LINUX_KERNEL_H_ -#define _LIBLOCKDEP_LINUX_KERNEL_H_ - -#include <linux/export.h> -#include <linux/types.h> -#include <linux/rcu.h> -#include <linux/hardirq.h> -#include <linux/kern_levels.h> - -#ifndef container_of -#define container_of(ptr, type, member) ({ \ - const typeof(((type *)0)->member) * __mptr = (ptr); \ - (type *)((char *)__mptr - offsetof(type, member)); }) -#endif - -#define max(x, y) ({ \ - typeof(x) _max1 = (x); \ - typeof(y) _max2 = (y); \ - (void) (&_max1 == &_max2); \ - _max1 > _max2 ? _max1 : _max2; }) - -#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) -#define WARN_ON(x) (x) -#define WARN_ON_ONCE(x) (x) -#define likely(x) (x) -#define WARN(x, y...) (x) -#define uninitialized_var(x) x -#define __init -#define noinline -#define list_add_tail_rcu list_add_tail -#define list_for_each_entry_rcu list_for_each_entry -#define barrier() -#define synchronize_sched() - -#ifndef CALLER_ADDR0 -#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#endif - -#ifndef _RET_IP_ -#define _RET_IP_ CALLER_ADDR0 -#endif - -#ifndef _THIS_IP_ -#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) -#endif - -#endif diff --git a/tools/lib/lockdep/uinclude/linux/linkage.h b/tools/lib/lockdep/uinclude/linux/linkage.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/linux/linkage.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/lib/lockdep/uinclude/linux/list.h b/tools/lib/lockdep/uinclude/linux/list.h deleted file mode 100644 index 6e9ef31ed82e..000000000000 --- a/tools/lib/lockdep/uinclude/linux/list.h +++ /dev/null @@ -1 +0,0 @@ -#include "../../../include/linux/list.h" diff --git a/tools/lib/lockdep/uinclude/linux/mutex.h b/tools/lib/lockdep/uinclude/linux/mutex.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/linux/mutex.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/lib/lockdep/uinclude/linux/poison.h b/tools/lib/lockdep/uinclude/linux/poison.h deleted file mode 100644 index 0c27bdf14233..000000000000 --- a/tools/lib/lockdep/uinclude/linux/poison.h +++ /dev/null @@ -1 +0,0 @@ -#include "../../../include/linux/poison.h" diff --git a/tools/lib/lockdep/uinclude/linux/prefetch.h b/tools/lib/lockdep/uinclude/linux/prefetch.h deleted file mode 100644 index d73fe6f850ac..000000000000 --- a/tools/lib/lockdep/uinclude/linux/prefetch.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _LIBLOCKDEP_LINUX_PREFETCH_H_ -#define _LIBLOCKDEP_LINUX_PREFETCH_H - -static inline void prefetch(void *a __attribute__((unused))) { } - -#endif diff --git a/tools/lib/lockdep/uinclude/linux/proc_fs.h b/tools/lib/lockdep/uinclude/linux/proc_fs.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/linux/proc_fs.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h b/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h deleted file mode 100644 index c3759477379c..000000000000 --- a/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h +++ /dev/null @@ -1,2 +0,0 @@ -#define __always_inline -#include "../../../include/linux/rbtree_augmented.h" diff --git a/tools/lib/lockdep/uinclude/linux/seq_file.h b/tools/lib/lockdep/uinclude/linux/seq_file.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/linux/seq_file.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/lib/lockdep/uinclude/linux/spinlock.h b/tools/lib/lockdep/uinclude/linux/spinlock.h deleted file mode 100644 index 68c1aa2bcba5..000000000000 --- a/tools/lib/lockdep/uinclude/linux/spinlock.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _LIBLOCKDEP_SPINLOCK_H_ -#define _LIBLOCKDEP_SPINLOCK_H_ - -#include <pthread.h> -#include <stdbool.h> - -#define arch_spinlock_t pthread_mutex_t -#define __ARCH_SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER - -static inline void arch_spin_lock(arch_spinlock_t *mutex) -{ - pthread_mutex_lock(mutex); -} - -static inline void arch_spin_unlock(arch_spinlock_t *mutex) -{ - pthread_mutex_unlock(mutex); -} - -static inline bool arch_spin_is_locked(arch_spinlock_t *mutex) -{ - return true; -} - -#endif diff --git a/tools/lib/lockdep/uinclude/linux/stringify.h b/tools/lib/lockdep/uinclude/linux/stringify.h deleted file mode 100644 index 05dfcd1ac118..000000000000 --- a/tools/lib/lockdep/uinclude/linux/stringify.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _LIBLOCKDEP_LINUX_STRINGIFY_H_ -#define _LIBLOCKDEP_LINUX_STRINGIFY_H_ - -#define __stringify_1(x...) #x -#define __stringify(x...) __stringify_1(x) - -#endif diff --git a/tools/lib/lockdep/uinclude/trace/events/lock.h b/tools/lib/lockdep/uinclude/trace/events/lock.h deleted file mode 100644 index fab00ff936d1..000000000000 --- a/tools/lib/lockdep/uinclude/trace/events/lock.h +++ /dev/null @@ -1,3 +0,0 @@ - -/* empty file */ - diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c index ad572e6cdbd0..422d9abd666a 100644 --- a/tools/net/bpf_jit_disasm.c +++ b/tools/net/bpf_jit_disasm.c @@ -159,8 +159,8 @@ static void put_log_buff(char *buff) free(buff); } -static unsigned int get_last_jit_image(char *haystack, size_t hlen, - uint8_t *image, size_t ilen) +static uint8_t *get_last_jit_image(char *haystack, size_t hlen, + unsigned int *ilen) { char *ptr, *pptr, *tmp; off_t off = 0; @@ -168,9 +168,10 @@ static unsigned int get_last_jit_image(char *haystack, size_t hlen, regmatch_t pmatch[1]; unsigned long base; regex_t regex; + uint8_t *image; if (hlen == 0) - return 0; + return NULL; ret = regcomp(®ex, "flen=[[:alnum:]]+ proglen=[[:digit:]]+ " "pass=[[:digit:]]+ image=[[:xdigit:]]+", REG_EXTENDED); @@ -194,11 +195,22 @@ static unsigned int get_last_jit_image(char *haystack, size_t hlen, &flen, &proglen, &pass, &base); if (ret != 4) { regfree(®ex); - return 0; + return NULL; + } + if (proglen > 1000000) { + printf("proglen of %d too big, stopping\n", proglen); + return NULL; } + image = malloc(proglen); + if (!image) { + printf("Out of memory\n"); + return NULL; + } + memset(image, 0, proglen); + tmp = ptr = haystack + off; - while ((ptr = strtok(tmp, "\n")) != NULL && ulen < ilen) { + while ((ptr = strtok(tmp, "\n")) != NULL && ulen < proglen) { tmp = NULL; if (!strstr(ptr, "JIT code")) continue; @@ -208,10 +220,12 @@ static unsigned int get_last_jit_image(char *haystack, size_t hlen, ptr = pptr; do { image[ulen++] = (uint8_t) strtoul(pptr, &pptr, 16); - if (ptr == pptr || ulen >= ilen) { + if (ptr == pptr) { ulen--; break; } + if (ulen >= proglen) + break; ptr = pptr; } while (1); } @@ -222,7 +236,8 @@ static unsigned int get_last_jit_image(char *haystack, size_t hlen, printf("%lx + <x>:\n", base); regfree(®ex); - return ulen; + *ilen = ulen; + return image; } static void usage(void) @@ -237,12 +252,12 @@ static void usage(void) int main(int argc, char **argv) { unsigned int len, klen, opt, opcodes = 0; - static uint8_t image[32768]; char *kbuff, *file = NULL; char *ofile = NULL; int ofd; ssize_t nr; uint8_t *pos; + uint8_t *image = NULL; while ((opt = getopt(argc, argv, "of:O:")) != -1) { switch (opt) { @@ -262,7 +277,6 @@ int main(int argc, char **argv) } bfd_init(); - memset(image, 0, sizeof(image)); kbuff = get_log_buff(file, &klen); if (!kbuff) { @@ -270,8 +284,8 @@ int main(int argc, char **argv) return -1; } - len = get_last_jit_image(kbuff, klen, image, sizeof(image)); - if (len <= 0) { + image = get_last_jit_image(kbuff, klen, &len); + if (!image) { fprintf(stderr, "No JIT image found!\n"); goto done; } @@ -301,5 +315,6 @@ int main(int argc, char **argv) done: put_log_buff(kbuff); + free(image); return 0; } diff --git a/tools/objtool/Build b/tools/objtool/Build index d6cdece5e58b..6f2e1987c4d9 100644 --- a/tools/objtool/Build +++ b/tools/objtool/Build @@ -1,5 +1,6 @@ objtool-y += arch/$(SRCARCH)/ objtool-y += builtin-check.o +objtool-y += check.o objtool-y += elf.o objtool-y += special.o objtool-y += objtool.o diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt index 55a60d331f47..17c1195f11f4 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/stack-validation.txt @@ -127,28 +127,13 @@ b) 100% reliable stack traces for DWARF enabled kernels c) Higher live patching compatibility rate - (NOTE: This is not yet implemented) - - Currently with CONFIG_LIVEPATCH there's a basic live patching - framework which is safe for roughly 85-90% of "security" fixes. But - patches can't have complex features like function dependency or - prototype changes, or data structure changes. - - There's a strong need to support patches which have the more complex - features so that the patch compatibility rate for security fixes can - eventually approach something resembling 100%. To achieve that, a - "consistency model" is needed, which allows tasks to be safely - transitioned from an unpatched state to a patched state. - - One of the key requirements of the currently proposed livepatch - consistency model [*] is that it needs to walk the stack of each - sleeping task to determine if it can be transitioned to the patched - state. If objtool can ensure that stack traces are reliable, this - consistency model can be used and the live patching compatibility - rate can be improved significantly. - - [*] https://lkml.kernel.org/r/cover.1423499826.git.jpoimboe@redhat.com + Livepatch has an optional "consistency model", which is needed for + more complex patches. In order for the consistency model to work, + stack traces need to be reliable (or an unreliable condition needs to + be detectable). Objtool makes that possible. + For more details, see the livepatch documentation in the Linux kernel + source tree at Documentation/livepatch/livepatch.txt. Rules ----- @@ -201,80 +186,84 @@ To achieve the validation, objtool enforces the following rules: return normally. -Errors in .S files ------------------- +Objtool warnings +---------------- -If you're getting an error in a compiled .S file which you don't -understand, first make sure that the affected code follows the above -rules. +For asm files, if you're getting an error which doesn't make sense, +first make sure that the affected code follows the above rules. + +For C files, the common culprits are inline asm statements and calls to +"noreturn" functions. See below for more details. + +Another possible cause for errors in C code is if the Makefile removes +-fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options. Here are some examples of common warnings reported by objtool, what they mean, and suggestions for how to fix them. -1. asm_file.o: warning: objtool: func()+0x128: call without frame pointer save/setup +1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup The func() function made a function call without first saving and/or - updating the frame pointer. - - If func() is indeed a callable function, add proper frame pointer - logic using the FRAME_BEGIN and FRAME_END macros. Otherwise, remove - its ELF function annotation by changing ENDPROC to END. + updating the frame pointer, and CONFIG_FRAME_POINTER is enabled. - If you're getting this error in a .c file, see the "Errors in .c - files" section. + If the error is for an asm file, and func() is indeed a callable + function, add proper frame pointer logic using the FRAME_BEGIN and + FRAME_END macros. Otherwise, if it's not a callable function, remove + its ELF function annotation by changing ENDPROC to END, and instead + use the manual CFI hint macros in asm/undwarf.h. + If it's a GCC-compiled .c file, the error may be because the function + uses an inline asm() statement which has a "call" instruction. An + asm() statement with a call instruction must declare the use of the + stack pointer in its output operand. For example, on x86_64: -2. asm_file.o: warning: objtool: .text+0x53: return instruction outside of a callable function - - A return instruction was detected, but objtool couldn't find a way - for a callable function to reach the instruction. + register void *__sp asm("rsp"); + asm volatile("call func" : "+r" (__sp)); - If the return instruction is inside (or reachable from) a callable - function, the function needs to be annotated with the ENTRY/ENDPROC - macros. + Otherwise the stack frame may not get created before the call. - If you _really_ need a return instruction outside of a function, and - are 100% sure that it won't affect stack traces, you can tell - objtool to ignore it. See the "Adding exceptions" section below. +2. file.o: warning: objtool: .text+0x53: unreachable instruction -3. asm_file.o: warning: objtool: func()+0x9: function has unreachable instruction + Objtool couldn't find a code path to reach the instruction. - The instruction lives inside of a callable function, but there's no - possible control flow path from the beginning of the function to the - instruction. + If the error is for an asm file, and the instruction is inside (or + reachable from) a callable function, the function should be annotated + with the ENTRY/ENDPROC macros (ENDPROC is the important one). + Otherwise, the code should probably be annotated with the CFI hint + macros in asm/undwarf.h so objtool and the unwinder can know the + stack state associated with the code. - If the instruction is actually needed, and it's actually in a - callable function, ensure that its function is properly annotated - with ENTRY/ENDPROC. + If you're 100% sure the code won't affect stack traces, or if you're + a just a bad person, you can tell objtool to ignore it. See the + "Adding exceptions" section below. If it's not actually in a callable function (e.g. kernel entry code), change ENDPROC to END. -4. asm_file.o: warning: objtool: func(): can't find starting instruction +4. file.o: warning: objtool: func(): can't find starting instruction or - asm_file.o: warning: objtool: func()+0x11dd: can't decode instruction + file.o: warning: objtool: func()+0x11dd: can't decode instruction - Did you put data in a text section? If so, that can confuse + Does the file have data in a text section? If so, that can confuse objtool's instruction decoder. Move the data to a more appropriate section like .data or .rodata. -5. asm_file.o: warning: objtool: func()+0x6: kernel entry/exit from callable instruction - - This is a kernel entry/exit instruction like sysenter or sysret. - Such instructions aren't allowed in a callable function, and are most - likely part of the kernel entry code. +5. file.o: warning: objtool: func()+0x6: unsupported instruction in callable function - If the instruction isn't actually in a callable function, change - ENDPROC to END. + This is a kernel entry/exit instruction like sysenter or iret. Such + instructions aren't allowed in a callable function, and are most + likely part of the kernel entry code. They should usually not have + the callable function annotation (ENDPROC) and should always be + annotated with the CFI hint macros in asm/undwarf.h. -6. asm_file.o: warning: objtool: func()+0x26: sibling call from callable instruction with changed frame pointer +6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame - This is a dynamic jump or a jump to an undefined symbol. Stacktool + This is a dynamic jump or a jump to an undefined symbol. Objtool assumed it's a sibling call and detected that the frame pointer wasn't first restored to its original state. @@ -282,24 +271,28 @@ they mean, and suggestions for how to fix them. destination code to the local file. If the instruction is not actually in a callable function (e.g. - kernel entry code), change ENDPROC to END. + kernel entry code), change ENDPROC to END and annotate manually with + the CFI hint macros in asm/undwarf.h. -7. asm_file: warning: objtool: func()+0x5c: frame pointer state mismatch +7. file: warning: objtool: func()+0x5c: stack state mismatch The instruction's frame pointer state is inconsistent, depending on which execution path was taken to reach the instruction. - Make sure the function pushes and sets up the frame pointer (for - x86_64, this means rbp) at the beginning of the function and pops it - at the end of the function. Also make sure that no other code in the - function touches the frame pointer. + Make sure that, when CONFIG_FRAME_POINTER is enabled, the function + pushes and sets up the frame pointer (for x86_64, this means rbp) at + the beginning of the function and pops it at the end of the function. + Also make sure that no other code in the function touches the frame + pointer. + Another possibility is that the code has some asm or inline asm which + does some unusual things to the stack or the frame pointer. In such + cases it's probably appropriate to use the CFI hint macros in + asm/undwarf.h. -Errors in .c files ------------------- -1. c_file.o: warning: objtool: funcA() falls through to next function funcB() +8. file.o: warning: objtool: funcA() falls through to next function funcB() This means that funcA() doesn't end with a return instruction or an unconditional jump, and that objtool has determined that the function @@ -318,22 +311,6 @@ Errors in .c files might be corrupt due to a gcc bug. For more details, see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646 -2. If you're getting any other objtool error in a compiled .c file, it - may be because the file uses an asm() statement which has a "call" - instruction. An asm() statement with a call instruction must declare - the use of the stack pointer in its output operand. For example, on - x86_64: - - register void *__sp asm("rsp"); - asm volatile("call func" : "+r" (__sp)); - - Otherwise the stack frame may not get created before the call. - -3. Another possible cause for errors in C code is if the Makefile removes - -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options. - -Also see the above section for .S file errors for more information what -the individual error messages mean. If the error doesn't seem to make sense, it could be a bug in objtool. Feel free to ask the objtool maintainer for help. diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 27e019c09bd2..0e2765e243c0 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -25,7 +25,7 @@ OBJTOOL_IN := $(OBJTOOL)-in.o all: $(OBJTOOL) INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi -CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) +CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -fomit-frame-pointer -O2 -g $(INCLUDES) LDFLAGS += -lelf $(LIBSUBCMD) # Allow old libelf to be used: diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index a59e061c0b4a..21aeca874edb 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -19,25 +19,63 @@ #define _ARCH_H #include <stdbool.h> +#include <linux/list.h> #include "elf.h" +#include "cfi.h" -#define INSN_FP_SAVE 1 -#define INSN_FP_SETUP 2 -#define INSN_FP_RESTORE 3 -#define INSN_JUMP_CONDITIONAL 4 -#define INSN_JUMP_UNCONDITIONAL 5 -#define INSN_JUMP_DYNAMIC 6 -#define INSN_CALL 7 -#define INSN_CALL_DYNAMIC 8 -#define INSN_RETURN 9 -#define INSN_CONTEXT_SWITCH 10 -#define INSN_NOP 11 -#define INSN_OTHER 12 +#define INSN_JUMP_CONDITIONAL 1 +#define INSN_JUMP_UNCONDITIONAL 2 +#define INSN_JUMP_DYNAMIC 3 +#define INSN_CALL 4 +#define INSN_CALL_DYNAMIC 5 +#define INSN_RETURN 6 +#define INSN_CONTEXT_SWITCH 7 +#define INSN_STACK 8 +#define INSN_NOP 9 +#define INSN_OTHER 10 #define INSN_LAST INSN_OTHER +enum op_dest_type { + OP_DEST_REG, + OP_DEST_REG_INDIRECT, + OP_DEST_MEM, + OP_DEST_PUSH, + OP_DEST_LEAVE, +}; + +struct op_dest { + enum op_dest_type type; + unsigned char reg; + int offset; +}; + +enum op_src_type { + OP_SRC_REG, + OP_SRC_REG_INDIRECT, + OP_SRC_CONST, + OP_SRC_POP, + OP_SRC_ADD, + OP_SRC_AND, +}; + +struct op_src { + enum op_src_type type; + unsigned char reg; + int offset; +}; + +struct stack_op { + struct op_dest dest; + struct op_src src; +}; + +void arch_initial_func_cfi_state(struct cfi_state *state); + int arch_decode_instruction(struct elf *elf, struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, unsigned char *type, - unsigned long *displacement); + unsigned long *immediate, struct stack_op *op); + +bool arch_callee_saved_reg(unsigned char reg); #endif /* _ARCH_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 6ac99e3266eb..a36c2eba64e7 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -27,6 +27,17 @@ #include "../../arch.h" #include "../../warn.h" +static unsigned char op_to_cfi_reg[][2] = { + {CFI_AX, CFI_R8}, + {CFI_CX, CFI_R9}, + {CFI_DX, CFI_R10}, + {CFI_BX, CFI_R11}, + {CFI_SP, CFI_R12}, + {CFI_BP, CFI_R13}, + {CFI_SI, CFI_R14}, + {CFI_DI, CFI_R15}, +}; + static int is_x86_64(struct elf *elf) { switch (elf->ehdr.e_machine) { @@ -40,24 +51,50 @@ static int is_x86_64(struct elf *elf) } } +bool arch_callee_saved_reg(unsigned char reg) +{ + switch (reg) { + case CFI_BP: + case CFI_BX: + case CFI_R12: + case CFI_R13: + case CFI_R14: + case CFI_R15: + return true; + + case CFI_AX: + case CFI_CX: + case CFI_DX: + case CFI_SI: + case CFI_DI: + case CFI_SP: + case CFI_R8: + case CFI_R9: + case CFI_R10: + case CFI_R11: + case CFI_RA: + default: + return false; + } +} + int arch_decode_instruction(struct elf *elf, struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, unsigned char *type, - unsigned long *immediate) + unsigned long *immediate, struct stack_op *op) { struct insn insn; - int x86_64; - unsigned char op1, op2, ext; + int x86_64, sign; + unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, + modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, + sib = 0; x86_64 = is_x86_64(elf); if (x86_64 == -1) return -1; - insn_init(&insn, (void *)(sec->data + offset), maxlen, x86_64); + insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64); insn_get_length(&insn); - insn_get_opcode(&insn); - insn_get_modrm(&insn); - insn_get_immediate(&insn); if (!insn_complete(&insn)) { WARN_FUNC("can't decode instruction", sec, offset); @@ -73,67 +110,323 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, op1 = insn.opcode.bytes[0]; op2 = insn.opcode.bytes[1]; + if (insn.rex_prefix.nbytes) { + rex = insn.rex_prefix.bytes[0]; + rex_w = X86_REX_W(rex) >> 3; + rex_r = X86_REX_R(rex) >> 2; + rex_b = X86_REX_B(rex); + } + + if (insn.modrm.nbytes) { + modrm = insn.modrm.bytes[0]; + modrm_mod = X86_MODRM_MOD(modrm); + modrm_reg = X86_MODRM_REG(modrm); + modrm_rm = X86_MODRM_RM(modrm); + } + + if (insn.sib.nbytes) + sib = insn.sib.bytes[0]; + switch (op1) { - case 0x55: - if (!insn.rex_prefix.nbytes) - /* push rbp */ - *type = INSN_FP_SAVE; + + case 0x1: + case 0x29: + if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { + + /* add/sub reg, %rsp */ + *type = INSN_STACK; + op->src.type = OP_SRC_ADD; + op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.type = OP_SRC_REG; + op->dest.reg = CFI_SP; + } + break; + + case 0x50 ... 0x57: + + /* push reg */ + *type = INSN_STACK; + op->src.type = OP_SRC_REG; + op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + op->dest.type = OP_DEST_PUSH; + break; - case 0x5d: - if (!insn.rex_prefix.nbytes) - /* pop rbp */ - *type = INSN_FP_RESTORE; + case 0x58 ... 0x5f: + + /* pop reg */ + *type = INSN_STACK; + op->src.type = OP_SRC_POP; + op->dest.type = OP_DEST_REG; + op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + + break; + + case 0x68: + case 0x6a: + /* push immediate */ + *type = INSN_STACK; + op->src.type = OP_SRC_CONST; + op->dest.type = OP_DEST_PUSH; break; case 0x70 ... 0x7f: *type = INSN_JUMP_CONDITIONAL; break; + case 0x81: + case 0x83: + if (rex != 0x48) + break; + + if (modrm == 0xe4) { + /* and imm, %rsp */ + *type = INSN_STACK; + op->src.type = OP_SRC_AND; + op->src.reg = CFI_SP; + op->src.offset = insn.immediate.value; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + break; + } + + if (modrm == 0xc4) + sign = 1; + else if (modrm == 0xec) + sign = -1; + else + break; + + /* add/sub imm, %rsp */ + *type = INSN_STACK; + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_SP; + op->src.offset = insn.immediate.value * sign; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + break; + case 0x89: - if (insn.rex_prefix.nbytes == 1 && - insn.rex_prefix.bytes[0] == 0x48 && - insn.modrm.nbytes && insn.modrm.bytes[0] == 0xe5) - /* mov rsp, rbp */ - *type = INSN_FP_SETUP; + if (rex == 0x48 && modrm == 0xe5) { + + /* mov %rsp, %rbp */ + *type = INSN_STACK; + op->src.type = OP_SRC_REG; + op->src.reg = CFI_SP; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_BP; + break; + } + /* fallthrough */ + case 0x88: + if (!rex_b && + (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) { + + /* mov reg, disp(%rbp) */ + *type = INSN_STACK; + op->src.type = OP_SRC_REG; + op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.type = OP_DEST_REG_INDIRECT; + op->dest.reg = CFI_BP; + op->dest.offset = insn.displacement.value; + + } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) { + + /* mov reg, disp(%rsp) */ + *type = INSN_STACK; + op->src.type = OP_SRC_REG; + op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.type = OP_DEST_REG_INDIRECT; + op->dest.reg = CFI_SP; + op->dest.offset = insn.displacement.value; + } + + break; + + case 0x8b: + if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) { + + /* mov disp(%rbp), reg */ + *type = INSN_STACK; + op->src.type = OP_SRC_REG_INDIRECT; + op->src.reg = CFI_BP; + op->src.offset = insn.displacement.value; + op->dest.type = OP_DEST_REG; + op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; + + } else if (rex_w && !rex_b && sib == 0x24 && + modrm_mod != 3 && modrm_rm == 4) { + + /* mov disp(%rsp), reg */ + *type = INSN_STACK; + op->src.type = OP_SRC_REG_INDIRECT; + op->src.reg = CFI_SP; + op->src.offset = insn.displacement.value; + op->dest.type = OP_DEST_REG; + op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; + } + break; case 0x8d: - if (insn.rex_prefix.nbytes && - insn.rex_prefix.bytes[0] == 0x48 && - insn.modrm.nbytes && insn.modrm.bytes[0] == 0x2c && - insn.sib.nbytes && insn.sib.bytes[0] == 0x24) - /* lea %(rsp), %rbp */ - *type = INSN_FP_SETUP; + if (rex == 0x48 && modrm == 0x65) { + + /* lea -disp(%rbp), %rsp */ + *type = INSN_STACK; + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_BP; + op->src.offset = insn.displacement.value; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + break; + } + + if (rex == 0x4c && modrm == 0x54 && sib == 0x24 && + insn.displacement.value == 8) { + + /* + * lea 0x8(%rsp), %r10 + * + * Here r10 is the "drap" pointer, used as a stack + * pointer helper when the stack gets realigned. + */ + *type = INSN_STACK; + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_SP; + op->src.offset = 8; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_R10; + break; + } + + if (rex == 0x4c && modrm == 0x6c && sib == 0x24 && + insn.displacement.value == 16) { + + /* + * lea 0x10(%rsp), %r13 + * + * Here r13 is the "drap" pointer, used as a stack + * pointer helper when the stack gets realigned. + */ + *type = INSN_STACK; + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_SP; + op->src.offset = 16; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_R13; + break; + } + + if (rex == 0x49 && modrm == 0x62 && + insn.displacement.value == -8) { + + /* + * lea -0x8(%r10), %rsp + * + * Restoring rsp back to its original value after a + * stack realignment. + */ + *type = INSN_STACK; + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_R10; + op->src.offset = -8; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + break; + } + + if (rex == 0x49 && modrm == 0x65 && + insn.displacement.value == -16) { + + /* + * lea -0x10(%r13), %rsp + * + * Restoring rsp back to its original value after a + * stack realignment. + */ + *type = INSN_STACK; + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_R13; + op->src.offset = -16; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + break; + } + + break; + + case 0x8f: + /* pop to mem */ + *type = INSN_STACK; + op->src.type = OP_SRC_POP; + op->dest.type = OP_DEST_MEM; break; case 0x90: *type = INSN_NOP; break; + case 0x9c: + /* pushf */ + *type = INSN_STACK; + op->src.type = OP_SRC_CONST; + op->dest.type = OP_DEST_PUSH; + break; + + case 0x9d: + /* popf */ + *type = INSN_STACK; + op->src.type = OP_SRC_POP; + op->dest.type = OP_DEST_MEM; + break; + case 0x0f: + if (op2 >= 0x80 && op2 <= 0x8f) *type = INSN_JUMP_CONDITIONAL; else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 || op2 == 0x35) + /* sysenter, sysret */ *type = INSN_CONTEXT_SWITCH; + else if (op2 == 0x0d || op2 == 0x1f) + /* nopl/nopw */ *type = INSN_NOP; - else if (op2 == 0x01 && insn.modrm.nbytes && - (insn.modrm.bytes[0] == 0xc2 || - insn.modrm.bytes[0] == 0xd8)) - /* vmlaunch, vmrun */ - *type = INSN_CONTEXT_SWITCH; + + else if (op2 == 0xa0 || op2 == 0xa8) { + + /* push fs/gs */ + *type = INSN_STACK; + op->src.type = OP_SRC_CONST; + op->dest.type = OP_DEST_PUSH; + + } else if (op2 == 0xa1 || op2 == 0xa9) { + + /* pop fs/gs */ + *type = INSN_STACK; + op->src.type = OP_SRC_POP; + op->dest.type = OP_DEST_MEM; + } break; - case 0xc9: /* leave */ - *type = INSN_FP_RESTORE; + case 0xc9: + /* + * leave + * + * equivalent to: + * mov bp, sp + * pop bp + */ + *type = INSN_STACK; + op->dest.type = OP_DEST_LEAVE; + break; - case 0xe3: /* jecxz/jrcxz */ + case 0xe3: + /* jecxz/jrcxz */ *type = INSN_JUMP_CONDITIONAL; break; @@ -158,14 +451,27 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, break; case 0xff: - ext = X86_MODRM_REG(insn.modrm.bytes[0]); - if (ext == 2 || ext == 3) + if (modrm_reg == 2 || modrm_reg == 3) + *type = INSN_CALL_DYNAMIC; - else if (ext == 4) + + else if (modrm_reg == 4) + *type = INSN_JUMP_DYNAMIC; - else if (ext == 5) /*jmpf */ + + else if (modrm_reg == 5) + + /* jmpf */ *type = INSN_CONTEXT_SWITCH; + else if (modrm_reg == 6) { + + /* push from mem */ + *type = INSN_STACK; + op->src.type = OP_SRC_CONST; + op->dest.type = OP_DEST_PUSH; + } + break; default: @@ -176,3 +482,21 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, return 0; } + +void arch_initial_func_cfi_state(struct cfi_state *state) +{ + int i; + + for (i = 0; i < CFI_NUM_REGS; i++) { + state->regs[i].base = CFI_UNDEFINED; + state->regs[i].offset = 0; + } + + /* initial CFA (call frame address) */ + state->cfa.base = CFI_SP; + state->cfa.offset = 8; + + /* initial RA (return address) */ + state->regs[16].base = CFI_CFA; + state->regs[16].offset = -8; +} diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt index 767be7c76034..12e377184ee4 100644 --- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt +++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt @@ -1009,7 +1009,7 @@ GrpTable: Grp15 1: fxstor | RDGSBASE Ry (F3),(11B) 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) -4: XSAVE +4: XSAVE | ptwrite Ey (F3),(11B) 5: XRSTOR | lfence (11B) 6: XSAVEOPT | clwb (66) | mfence (11B) 7: clflush | clflushopt (66) | sfence (11B) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 282a60368b14..365c34ecab26 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com> + * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -25,1286 +25,32 @@ * For more information, see tools/objtool/Documentation/stack-validation.txt. */ -#include <string.h> -#include <stdlib.h> #include <subcmd/parse-options.h> - #include "builtin.h" -#include "elf.h" -#include "special.h" -#include "arch.h" -#include "warn.h" - -#include <linux/hashtable.h> -#include <linux/kernel.h> - -#define STATE_FP_SAVED 0x1 -#define STATE_FP_SETUP 0x2 -#define STATE_FENTRY 0x4 - -struct instruction { - struct list_head list; - struct hlist_node hash; - struct section *sec; - unsigned long offset; - unsigned int len, state; - unsigned char type; - unsigned long immediate; - bool alt_group, visited, dead_end; - struct symbol *call_dest; - struct instruction *jump_dest; - struct list_head alts; - struct symbol *func; -}; - -struct alternative { - struct list_head list; - struct instruction *insn; -}; - -struct objtool_file { - struct elf *elf; - struct list_head insn_list; - DECLARE_HASHTABLE(insn_hash, 16); - struct section *rodata, *whitelist; - bool ignore_unreachables, c_file; -}; - -const char *objname; -static bool nofp; - -static struct instruction *find_insn(struct objtool_file *file, - struct section *sec, unsigned long offset) -{ - struct instruction *insn; - - hash_for_each_possible(file->insn_hash, insn, hash, offset) - if (insn->sec == sec && insn->offset == offset) - return insn; - - return NULL; -} - -static struct instruction *next_insn_same_sec(struct objtool_file *file, - struct instruction *insn) -{ - struct instruction *next = list_next_entry(insn, list); - - if (&next->list == &file->insn_list || next->sec != insn->sec) - return NULL; - - return next; -} - -static bool gcov_enabled(struct objtool_file *file) -{ - struct section *sec; - struct symbol *sym; - - list_for_each_entry(sec, &file->elf->sections, list) - list_for_each_entry(sym, &sec->symbol_list, list) - if (!strncmp(sym->name, "__gcov_.", 8)) - return true; - - return false; -} - -#define for_each_insn(file, insn) \ - list_for_each_entry(insn, &file->insn_list, list) - -#define func_for_each_insn(file, func, insn) \ - for (insn = find_insn(file, func->sec, func->offset); \ - insn && &insn->list != &file->insn_list && \ - insn->sec == func->sec && \ - insn->offset < func->offset + func->len; \ - insn = list_next_entry(insn, list)) - -#define func_for_each_insn_continue_reverse(file, func, insn) \ - for (insn = list_prev_entry(insn, list); \ - &insn->list != &file->insn_list && \ - insn->sec == func->sec && insn->offset >= func->offset; \ - insn = list_prev_entry(insn, list)) - -#define sec_for_each_insn_from(file, insn) \ - for (; insn; insn = next_insn_same_sec(file, insn)) - - -/* - * Check if the function has been manually whitelisted with the - * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted - * due to its use of a context switching instruction. - */ -static bool ignore_func(struct objtool_file *file, struct symbol *func) -{ - struct rela *rela; - struct instruction *insn; - - /* check for STACK_FRAME_NON_STANDARD */ - if (file->whitelist && file->whitelist->rela) - list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) { - if (rela->sym->type == STT_SECTION && - rela->sym->sec == func->sec && - rela->addend == func->offset) - return true; - if (rela->sym->type == STT_FUNC && rela->sym == func) - return true; - } - - /* check if it has a context switching instruction */ - func_for_each_insn(file, func, insn) - if (insn->type == INSN_CONTEXT_SWITCH) - return true; - - return false; -} - -/* - * This checks to see if the given function is a "noreturn" function. - * - * For global functions which are outside the scope of this object file, we - * have to keep a manual list of them. - * - * For local functions, we have to detect them manually by simply looking for - * the lack of a return instruction. - * - * Returns: - * -1: error - * 0: no dead end - * 1: dead end - */ -static int __dead_end_function(struct objtool_file *file, struct symbol *func, - int recursion) -{ - int i; - struct instruction *insn; - bool empty = true; - - /* - * Unfortunately these have to be hard coded because the noreturn - * attribute isn't provided in ELF data. - */ - static const char * const global_noreturns[] = { - "__stack_chk_fail", - "panic", - "do_exit", - "do_task_dead", - "__module_put_and_exit", - "complete_and_exit", - "kvm_spurious_fault", - "__reiserfs_panic", - "lbug_with_loc" - }; - - if (func->bind == STB_WEAK) - return 0; - - if (func->bind == STB_GLOBAL) - for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) - if (!strcmp(func->name, global_noreturns[i])) - return 1; - - if (!func->sec) - return 0; - - func_for_each_insn(file, func, insn) { - empty = false; - - if (insn->type == INSN_RETURN) - return 0; - } - - if (empty) - return 0; - - /* - * A function can have a sibling call instead of a return. In that - * case, the function's dead-end status depends on whether the target - * of the sibling call returns. - */ - func_for_each_insn(file, func, insn) { - if (insn->sec != func->sec || - insn->offset >= func->offset + func->len) - break; - - if (insn->type == INSN_JUMP_UNCONDITIONAL) { - struct instruction *dest = insn->jump_dest; - struct symbol *dest_func; - - if (!dest) - /* sibling call to another file */ - return 0; - - if (dest->sec != func->sec || - dest->offset < func->offset || - dest->offset >= func->offset + func->len) { - /* local sibling call */ - dest_func = find_symbol_by_offset(dest->sec, - dest->offset); - if (!dest_func) - continue; - - if (recursion == 5) { - WARN_FUNC("infinite recursion (objtool bug!)", - dest->sec, dest->offset); - return -1; - } - - return __dead_end_function(file, dest_func, - recursion + 1); - } - } - - if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts)) - /* sibling call */ - return 0; - } - - return 1; -} - -static int dead_end_function(struct objtool_file *file, struct symbol *func) -{ - return __dead_end_function(file, func, 0); -} - -/* - * Call the arch-specific instruction decoder for all the instructions and add - * them to the global instruction list. - */ -static int decode_instructions(struct objtool_file *file) -{ - struct section *sec; - struct symbol *func; - unsigned long offset; - struct instruction *insn; - int ret; - - list_for_each_entry(sec, &file->elf->sections, list) { - - if (!(sec->sh.sh_flags & SHF_EXECINSTR)) - continue; - - for (offset = 0; offset < sec->len; offset += insn->len) { - insn = malloc(sizeof(*insn)); - memset(insn, 0, sizeof(*insn)); - - INIT_LIST_HEAD(&insn->alts); - insn->sec = sec; - insn->offset = offset; - - ret = arch_decode_instruction(file->elf, sec, offset, - sec->len - offset, - &insn->len, &insn->type, - &insn->immediate); - if (ret) - return ret; - - if (!insn->type || insn->type > INSN_LAST) { - WARN_FUNC("invalid instruction type %d", - insn->sec, insn->offset, insn->type); - return -1; - } - - hash_add(file->insn_hash, &insn->hash, insn->offset); - list_add_tail(&insn->list, &file->insn_list); - } - - list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) - continue; - - if (!find_insn(file, sec, func->offset)) { - WARN("%s(): can't find starting instruction", - func->name); - return -1; - } - - func_for_each_insn(file, func, insn) - if (!insn->func) - insn->func = func; - } - } - - return 0; -} - -/* - * Find all uses of the unreachable() macro, which are code path dead ends. - */ -static int add_dead_ends(struct objtool_file *file) -{ - struct section *sec; - struct rela *rela; - struct instruction *insn; - bool found; - - sec = find_section_by_name(file->elf, ".rela.discard.unreachable"); - if (!sec) - return 0; - - list_for_each_entry(rela, &sec->rela_list, list) { - if (rela->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", sec->name); - return -1; - } - insn = find_insn(file, rela->sym->sec, rela->addend); - if (insn) - insn = list_prev_entry(insn, list); - else if (rela->addend == rela->sym->sec->len) { - found = false; - list_for_each_entry_reverse(insn, &file->insn_list, list) { - if (insn->sec == rela->sym->sec) { - found = true; - break; - } - } - - if (!found) { - WARN("can't find unreachable insn at %s+0x%x", - rela->sym->sec->name, rela->addend); - return -1; - } - } else { - WARN("can't find unreachable insn at %s+0x%x", - rela->sym->sec->name, rela->addend); - return -1; - } - - insn->dead_end = true; - } - - return 0; -} - -/* - * Warnings shouldn't be reported for ignored functions. - */ -static void add_ignores(struct objtool_file *file) -{ - struct instruction *insn; - struct section *sec; - struct symbol *func; - - list_for_each_entry(sec, &file->elf->sections, list) { - list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) - continue; - - if (!ignore_func(file, func)) - continue; - - func_for_each_insn(file, func, insn) - insn->visited = true; - } - } -} - -/* - * Find the destination instructions for all jumps. - */ -static int add_jump_destinations(struct objtool_file *file) -{ - struct instruction *insn; - struct rela *rela; - struct section *dest_sec; - unsigned long dest_off; - - for_each_insn(file, insn) { - if (insn->type != INSN_JUMP_CONDITIONAL && - insn->type != INSN_JUMP_UNCONDITIONAL) - continue; - - /* skip ignores */ - if (insn->visited) - continue; - - rela = find_rela_by_dest_range(insn->sec, insn->offset, - insn->len); - if (!rela) { - dest_sec = insn->sec; - dest_off = insn->offset + insn->len + insn->immediate; - } else if (rela->sym->type == STT_SECTION) { - dest_sec = rela->sym->sec; - dest_off = rela->addend + 4; - } else if (rela->sym->sec->idx) { - dest_sec = rela->sym->sec; - dest_off = rela->sym->sym.st_value + rela->addend + 4; - } else { - /* sibling call */ - insn->jump_dest = 0; - continue; - } - - insn->jump_dest = find_insn(file, dest_sec, dest_off); - if (!insn->jump_dest) { - - /* - * This is a special case where an alt instruction - * jumps past the end of the section. These are - * handled later in handle_group_alt(). - */ - if (!strcmp(insn->sec->name, ".altinstr_replacement")) - continue; - - WARN_FUNC("can't find jump dest instruction at %s+0x%lx", - insn->sec, insn->offset, dest_sec->name, - dest_off); - return -1; - } - } - - return 0; -} - -/* - * Find the destination instructions for all calls. - */ -static int add_call_destinations(struct objtool_file *file) -{ - struct instruction *insn; - unsigned long dest_off; - struct rela *rela; - - for_each_insn(file, insn) { - if (insn->type != INSN_CALL) - continue; - - rela = find_rela_by_dest_range(insn->sec, insn->offset, - insn->len); - if (!rela) { - dest_off = insn->offset + insn->len + insn->immediate; - insn->call_dest = find_symbol_by_offset(insn->sec, - dest_off); - if (!insn->call_dest) { - WARN_FUNC("can't find call dest symbol at offset 0x%lx", - insn->sec, insn->offset, dest_off); - return -1; - } - } else if (rela->sym->type == STT_SECTION) { - insn->call_dest = find_symbol_by_offset(rela->sym->sec, - rela->addend+4); - if (!insn->call_dest || - insn->call_dest->type != STT_FUNC) { - WARN_FUNC("can't find call dest symbol at %s+0x%x", - insn->sec, insn->offset, - rela->sym->sec->name, - rela->addend + 4); - return -1; - } - } else - insn->call_dest = rela->sym; - } - - return 0; -} - -/* - * The .alternatives section requires some extra special care, over and above - * what other special sections require: - * - * 1. Because alternatives are patched in-place, we need to insert a fake jump - * instruction at the end so that validate_branch() skips all the original - * replaced instructions when validating the new instruction path. - * - * 2. An added wrinkle is that the new instruction length might be zero. In - * that case the old instructions are replaced with noops. We simulate that - * by creating a fake jump as the only new instruction. - * - * 3. In some cases, the alternative section includes an instruction which - * conditionally jumps to the _end_ of the entry. We have to modify these - * jumps' destinations to point back to .text rather than the end of the - * entry in .altinstr_replacement. - * - * 4. It has been requested that we don't validate the !POPCNT feature path - * which is a "very very small percentage of machines". - */ -static int handle_group_alt(struct objtool_file *file, - struct special_alt *special_alt, - struct instruction *orig_insn, - struct instruction **new_insn) -{ - struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; - unsigned long dest_off; - - last_orig_insn = NULL; - insn = orig_insn; - sec_for_each_insn_from(file, insn) { - if (insn->offset >= special_alt->orig_off + special_alt->orig_len) - break; - - if (special_alt->skip_orig) - insn->type = INSN_NOP; - - insn->alt_group = true; - last_orig_insn = insn; - } - - if (!next_insn_same_sec(file, last_orig_insn)) { - WARN("%s: don't know how to handle alternatives at end of section", - special_alt->orig_sec->name); - return -1; - } - - fake_jump = malloc(sizeof(*fake_jump)); - if (!fake_jump) { - WARN("malloc failed"); - return -1; - } - memset(fake_jump, 0, sizeof(*fake_jump)); - INIT_LIST_HEAD(&fake_jump->alts); - fake_jump->sec = special_alt->new_sec; - fake_jump->offset = -1; - fake_jump->type = INSN_JUMP_UNCONDITIONAL; - fake_jump->jump_dest = list_next_entry(last_orig_insn, list); - - if (!special_alt->new_len) { - *new_insn = fake_jump; - return 0; - } - - last_new_insn = NULL; - insn = *new_insn; - sec_for_each_insn_from(file, insn) { - if (insn->offset >= special_alt->new_off + special_alt->new_len) - break; - - last_new_insn = insn; - - if (insn->type != INSN_JUMP_CONDITIONAL && - insn->type != INSN_JUMP_UNCONDITIONAL) - continue; - - if (!insn->immediate) - continue; - - dest_off = insn->offset + insn->len + insn->immediate; - if (dest_off == special_alt->new_off + special_alt->new_len) - insn->jump_dest = fake_jump; - - if (!insn->jump_dest) { - WARN_FUNC("can't find alternative jump destination", - insn->sec, insn->offset); - return -1; - } - } - - if (!last_new_insn) { - WARN_FUNC("can't find last new alternative instruction", - special_alt->new_sec, special_alt->new_off); - return -1; - } - - list_add(&fake_jump->list, &last_new_insn->list); - - return 0; -} - -/* - * A jump table entry can either convert a nop to a jump or a jump to a nop. - * If the original instruction is a jump, make the alt entry an effective nop - * by just skipping the original instruction. - */ -static int handle_jump_alt(struct objtool_file *file, - struct special_alt *special_alt, - struct instruction *orig_insn, - struct instruction **new_insn) -{ - if (orig_insn->type == INSN_NOP) - return 0; - - if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) { - WARN_FUNC("unsupported instruction at jump label", - orig_insn->sec, orig_insn->offset); - return -1; - } - - *new_insn = list_next_entry(orig_insn, list); - return 0; -} - -/* - * Read all the special sections which have alternate instructions which can be - * patched in or redirected to at runtime. Each instruction having alternate - * instruction(s) has them added to its insn->alts list, which will be - * traversed in validate_branch(). - */ -static int add_special_section_alts(struct objtool_file *file) -{ - struct list_head special_alts; - struct instruction *orig_insn, *new_insn; - struct special_alt *special_alt, *tmp; - struct alternative *alt; - int ret; - - ret = special_get_alts(file->elf, &special_alts); - if (ret) - return ret; - - list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { - alt = malloc(sizeof(*alt)); - if (!alt) { - WARN("malloc failed"); - ret = -1; - goto out; - } - - orig_insn = find_insn(file, special_alt->orig_sec, - special_alt->orig_off); - if (!orig_insn) { - WARN_FUNC("special: can't find orig instruction", - special_alt->orig_sec, special_alt->orig_off); - ret = -1; - goto out; - } +#include "check.h" - new_insn = NULL; - if (!special_alt->group || special_alt->new_len) { - new_insn = find_insn(file, special_alt->new_sec, - special_alt->new_off); - if (!new_insn) { - WARN_FUNC("special: can't find new instruction", - special_alt->new_sec, - special_alt->new_off); - ret = -1; - goto out; - } - } +bool nofp; - if (special_alt->group) { - ret = handle_group_alt(file, special_alt, orig_insn, - &new_insn); - if (ret) - goto out; - } else if (special_alt->jump_or_nop) { - ret = handle_jump_alt(file, special_alt, orig_insn, - &new_insn); - if (ret) - goto out; - } - - alt->insn = new_insn; - list_add_tail(&alt->list, &orig_insn->alts); - - list_del(&special_alt->list); - free(special_alt); - } - -out: - return ret; -} - -static int add_switch_table(struct objtool_file *file, struct symbol *func, - struct instruction *insn, struct rela *table, - struct rela *next_table) -{ - struct rela *rela = table; - struct instruction *alt_insn; - struct alternative *alt; - - list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { - if (rela == next_table) - break; - - if (rela->sym->sec != insn->sec || - rela->addend <= func->offset || - rela->addend >= func->offset + func->len) - break; - - alt_insn = find_insn(file, insn->sec, rela->addend); - if (!alt_insn) { - WARN("%s: can't find instruction at %s+0x%x", - file->rodata->rela->name, insn->sec->name, - rela->addend); - return -1; - } - - alt = malloc(sizeof(*alt)); - if (!alt) { - WARN("malloc failed"); - return -1; - } - - alt->insn = alt_insn; - list_add_tail(&alt->list, &insn->alts); - } - - return 0; -} - -/* - * find_switch_table() - Given a dynamic jump, find the switch jump table in - * .rodata associated with it. - * - * There are 3 basic patterns: - * - * 1. jmpq *[rodata addr](,%reg,8) - * - * This is the most common case by far. It jumps to an address in a simple - * jump table which is stored in .rodata. - * - * 2. jmpq *[rodata addr](%rip) - * - * This is caused by a rare GCC quirk, currently only seen in three driver - * functions in the kernel, only with certain obscure non-distro configs. - * - * As part of an optimization, GCC makes a copy of an existing switch jump - * table, modifies it, and then hard-codes the jump (albeit with an indirect - * jump) to use a single entry in the table. The rest of the jump table and - * some of its jump targets remain as dead code. - * - * In such a case we can just crudely ignore all unreachable instruction - * warnings for the entire object file. Ideally we would just ignore them - * for the function, but that would require redesigning the code quite a - * bit. And honestly that's just not worth doing: unreachable instruction - * warnings are of questionable value anyway, and this is such a rare issue. - * - * 3. mov [rodata addr],%reg1 - * ... some instructions ... - * jmpq *(%reg1,%reg2,8) - * - * This is a fairly uncommon pattern which is new for GCC 6. As of this - * writing, there are 11 occurrences of it in the allmodconfig kernel. - * - * TODO: Once we have DWARF CFI and smarter instruction decoding logic, - * ensure the same register is used in the mov and jump instructions. - */ -static struct rela *find_switch_table(struct objtool_file *file, - struct symbol *func, - struct instruction *insn) -{ - struct rela *text_rela, *rodata_rela; - struct instruction *orig_insn = insn; - - text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (text_rela && text_rela->sym == file->rodata->sym) { - /* case 1 */ - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend); - if (rodata_rela) - return rodata_rela; - - /* case 2 */ - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend + 4); - if (!rodata_rela) - return NULL; - file->ignore_unreachables = true; - return rodata_rela; - } - - /* case 3 */ - func_for_each_insn_continue_reverse(file, func, insn) { - if (insn->type == INSN_JUMP_DYNAMIC) - break; - - /* allow small jumps within the range */ - if (insn->type == INSN_JUMP_UNCONDITIONAL && - insn->jump_dest && - (insn->jump_dest->offset <= insn->offset || - insn->jump_dest->offset > orig_insn->offset)) - break; - - /* look for a relocation which references .rodata */ - text_rela = find_rela_by_dest_range(insn->sec, insn->offset, - insn->len); - if (!text_rela || text_rela->sym != file->rodata->sym) - continue; - - /* - * Make sure the .rodata address isn't associated with a - * symbol. gcc jump tables are anonymous data. - */ - if (find_symbol_containing(file->rodata, text_rela->addend)) - continue; - - return find_rela_by_dest(file->rodata, text_rela->addend); - } - - return NULL; -} - -static int add_func_switch_tables(struct objtool_file *file, - struct symbol *func) -{ - struct instruction *insn, *prev_jump = NULL; - struct rela *rela, *prev_rela = NULL; - int ret; - - func_for_each_insn(file, func, insn) { - if (insn->type != INSN_JUMP_DYNAMIC) - continue; - - rela = find_switch_table(file, func, insn); - if (!rela) - continue; - - /* - * We found a switch table, but we don't know yet how big it - * is. Don't add it until we reach the end of the function or - * the beginning of another switch table in the same function. - */ - if (prev_jump) { - ret = add_switch_table(file, func, prev_jump, prev_rela, - rela); - if (ret) - return ret; - } - - prev_jump = insn; - prev_rela = rela; - } - - if (prev_jump) { - ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); - if (ret) - return ret; - } - - return 0; -} - -/* - * For some switch statements, gcc generates a jump table in the .rodata - * section which contains a list of addresses within the function to jump to. - * This finds these jump tables and adds them to the insn->alts lists. - */ -static int add_switch_table_alts(struct objtool_file *file) -{ - struct section *sec; - struct symbol *func; - int ret; - - if (!file->rodata || !file->rodata->rela) - return 0; - - list_for_each_entry(sec, &file->elf->sections, list) { - list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) - continue; - - ret = add_func_switch_tables(file, func); - if (ret) - return ret; - } - } - - return 0; -} - -static int decode_sections(struct objtool_file *file) -{ - int ret; - - ret = decode_instructions(file); - if (ret) - return ret; - - ret = add_dead_ends(file); - if (ret) - return ret; - - add_ignores(file); - - ret = add_jump_destinations(file); - if (ret) - return ret; - - ret = add_call_destinations(file); - if (ret) - return ret; - - ret = add_special_section_alts(file); - if (ret) - return ret; - - ret = add_switch_table_alts(file); - if (ret) - return ret; - - return 0; -} - -static bool is_fentry_call(struct instruction *insn) -{ - if (insn->type == INSN_CALL && - insn->call_dest->type == STT_NOTYPE && - !strcmp(insn->call_dest->name, "__fentry__")) - return true; - - return false; -} - -static bool has_modified_stack_frame(struct instruction *insn) -{ - return (insn->state & STATE_FP_SAVED) || - (insn->state & STATE_FP_SETUP); -} - -static bool has_valid_stack_frame(struct instruction *insn) -{ - return (insn->state & STATE_FP_SAVED) && - (insn->state & STATE_FP_SETUP); -} - -static unsigned int frame_state(unsigned long state) -{ - return (state & (STATE_FP_SAVED | STATE_FP_SETUP)); -} - -/* - * Follow the branch starting at the given instruction, and recursively follow - * any other branches (jumps). Meanwhile, track the frame pointer state at - * each instruction and validate all the rules described in - * tools/objtool/Documentation/stack-validation.txt. - */ -static int validate_branch(struct objtool_file *file, - struct instruction *first, unsigned char first_state) -{ - struct alternative *alt; - struct instruction *insn; - struct section *sec; - struct symbol *func = NULL; - unsigned char state; - int ret; - - insn = first; - sec = insn->sec; - state = first_state; - - if (insn->alt_group && list_empty(&insn->alts)) { - WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", - sec, insn->offset); - return 1; - } - - while (1) { - if (file->c_file && insn->func) { - if (func && func != insn->func) { - WARN("%s() falls through to next function %s()", - func->name, insn->func->name); - return 1; - } - - func = insn->func; - } - - if (insn->visited) { - if (frame_state(insn->state) != frame_state(state)) { - WARN_FUNC("frame pointer state mismatch", - sec, insn->offset); - return 1; - } - - return 0; - } - - insn->visited = true; - insn->state = state; - - list_for_each_entry(alt, &insn->alts, list) { - ret = validate_branch(file, alt->insn, state); - if (ret) - return 1; - } - - switch (insn->type) { - - case INSN_FP_SAVE: - if (!nofp) { - if (state & STATE_FP_SAVED) { - WARN_FUNC("duplicate frame pointer save", - sec, insn->offset); - return 1; - } - state |= STATE_FP_SAVED; - } - break; - - case INSN_FP_SETUP: - if (!nofp) { - if (state & STATE_FP_SETUP) { - WARN_FUNC("duplicate frame pointer setup", - sec, insn->offset); - return 1; - } - state |= STATE_FP_SETUP; - } - break; - - case INSN_FP_RESTORE: - if (!nofp) { - if (has_valid_stack_frame(insn)) - state &= ~STATE_FP_SETUP; - - state &= ~STATE_FP_SAVED; - } - break; - - case INSN_RETURN: - if (!nofp && has_modified_stack_frame(insn)) { - WARN_FUNC("return without frame pointer restore", - sec, insn->offset); - return 1; - } - return 0; - - case INSN_CALL: - if (is_fentry_call(insn)) { - state |= STATE_FENTRY; - break; - } - - ret = dead_end_function(file, insn->call_dest); - if (ret == 1) - return 0; - if (ret == -1) - return 1; - - /* fallthrough */ - case INSN_CALL_DYNAMIC: - if (!nofp && !has_valid_stack_frame(insn)) { - WARN_FUNC("call without frame pointer save/setup", - sec, insn->offset); - return 1; - } - break; - - case INSN_JUMP_CONDITIONAL: - case INSN_JUMP_UNCONDITIONAL: - if (insn->jump_dest) { - ret = validate_branch(file, insn->jump_dest, - state); - if (ret) - return 1; - } else if (has_modified_stack_frame(insn)) { - WARN_FUNC("sibling call from callable instruction with changed frame pointer", - sec, insn->offset); - return 1; - } /* else it's a sibling call */ - - if (insn->type == INSN_JUMP_UNCONDITIONAL) - return 0; - - break; - - case INSN_JUMP_DYNAMIC: - if (list_empty(&insn->alts) && - has_modified_stack_frame(insn)) { - WARN_FUNC("sibling call from callable instruction with changed frame pointer", - sec, insn->offset); - return 1; - } - - return 0; - - default: - break; - } - - if (insn->dead_end) - return 0; - - insn = next_insn_same_sec(file, insn); - if (!insn) { - WARN("%s: unexpected end of section", sec->name); - return 1; - } - } - - return 0; -} - -static bool is_kasan_insn(struct instruction *insn) -{ - return (insn->type == INSN_CALL && - !strcmp(insn->call_dest->name, "__asan_handle_no_return")); -} - -static bool is_ubsan_insn(struct instruction *insn) -{ - return (insn->type == INSN_CALL && - !strcmp(insn->call_dest->name, - "__ubsan_handle_builtin_unreachable")); -} - -static bool ignore_unreachable_insn(struct symbol *func, - struct instruction *insn) -{ - int i; - - if (insn->type == INSN_NOP) - return true; - - /* - * Check if this (or a subsequent) instruction is related to - * CONFIG_UBSAN or CONFIG_KASAN. - * - * End the search at 5 instructions to avoid going into the weeds. - */ - for (i = 0; i < 5; i++) { - - if (is_kasan_insn(insn) || is_ubsan_insn(insn)) - return true; - - if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { - insn = insn->jump_dest; - continue; - } - - if (insn->offset + insn->len >= func->offset + func->len) - break; - insn = list_next_entry(insn, list); - } - - return false; -} - -static int validate_functions(struct objtool_file *file) -{ - struct section *sec; - struct symbol *func; - struct instruction *insn; - int ret, warnings = 0; - - list_for_each_entry(sec, &file->elf->sections, list) { - list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) - continue; - - insn = find_insn(file, sec, func->offset); - if (!insn) - continue; - - ret = validate_branch(file, insn, 0); - warnings += ret; - } - } - - list_for_each_entry(sec, &file->elf->sections, list) { - list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) - continue; - - func_for_each_insn(file, func, insn) { - if (insn->visited) - continue; - - insn->visited = true; - - if (file->ignore_unreachables || warnings || - ignore_unreachable_insn(func, insn)) - continue; - - /* - * gcov produces a lot of unreachable - * instructions. If we get an unreachable - * warning and the file has gcov enabled, just - * ignore it, and all other such warnings for - * the file. - */ - if (!file->ignore_unreachables && - gcov_enabled(file)) { - file->ignore_unreachables = true; - continue; - } - - WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset); - warnings++; - } - } - } - - return warnings; -} - -static int validate_uncallable_instructions(struct objtool_file *file) -{ - struct instruction *insn; - int warnings = 0; - - for_each_insn(file, insn) { - if (!insn->visited && insn->type == INSN_RETURN) { - WARN_FUNC("return instruction outside of a callable function", - insn->sec, insn->offset); - warnings++; - } - } - - return warnings; -} - -static void cleanup(struct objtool_file *file) -{ - struct instruction *insn, *tmpinsn; - struct alternative *alt, *tmpalt; - - list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) { - list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) { - list_del(&alt->list); - free(alt); - } - list_del(&insn->list); - hash_del(&insn->hash); - free(insn); - } - elf_close(file->elf); -} - -const char * const check_usage[] = { +static const char * const check_usage[] = { "objtool check [<options>] file.o", NULL, }; +const struct option check_options[] = { + OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"), + OPT_END(), +}; + int cmd_check(int argc, const char **argv) { - struct objtool_file file; - int ret, warnings = 0; - - const struct option options[] = { - OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"), - OPT_END(), - }; + const char *objname; - argc = parse_options(argc, argv, options, check_usage, 0); + argc = parse_options(argc, argv, check_options, check_usage, 0); if (argc != 1) - usage_with_options(check_usage, options); + usage_with_options(check_usage, check_options); objname = argv[0]; - file.elf = elf_open(objname); - if (!file.elf) { - fprintf(stderr, "error reading elf file %s\n", objname); - return 1; - } - - INIT_LIST_HEAD(&file.insn_list); - hash_init(file.insn_hash); - file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); - file.rodata = find_section_by_name(file.elf, ".rodata"); - file.ignore_unreachables = false; - file.c_file = find_section_by_name(file.elf, ".comment"); - - ret = decode_sections(&file); - if (ret < 0) - goto out; - warnings += ret; - - ret = validate_functions(&file); - if (ret < 0) - goto out; - warnings += ret; - - ret = validate_uncallable_instructions(&file); - if (ret < 0) - goto out; - warnings += ret; - -out: - cleanup(&file); - - /* ignore warnings for now until we get all the code cleaned up */ - if (ret || warnings) - return 0; - return 0; + return check(objname, nofp); } diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h new file mode 100644 index 000000000000..443ab2c69992 --- /dev/null +++ b/tools/objtool/cfi.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _OBJTOOL_CFI_H +#define _OBJTOOL_CFI_H + +#define CFI_UNDEFINED -1 +#define CFI_CFA -2 +#define CFI_SP_INDIRECT -3 +#define CFI_BP_INDIRECT -4 + +#define CFI_AX 0 +#define CFI_DX 1 +#define CFI_CX 2 +#define CFI_BX 3 +#define CFI_SI 4 +#define CFI_DI 5 +#define CFI_BP 6 +#define CFI_SP 7 +#define CFI_R8 8 +#define CFI_R9 9 +#define CFI_R10 10 +#define CFI_R11 11 +#define CFI_R12 12 +#define CFI_R13 13 +#define CFI_R14 14 +#define CFI_R15 15 +#define CFI_RA 16 +#define CFI_NUM_REGS 17 + +struct cfi_reg { + int base; + int offset; +}; + +struct cfi_state { + struct cfi_reg cfa; + struct cfi_reg regs[CFI_NUM_REGS]; +}; + +#endif /* _OBJTOOL_CFI_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c new file mode 100644 index 000000000000..fea222192c57 --- /dev/null +++ b/tools/objtool/check.c @@ -0,0 +1,1655 @@ +/* + * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <string.h> +#include <stdlib.h> + +#include "check.h" +#include "elf.h" +#include "special.h" +#include "arch.h" +#include "warn.h" + +#include <linux/hashtable.h> +#include <linux/kernel.h> + +struct alternative { + struct list_head list; + struct instruction *insn; +}; + +const char *objname; +static bool nofp; +struct cfi_state initial_func_cfi; + +static struct instruction *find_insn(struct objtool_file *file, + struct section *sec, unsigned long offset) +{ + struct instruction *insn; + + hash_for_each_possible(file->insn_hash, insn, hash, offset) + if (insn->sec == sec && insn->offset == offset) + return insn; + + return NULL; +} + +static struct instruction *next_insn_same_sec(struct objtool_file *file, + struct instruction *insn) +{ + struct instruction *next = list_next_entry(insn, list); + + if (!next || &next->list == &file->insn_list || next->sec != insn->sec) + return NULL; + + return next; +} + +static bool gcov_enabled(struct objtool_file *file) +{ + struct section *sec; + struct symbol *sym; + + for_each_sec(file, sec) + list_for_each_entry(sym, &sec->symbol_list, list) + if (!strncmp(sym->name, "__gcov_.", 8)) + return true; + + return false; +} + +#define func_for_each_insn(file, func, insn) \ + for (insn = find_insn(file, func->sec, func->offset); \ + insn && &insn->list != &file->insn_list && \ + insn->sec == func->sec && \ + insn->offset < func->offset + func->len; \ + insn = list_next_entry(insn, list)) + +#define func_for_each_insn_continue_reverse(file, func, insn) \ + for (insn = list_prev_entry(insn, list); \ + &insn->list != &file->insn_list && \ + insn->sec == func->sec && insn->offset >= func->offset; \ + insn = list_prev_entry(insn, list)) + +#define sec_for_each_insn_from(file, insn) \ + for (; insn; insn = next_insn_same_sec(file, insn)) + +#define sec_for_each_insn_continue(file, insn) \ + for (insn = next_insn_same_sec(file, insn); insn; \ + insn = next_insn_same_sec(file, insn)) + +/* + * Check if the function has been manually whitelisted with the + * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted + * due to its use of a context switching instruction. + */ +static bool ignore_func(struct objtool_file *file, struct symbol *func) +{ + struct rela *rela; + struct instruction *insn; + + /* check for STACK_FRAME_NON_STANDARD */ + if (file->whitelist && file->whitelist->rela) + list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) { + if (rela->sym->type == STT_SECTION && + rela->sym->sec == func->sec && + rela->addend == func->offset) + return true; + if (rela->sym->type == STT_FUNC && rela->sym == func) + return true; + } + + /* check if it has a context switching instruction */ + func_for_each_insn(file, func, insn) + if (insn->type == INSN_CONTEXT_SWITCH) + return true; + + return false; +} + +/* + * This checks to see if the given function is a "noreturn" function. + * + * For global functions which are outside the scope of this object file, we + * have to keep a manual list of them. + * + * For local functions, we have to detect them manually by simply looking for + * the lack of a return instruction. + * + * Returns: + * -1: error + * 0: no dead end + * 1: dead end + */ +static int __dead_end_function(struct objtool_file *file, struct symbol *func, + int recursion) +{ + int i; + struct instruction *insn; + bool empty = true; + + /* + * Unfortunately these have to be hard coded because the noreturn + * attribute isn't provided in ELF data. + */ + static const char * const global_noreturns[] = { + "__stack_chk_fail", + "panic", + "do_exit", + "do_task_dead", + "__module_put_and_exit", + "complete_and_exit", + "kvm_spurious_fault", + "__reiserfs_panic", + "lbug_with_loc", + "fortify_panic", + }; + + if (func->bind == STB_WEAK) + return 0; + + if (func->bind == STB_GLOBAL) + for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) + if (!strcmp(func->name, global_noreturns[i])) + return 1; + + if (!func->sec) + return 0; + + func_for_each_insn(file, func, insn) { + empty = false; + + if (insn->type == INSN_RETURN) + return 0; + } + + if (empty) + return 0; + + /* + * A function can have a sibling call instead of a return. In that + * case, the function's dead-end status depends on whether the target + * of the sibling call returns. + */ + func_for_each_insn(file, func, insn) { + if (insn->sec != func->sec || + insn->offset >= func->offset + func->len) + break; + + if (insn->type == INSN_JUMP_UNCONDITIONAL) { + struct instruction *dest = insn->jump_dest; + struct symbol *dest_func; + + if (!dest) + /* sibling call to another file */ + return 0; + + if (dest->sec != func->sec || + dest->offset < func->offset || + dest->offset >= func->offset + func->len) { + /* local sibling call */ + dest_func = find_symbol_by_offset(dest->sec, + dest->offset); + if (!dest_func) + continue; + + if (recursion == 5) { + WARN_FUNC("infinite recursion (objtool bug!)", + dest->sec, dest->offset); + return -1; + } + + return __dead_end_function(file, dest_func, + recursion + 1); + } + } + + if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts)) + /* sibling call */ + return 0; + } + + return 1; +} + +static int dead_end_function(struct objtool_file *file, struct symbol *func) +{ + return __dead_end_function(file, func, 0); +} + +static void clear_insn_state(struct insn_state *state) +{ + int i; + + memset(state, 0, sizeof(*state)); + state->cfa.base = CFI_UNDEFINED; + for (i = 0; i < CFI_NUM_REGS; i++) + state->regs[i].base = CFI_UNDEFINED; + state->drap_reg = CFI_UNDEFINED; +} + +/* + * Call the arch-specific instruction decoder for all the instructions and add + * them to the global instruction list. + */ +static int decode_instructions(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + unsigned long offset; + struct instruction *insn; + int ret; + + for_each_sec(file, sec) { + + if (!(sec->sh.sh_flags & SHF_EXECINSTR)) + continue; + + for (offset = 0; offset < sec->len; offset += insn->len) { + insn = malloc(sizeof(*insn)); + if (!insn) { + WARN("malloc failed"); + return -1; + } + memset(insn, 0, sizeof(*insn)); + INIT_LIST_HEAD(&insn->alts); + clear_insn_state(&insn->state); + + insn->sec = sec; + insn->offset = offset; + + ret = arch_decode_instruction(file->elf, sec, offset, + sec->len - offset, + &insn->len, &insn->type, + &insn->immediate, + &insn->stack_op); + if (ret) + return ret; + + if (!insn->type || insn->type > INSN_LAST) { + WARN_FUNC("invalid instruction type %d", + insn->sec, insn->offset, insn->type); + return -1; + } + + hash_add(file->insn_hash, &insn->hash, insn->offset); + list_add_tail(&insn->list, &file->insn_list); + } + + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + if (!find_insn(file, sec, func->offset)) { + WARN("%s(): can't find starting instruction", + func->name); + return -1; + } + + func_for_each_insn(file, func, insn) + if (!insn->func) + insn->func = func; + } + } + + return 0; +} + +/* + * Find all uses of the unreachable() macro, which are code path dead ends. + */ +static int add_dead_ends(struct objtool_file *file) +{ + struct section *sec; + struct rela *rela; + struct instruction *insn; + bool found; + + sec = find_section_by_name(file->elf, ".rela.discard.unreachable"); + if (!sec) + return 0; + + list_for_each_entry(rela, &sec->rela_list, list) { + if (rela->sym->type != STT_SECTION) { + WARN("unexpected relocation symbol type in %s", sec->name); + return -1; + } + insn = find_insn(file, rela->sym->sec, rela->addend); + if (insn) + insn = list_prev_entry(insn, list); + else if (rela->addend == rela->sym->sec->len) { + found = false; + list_for_each_entry_reverse(insn, &file->insn_list, list) { + if (insn->sec == rela->sym->sec) { + found = true; + break; + } + } + + if (!found) { + WARN("can't find unreachable insn at %s+0x%x", + rela->sym->sec->name, rela->addend); + return -1; + } + } else { + WARN("can't find unreachable insn at %s+0x%x", + rela->sym->sec->name, rela->addend); + return -1; + } + + insn->dead_end = true; + } + + return 0; +} + +/* + * Warnings shouldn't be reported for ignored functions. + */ +static void add_ignores(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + struct symbol *func; + + for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + if (!ignore_func(file, func)) + continue; + + func_for_each_insn(file, func, insn) + insn->ignore = true; + } + } +} + +/* + * Find the destination instructions for all jumps. + */ +static int add_jump_destinations(struct objtool_file *file) +{ + struct instruction *insn; + struct rela *rela; + struct section *dest_sec; + unsigned long dest_off; + + for_each_insn(file, insn) { + if (insn->type != INSN_JUMP_CONDITIONAL && + insn->type != INSN_JUMP_UNCONDITIONAL) + continue; + + if (insn->ignore) + continue; + + rela = find_rela_by_dest_range(insn->sec, insn->offset, + insn->len); + if (!rela) { + dest_sec = insn->sec; + dest_off = insn->offset + insn->len + insn->immediate; + } else if (rela->sym->type == STT_SECTION) { + dest_sec = rela->sym->sec; + dest_off = rela->addend + 4; + } else if (rela->sym->sec->idx) { + dest_sec = rela->sym->sec; + dest_off = rela->sym->sym.st_value + rela->addend + 4; + } else { + /* sibling call */ + insn->jump_dest = 0; + continue; + } + + insn->jump_dest = find_insn(file, dest_sec, dest_off); + if (!insn->jump_dest) { + + /* + * This is a special case where an alt instruction + * jumps past the end of the section. These are + * handled later in handle_group_alt(). + */ + if (!strcmp(insn->sec->name, ".altinstr_replacement")) + continue; + + WARN_FUNC("can't find jump dest instruction at %s+0x%lx", + insn->sec, insn->offset, dest_sec->name, + dest_off); + return -1; + } + } + + return 0; +} + +/* + * Find the destination instructions for all calls. + */ +static int add_call_destinations(struct objtool_file *file) +{ + struct instruction *insn; + unsigned long dest_off; + struct rela *rela; + + for_each_insn(file, insn) { + if (insn->type != INSN_CALL) + continue; + + rela = find_rela_by_dest_range(insn->sec, insn->offset, + insn->len); + if (!rela) { + dest_off = insn->offset + insn->len + insn->immediate; + insn->call_dest = find_symbol_by_offset(insn->sec, + dest_off); + if (!insn->call_dest) { + WARN_FUNC("can't find call dest symbol at offset 0x%lx", + insn->sec, insn->offset, dest_off); + return -1; + } + } else if (rela->sym->type == STT_SECTION) { + insn->call_dest = find_symbol_by_offset(rela->sym->sec, + rela->addend+4); + if (!insn->call_dest || + insn->call_dest->type != STT_FUNC) { + WARN_FUNC("can't find call dest symbol at %s+0x%x", + insn->sec, insn->offset, + rela->sym->sec->name, + rela->addend + 4); + return -1; + } + } else + insn->call_dest = rela->sym; + } + + return 0; +} + +/* + * The .alternatives section requires some extra special care, over and above + * what other special sections require: + * + * 1. Because alternatives are patched in-place, we need to insert a fake jump + * instruction at the end so that validate_branch() skips all the original + * replaced instructions when validating the new instruction path. + * + * 2. An added wrinkle is that the new instruction length might be zero. In + * that case the old instructions are replaced with noops. We simulate that + * by creating a fake jump as the only new instruction. + * + * 3. In some cases, the alternative section includes an instruction which + * conditionally jumps to the _end_ of the entry. We have to modify these + * jumps' destinations to point back to .text rather than the end of the + * entry in .altinstr_replacement. + * + * 4. It has been requested that we don't validate the !POPCNT feature path + * which is a "very very small percentage of machines". + */ +static int handle_group_alt(struct objtool_file *file, + struct special_alt *special_alt, + struct instruction *orig_insn, + struct instruction **new_insn) +{ + struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; + unsigned long dest_off; + + last_orig_insn = NULL; + insn = orig_insn; + sec_for_each_insn_from(file, insn) { + if (insn->offset >= special_alt->orig_off + special_alt->orig_len) + break; + + if (special_alt->skip_orig) + insn->type = INSN_NOP; + + insn->alt_group = true; + last_orig_insn = insn; + } + + if (!next_insn_same_sec(file, last_orig_insn)) { + WARN("%s: don't know how to handle alternatives at end of section", + special_alt->orig_sec->name); + return -1; + } + + fake_jump = malloc(sizeof(*fake_jump)); + if (!fake_jump) { + WARN("malloc failed"); + return -1; + } + memset(fake_jump, 0, sizeof(*fake_jump)); + INIT_LIST_HEAD(&fake_jump->alts); + clear_insn_state(&fake_jump->state); + + fake_jump->sec = special_alt->new_sec; + fake_jump->offset = -1; + fake_jump->type = INSN_JUMP_UNCONDITIONAL; + fake_jump->jump_dest = list_next_entry(last_orig_insn, list); + fake_jump->ignore = true; + + if (!special_alt->new_len) { + *new_insn = fake_jump; + return 0; + } + + last_new_insn = NULL; + insn = *new_insn; + sec_for_each_insn_from(file, insn) { + if (insn->offset >= special_alt->new_off + special_alt->new_len) + break; + + last_new_insn = insn; + + if (insn->type != INSN_JUMP_CONDITIONAL && + insn->type != INSN_JUMP_UNCONDITIONAL) + continue; + + if (!insn->immediate) + continue; + + dest_off = insn->offset + insn->len + insn->immediate; + if (dest_off == special_alt->new_off + special_alt->new_len) + insn->jump_dest = fake_jump; + + if (!insn->jump_dest) { + WARN_FUNC("can't find alternative jump destination", + insn->sec, insn->offset); + return -1; + } + } + + if (!last_new_insn) { + WARN_FUNC("can't find last new alternative instruction", + special_alt->new_sec, special_alt->new_off); + return -1; + } + + list_add(&fake_jump->list, &last_new_insn->list); + + return 0; +} + +/* + * A jump table entry can either convert a nop to a jump or a jump to a nop. + * If the original instruction is a jump, make the alt entry an effective nop + * by just skipping the original instruction. + */ +static int handle_jump_alt(struct objtool_file *file, + struct special_alt *special_alt, + struct instruction *orig_insn, + struct instruction **new_insn) +{ + if (orig_insn->type == INSN_NOP) + return 0; + + if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) { + WARN_FUNC("unsupported instruction at jump label", + orig_insn->sec, orig_insn->offset); + return -1; + } + + *new_insn = list_next_entry(orig_insn, list); + return 0; +} + +/* + * Read all the special sections which have alternate instructions which can be + * patched in or redirected to at runtime. Each instruction having alternate + * instruction(s) has them added to its insn->alts list, which will be + * traversed in validate_branch(). + */ +static int add_special_section_alts(struct objtool_file *file) +{ + struct list_head special_alts; + struct instruction *orig_insn, *new_insn; + struct special_alt *special_alt, *tmp; + struct alternative *alt; + int ret; + + ret = special_get_alts(file->elf, &special_alts); + if (ret) + return ret; + + list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + ret = -1; + goto out; + } + + orig_insn = find_insn(file, special_alt->orig_sec, + special_alt->orig_off); + if (!orig_insn) { + WARN_FUNC("special: can't find orig instruction", + special_alt->orig_sec, special_alt->orig_off); + ret = -1; + goto out; + } + + new_insn = NULL; + if (!special_alt->group || special_alt->new_len) { + new_insn = find_insn(file, special_alt->new_sec, + special_alt->new_off); + if (!new_insn) { + WARN_FUNC("special: can't find new instruction", + special_alt->new_sec, + special_alt->new_off); + ret = -1; + goto out; + } + } + + if (special_alt->group) { + ret = handle_group_alt(file, special_alt, orig_insn, + &new_insn); + if (ret) + goto out; + } else if (special_alt->jump_or_nop) { + ret = handle_jump_alt(file, special_alt, orig_insn, + &new_insn); + if (ret) + goto out; + } + + alt->insn = new_insn; + list_add_tail(&alt->list, &orig_insn->alts); + + list_del(&special_alt->list); + free(special_alt); + } + +out: + return ret; +} + +static int add_switch_table(struct objtool_file *file, struct symbol *func, + struct instruction *insn, struct rela *table, + struct rela *next_table) +{ + struct rela *rela = table; + struct instruction *alt_insn; + struct alternative *alt; + + list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { + if (rela == next_table) + break; + + if (rela->sym->sec != insn->sec || + rela->addend <= func->offset || + rela->addend >= func->offset + func->len) + break; + + alt_insn = find_insn(file, insn->sec, rela->addend); + if (!alt_insn) { + WARN("%s: can't find instruction at %s+0x%x", + file->rodata->rela->name, insn->sec->name, + rela->addend); + return -1; + } + + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + return -1; + } + + alt->insn = alt_insn; + list_add_tail(&alt->list, &insn->alts); + } + + return 0; +} + +/* + * find_switch_table() - Given a dynamic jump, find the switch jump table in + * .rodata associated with it. + * + * There are 3 basic patterns: + * + * 1. jmpq *[rodata addr](,%reg,8) + * + * This is the most common case by far. It jumps to an address in a simple + * jump table which is stored in .rodata. + * + * 2. jmpq *[rodata addr](%rip) + * + * This is caused by a rare GCC quirk, currently only seen in three driver + * functions in the kernel, only with certain obscure non-distro configs. + * + * As part of an optimization, GCC makes a copy of an existing switch jump + * table, modifies it, and then hard-codes the jump (albeit with an indirect + * jump) to use a single entry in the table. The rest of the jump table and + * some of its jump targets remain as dead code. + * + * In such a case we can just crudely ignore all unreachable instruction + * warnings for the entire object file. Ideally we would just ignore them + * for the function, but that would require redesigning the code quite a + * bit. And honestly that's just not worth doing: unreachable instruction + * warnings are of questionable value anyway, and this is such a rare issue. + * + * 3. mov [rodata addr],%reg1 + * ... some instructions ... + * jmpq *(%reg1,%reg2,8) + * + * This is a fairly uncommon pattern which is new for GCC 6. As of this + * writing, there are 11 occurrences of it in the allmodconfig kernel. + * + * TODO: Once we have DWARF CFI and smarter instruction decoding logic, + * ensure the same register is used in the mov and jump instructions. + */ +static struct rela *find_switch_table(struct objtool_file *file, + struct symbol *func, + struct instruction *insn) +{ + struct rela *text_rela, *rodata_rela; + struct instruction *orig_insn = insn; + + text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); + if (text_rela && text_rela->sym == file->rodata->sym) { + /* case 1 */ + rodata_rela = find_rela_by_dest(file->rodata, + text_rela->addend); + if (rodata_rela) + return rodata_rela; + + /* case 2 */ + rodata_rela = find_rela_by_dest(file->rodata, + text_rela->addend + 4); + if (!rodata_rela) + return NULL; + file->ignore_unreachables = true; + return rodata_rela; + } + + /* case 3 */ + func_for_each_insn_continue_reverse(file, func, insn) { + if (insn->type == INSN_JUMP_DYNAMIC) + break; + + /* allow small jumps within the range */ + if (insn->type == INSN_JUMP_UNCONDITIONAL && + insn->jump_dest && + (insn->jump_dest->offset <= insn->offset || + insn->jump_dest->offset > orig_insn->offset)) + break; + + /* look for a relocation which references .rodata */ + text_rela = find_rela_by_dest_range(insn->sec, insn->offset, + insn->len); + if (!text_rela || text_rela->sym != file->rodata->sym) + continue; + + /* + * Make sure the .rodata address isn't associated with a + * symbol. gcc jump tables are anonymous data. + */ + if (find_symbol_containing(file->rodata, text_rela->addend)) + continue; + + return find_rela_by_dest(file->rodata, text_rela->addend); + } + + return NULL; +} + +static int add_func_switch_tables(struct objtool_file *file, + struct symbol *func) +{ + struct instruction *insn, *prev_jump = NULL; + struct rela *rela, *prev_rela = NULL; + int ret; + + func_for_each_insn(file, func, insn) { + if (insn->type != INSN_JUMP_DYNAMIC) + continue; + + rela = find_switch_table(file, func, insn); + if (!rela) + continue; + + /* + * We found a switch table, but we don't know yet how big it + * is. Don't add it until we reach the end of the function or + * the beginning of another switch table in the same function. + */ + if (prev_jump) { + ret = add_switch_table(file, func, prev_jump, prev_rela, + rela); + if (ret) + return ret; + } + + prev_jump = insn; + prev_rela = rela; + } + + if (prev_jump) { + ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); + if (ret) + return ret; + } + + return 0; +} + +/* + * For some switch statements, gcc generates a jump table in the .rodata + * section which contains a list of addresses within the function to jump to. + * This finds these jump tables and adds them to the insn->alts lists. + */ +static int add_switch_table_alts(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + int ret; + + if (!file->rodata || !file->rodata->rela) + return 0; + + for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + ret = add_func_switch_tables(file, func); + if (ret) + return ret; + } + } + + return 0; +} + +static int decode_sections(struct objtool_file *file) +{ + int ret; + + ret = decode_instructions(file); + if (ret) + return ret; + + ret = add_dead_ends(file); + if (ret) + return ret; + + add_ignores(file); + + ret = add_jump_destinations(file); + if (ret) + return ret; + + ret = add_call_destinations(file); + if (ret) + return ret; + + ret = add_special_section_alts(file); + if (ret) + return ret; + + ret = add_switch_table_alts(file); + if (ret) + return ret; + + return 0; +} + +static bool is_fentry_call(struct instruction *insn) +{ + if (insn->type == INSN_CALL && + insn->call_dest->type == STT_NOTYPE && + !strcmp(insn->call_dest->name, "__fentry__")) + return true; + + return false; +} + +static bool has_modified_stack_frame(struct insn_state *state) +{ + int i; + + if (state->cfa.base != initial_func_cfi.cfa.base || + state->cfa.offset != initial_func_cfi.cfa.offset || + state->stack_size != initial_func_cfi.cfa.offset || + state->drap) + return true; + + for (i = 0; i < CFI_NUM_REGS; i++) + if (state->regs[i].base != initial_func_cfi.regs[i].base || + state->regs[i].offset != initial_func_cfi.regs[i].offset) + return true; + + return false; +} + +static bool has_valid_stack_frame(struct insn_state *state) +{ + if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA && + state->regs[CFI_BP].offset == -16) + return true; + + if (state->drap && state->regs[CFI_BP].base == CFI_BP) + return true; + + return false; +} + +static void save_reg(struct insn_state *state, unsigned char reg, int base, + int offset) +{ + if ((arch_callee_saved_reg(reg) || + (state->drap && reg == state->drap_reg)) && + state->regs[reg].base == CFI_UNDEFINED) { + state->regs[reg].base = base; + state->regs[reg].offset = offset; + } +} + +static void restore_reg(struct insn_state *state, unsigned char reg) +{ + state->regs[reg].base = CFI_UNDEFINED; + state->regs[reg].offset = 0; +} + +/* + * A note about DRAP stack alignment: + * + * GCC has the concept of a DRAP register, which is used to help keep track of + * the stack pointer when aligning the stack. r10 or r13 is used as the DRAP + * register. The typical DRAP pattern is: + * + * 4c 8d 54 24 08 lea 0x8(%rsp),%r10 + * 48 83 e4 c0 and $0xffffffffffffffc0,%rsp + * 41 ff 72 f8 pushq -0x8(%r10) + * 55 push %rbp + * 48 89 e5 mov %rsp,%rbp + * (more pushes) + * 41 52 push %r10 + * ... + * 41 5a pop %r10 + * (more pops) + * 5d pop %rbp + * 49 8d 62 f8 lea -0x8(%r10),%rsp + * c3 retq + * + * There are some variations in the epilogues, like: + * + * 5b pop %rbx + * 41 5a pop %r10 + * 41 5c pop %r12 + * 41 5d pop %r13 + * 41 5e pop %r14 + * c9 leaveq + * 49 8d 62 f8 lea -0x8(%r10),%rsp + * c3 retq + * + * and: + * + * 4c 8b 55 e8 mov -0x18(%rbp),%r10 + * 48 8b 5d e0 mov -0x20(%rbp),%rbx + * 4c 8b 65 f0 mov -0x10(%rbp),%r12 + * 4c 8b 6d f8 mov -0x8(%rbp),%r13 + * c9 leaveq + * 49 8d 62 f8 lea -0x8(%r10),%rsp + * c3 retq + * + * Sometimes r13 is used as the DRAP register, in which case it's saved and + * restored beforehand: + * + * 41 55 push %r13 + * 4c 8d 6c 24 10 lea 0x10(%rsp),%r13 + * 48 83 e4 f0 and $0xfffffffffffffff0,%rsp + * ... + * 49 8d 65 f0 lea -0x10(%r13),%rsp + * 41 5d pop %r13 + * c3 retq + */ +static int update_insn_state(struct instruction *insn, struct insn_state *state) +{ + struct stack_op *op = &insn->stack_op; + struct cfi_reg *cfa = &state->cfa; + struct cfi_reg *regs = state->regs; + + /* stack operations don't make sense with an undefined CFA */ + if (cfa->base == CFI_UNDEFINED) { + if (insn->func) { + WARN_FUNC("undefined stack state", insn->sec, insn->offset); + return -1; + } + return 0; + } + + switch (op->dest.type) { + + case OP_DEST_REG: + switch (op->src.type) { + + case OP_SRC_REG: + if (cfa->base == op->src.reg && cfa->base == CFI_SP && + op->dest.reg == CFI_BP && regs[CFI_BP].base == CFI_CFA && + regs[CFI_BP].offset == -cfa->offset) { + + /* mov %rsp, %rbp */ + cfa->base = op->dest.reg; + state->bp_scratch = false; + } else if (state->drap) { + + /* drap: mov %rsp, %rbp */ + regs[CFI_BP].base = CFI_BP; + regs[CFI_BP].offset = -state->stack_size; + state->bp_scratch = false; + } else if (!nofp) { + + WARN_FUNC("unknown stack-related register move", + insn->sec, insn->offset); + return -1; + } + + break; + + case OP_SRC_ADD: + if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) { + + /* add imm, %rsp */ + state->stack_size -= op->src.offset; + if (cfa->base == CFI_SP) + cfa->offset -= op->src.offset; + break; + } + + if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) { + + /* lea disp(%rbp), %rsp */ + state->stack_size = -(op->src.offset + regs[CFI_BP].offset); + break; + } + + if (op->dest.reg != CFI_BP && op->src.reg == CFI_SP && + cfa->base == CFI_SP) { + + /* drap: lea disp(%rsp), %drap */ + state->drap_reg = op->dest.reg; + break; + } + + if (state->drap && op->dest.reg == CFI_SP && + op->src.reg == state->drap_reg) { + + /* drap: lea disp(%drap), %rsp */ + cfa->base = CFI_SP; + cfa->offset = state->stack_size = -op->src.offset; + state->drap_reg = CFI_UNDEFINED; + state->drap = false; + break; + } + + if (op->dest.reg == state->cfa.base) { + WARN_FUNC("unsupported stack register modification", + insn->sec, insn->offset); + return -1; + } + + break; + + case OP_SRC_AND: + if (op->dest.reg != CFI_SP || + (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) || + (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) { + WARN_FUNC("unsupported stack pointer realignment", + insn->sec, insn->offset); + return -1; + } + + if (state->drap_reg != CFI_UNDEFINED) { + /* drap: and imm, %rsp */ + cfa->base = state->drap_reg; + cfa->offset = state->stack_size = 0; + state->drap = true; + + } + + /* + * Older versions of GCC (4.8ish) realign the stack + * without DRAP, with a frame pointer. + */ + + break; + + case OP_SRC_POP: + if (!state->drap && op->dest.type == OP_DEST_REG && + op->dest.reg == cfa->base) { + + /* pop %rbp */ + cfa->base = CFI_SP; + } + + if (regs[op->dest.reg].offset == -state->stack_size) { + + if (state->drap && cfa->base == CFI_BP_INDIRECT && + op->dest.type == OP_DEST_REG && + op->dest.reg == state->drap_reg) { + + /* drap: pop %drap */ + cfa->base = state->drap_reg; + cfa->offset = 0; + } + + restore_reg(state, op->dest.reg); + } + + state->stack_size -= 8; + if (cfa->base == CFI_SP) + cfa->offset -= 8; + + break; + + case OP_SRC_REG_INDIRECT: + if (state->drap && op->src.reg == CFI_BP && + op->src.offset == regs[op->dest.reg].offset) { + + /* drap: mov disp(%rbp), %reg */ + if (op->dest.reg == state->drap_reg) { + cfa->base = state->drap_reg; + cfa->offset = 0; + } + + restore_reg(state, op->dest.reg); + + } else if (op->src.reg == cfa->base && + op->src.offset == regs[op->dest.reg].offset + cfa->offset) { + + /* mov disp(%rbp), %reg */ + /* mov disp(%rsp), %reg */ + restore_reg(state, op->dest.reg); + } + + break; + + default: + WARN_FUNC("unknown stack-related instruction", + insn->sec, insn->offset); + return -1; + } + + break; + + case OP_DEST_PUSH: + state->stack_size += 8; + if (cfa->base == CFI_SP) + cfa->offset += 8; + + if (op->src.type != OP_SRC_REG) + break; + + if (state->drap) { + if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) { + + /* drap: push %drap */ + cfa->base = CFI_BP_INDIRECT; + cfa->offset = -state->stack_size; + + /* save drap so we know when to undefine it */ + save_reg(state, op->src.reg, CFI_CFA, -state->stack_size); + + } else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) { + + /* drap: push %rbp */ + state->stack_size = 0; + + } else if (regs[op->src.reg].base == CFI_UNDEFINED) { + + /* drap: push %reg */ + save_reg(state, op->src.reg, CFI_BP, -state->stack_size); + } + + } else { + + /* push %reg */ + save_reg(state, op->src.reg, CFI_CFA, -state->stack_size); + } + + /* detect when asm code uses rbp as a scratch register */ + if (!nofp && insn->func && op->src.reg == CFI_BP && + cfa->base != CFI_BP) + state->bp_scratch = true; + break; + + case OP_DEST_REG_INDIRECT: + + if (state->drap) { + if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) { + + /* drap: mov %drap, disp(%rbp) */ + cfa->base = CFI_BP_INDIRECT; + cfa->offset = op->dest.offset; + + /* save drap so we know when to undefine it */ + save_reg(state, op->src.reg, CFI_CFA, op->dest.offset); + } + + else if (regs[op->src.reg].base == CFI_UNDEFINED) { + + /* drap: mov reg, disp(%rbp) */ + save_reg(state, op->src.reg, CFI_BP, op->dest.offset); + } + + } else if (op->dest.reg == cfa->base) { + + /* mov reg, disp(%rbp) */ + /* mov reg, disp(%rsp) */ + save_reg(state, op->src.reg, CFI_CFA, + op->dest.offset - state->cfa.offset); + } + + break; + + case OP_DEST_LEAVE: + if ((!state->drap && cfa->base != CFI_BP) || + (state->drap && cfa->base != state->drap_reg)) { + WARN_FUNC("leave instruction with modified stack frame", + insn->sec, insn->offset); + return -1; + } + + /* leave (mov %rbp, %rsp; pop %rbp) */ + + state->stack_size = -state->regs[CFI_BP].offset - 8; + restore_reg(state, CFI_BP); + + if (!state->drap) { + cfa->base = CFI_SP; + cfa->offset -= 8; + } + + break; + + case OP_DEST_MEM: + if (op->src.type != OP_SRC_POP) { + WARN_FUNC("unknown stack-related memory operation", + insn->sec, insn->offset); + return -1; + } + + /* pop mem */ + state->stack_size -= 8; + if (cfa->base == CFI_SP) + cfa->offset -= 8; + + break; + + default: + WARN_FUNC("unknown stack-related instruction", + insn->sec, insn->offset); + return -1; + } + + return 0; +} + +static bool insn_state_match(struct instruction *insn, struct insn_state *state) +{ + struct insn_state *state1 = &insn->state, *state2 = state; + int i; + + if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) { + WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d", + insn->sec, insn->offset, + state1->cfa.base, state1->cfa.offset, + state2->cfa.base, state2->cfa.offset); + + } else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) { + for (i = 0; i < CFI_NUM_REGS; i++) { + if (!memcmp(&state1->regs[i], &state2->regs[i], + sizeof(struct cfi_reg))) + continue; + + WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d", + insn->sec, insn->offset, + i, state1->regs[i].base, state1->regs[i].offset, + i, state2->regs[i].base, state2->regs[i].offset); + break; + } + + } else if (state1->drap != state2->drap || + (state1->drap && state1->drap_reg != state2->drap_reg)) { + WARN_FUNC("stack state mismatch: drap1=%d(%d) drap2=%d(%d)", + insn->sec, insn->offset, + state1->drap, state1->drap_reg, + state2->drap, state2->drap_reg); + + } else + return true; + + return false; +} + +/* + * Follow the branch starting at the given instruction, and recursively follow + * any other branches (jumps). Meanwhile, track the frame pointer state at + * each instruction and validate all the rules described in + * tools/objtool/Documentation/stack-validation.txt. + */ +static int validate_branch(struct objtool_file *file, struct instruction *first, + struct insn_state state) +{ + struct alternative *alt; + struct instruction *insn; + struct section *sec; + struct symbol *func = NULL; + int ret; + + insn = first; + sec = insn->sec; + + if (insn->alt_group && list_empty(&insn->alts)) { + WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", + sec, insn->offset); + return -1; + } + + while (1) { + if (file->c_file && insn->func) { + if (func && func != insn->func) { + WARN("%s() falls through to next function %s()", + func->name, insn->func->name); + return 1; + } + } + + func = insn->func; + + if (insn->visited) { + if (!!insn_state_match(insn, &state)) + return 1; + + return 0; + } + + insn->state = state; + + insn->visited = true; + + list_for_each_entry(alt, &insn->alts, list) { + ret = validate_branch(file, alt->insn, state); + if (ret) + return 1; + } + + switch (insn->type) { + + case INSN_RETURN: + if (func && has_modified_stack_frame(&state)) { + WARN_FUNC("return with modified stack frame", + sec, insn->offset); + return 1; + } + + if (state.bp_scratch) { + WARN("%s uses BP as a scratch register", + insn->func->name); + return 1; + } + + return 0; + + case INSN_CALL: + if (is_fentry_call(insn)) + break; + + ret = dead_end_function(file, insn->call_dest); + if (ret == 1) + return 0; + if (ret == -1) + return 1; + + /* fallthrough */ + case INSN_CALL_DYNAMIC: + if (!nofp && func && !has_valid_stack_frame(&state)) { + WARN_FUNC("call without frame pointer save/setup", + sec, insn->offset); + return 1; + } + break; + + case INSN_JUMP_CONDITIONAL: + case INSN_JUMP_UNCONDITIONAL: + if (insn->jump_dest) { + ret = validate_branch(file, insn->jump_dest, + state); + if (ret) + return 1; + } else if (func && has_modified_stack_frame(&state)) { + WARN_FUNC("sibling call from callable instruction with modified stack frame", + sec, insn->offset); + return 1; + } /* else it's a sibling call */ + + if (insn->type == INSN_JUMP_UNCONDITIONAL) + return 0; + + break; + + case INSN_JUMP_DYNAMIC: + if (func && list_empty(&insn->alts) && + has_modified_stack_frame(&state)) { + WARN_FUNC("sibling call from callable instruction with modified stack frame", + sec, insn->offset); + return 1; + } + + return 0; + + case INSN_STACK: + if (update_insn_state(insn, &state)) + return -1; + + break; + + default: + break; + } + + if (insn->dead_end) + return 0; + + insn = next_insn_same_sec(file, insn); + if (!insn) { + WARN("%s: unexpected end of section", sec->name); + return 1; + } + } + + return 0; +} + +static bool is_kasan_insn(struct instruction *insn) +{ + return (insn->type == INSN_CALL && + !strcmp(insn->call_dest->name, "__asan_handle_no_return")); +} + +static bool is_ubsan_insn(struct instruction *insn) +{ + return (insn->type == INSN_CALL && + !strcmp(insn->call_dest->name, + "__ubsan_handle_builtin_unreachable")); +} + +static bool ignore_unreachable_insn(struct instruction *insn) +{ + int i; + + if (insn->ignore || insn->type == INSN_NOP) + return true; + + /* + * Ignore any unused exceptions. This can happen when a whitelisted + * function has an exception table entry. + */ + if (!strcmp(insn->sec->name, ".fixup")) + return true; + + /* + * Check if this (or a subsequent) instruction is related to + * CONFIG_UBSAN or CONFIG_KASAN. + * + * End the search at 5 instructions to avoid going into the weeds. + */ + if (!insn->func) + return false; + for (i = 0; i < 5; i++) { + + if (is_kasan_insn(insn) || is_ubsan_insn(insn)) + return true; + + if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { + insn = insn->jump_dest; + continue; + } + + if (insn->offset + insn->len >= insn->func->offset + insn->func->len) + break; + insn = list_next_entry(insn, list); + } + + return false; +} + +static int validate_functions(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + struct instruction *insn; + struct insn_state state; + int ret, warnings = 0; + + clear_insn_state(&state); + + state.cfa = initial_func_cfi.cfa; + memcpy(&state.regs, &initial_func_cfi.regs, + CFI_NUM_REGS * sizeof(struct cfi_reg)); + state.stack_size = initial_func_cfi.cfa.offset; + + for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->type != STT_FUNC) + continue; + + insn = find_insn(file, sec, func->offset); + if (!insn || insn->ignore) + continue; + + ret = validate_branch(file, insn, state); + warnings += ret; + } + } + + return warnings; +} + +static int validate_reachable_instructions(struct objtool_file *file) +{ + struct instruction *insn; + + if (file->ignore_unreachables) + return 0; + + for_each_insn(file, insn) { + if (insn->visited || ignore_unreachable_insn(insn)) + continue; + + /* + * gcov produces a lot of unreachable instructions. If we get + * an unreachable warning and the file has gcov enabled, just + * ignore it, and all other such warnings for the file. Do + * this here because this is an expensive function. + */ + if (gcov_enabled(file)) + return 0; + + WARN_FUNC("unreachable instruction", insn->sec, insn->offset); + return 1; + } + + return 0; +} + +static void cleanup(struct objtool_file *file) +{ + struct instruction *insn, *tmpinsn; + struct alternative *alt, *tmpalt; + + list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) { + list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) { + list_del(&alt->list); + free(alt); + } + list_del(&insn->list); + hash_del(&insn->hash); + free(insn); + } + elf_close(file->elf); +} + +int check(const char *_objname, bool _nofp) +{ + struct objtool_file file; + int ret, warnings = 0; + + objname = _objname; + nofp = _nofp; + + file.elf = elf_open(objname); + if (!file.elf) + return 1; + + INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); + file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); + file.rodata = find_section_by_name(file.elf, ".rodata"); + file.ignore_unreachables = false; + file.c_file = find_section_by_name(file.elf, ".comment"); + + arch_initial_func_cfi_state(&initial_func_cfi); + + ret = decode_sections(&file); + if (ret < 0) + goto out; + warnings += ret; + + if (list_empty(&file.insn_list)) + goto out; + + ret = validate_functions(&file); + if (ret < 0) + goto out; + warnings += ret; + + if (!warnings) { + ret = validate_reachable_instructions(&file); + if (ret < 0) + goto out; + warnings += ret; + } + +out: + cleanup(&file); + + /* ignore warnings for now until we get all the code cleaned up */ + if (ret || warnings) + return 0; + return 0; +} diff --git a/tools/objtool/check.h b/tools/objtool/check.h new file mode 100644 index 000000000000..da85f5b00ec6 --- /dev/null +++ b/tools/objtool/check.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _CHECK_H +#define _CHECK_H + +#include <stdbool.h> +#include "elf.h" +#include "cfi.h" +#include "arch.h" +#include <linux/hashtable.h> + +struct insn_state { + struct cfi_reg cfa; + struct cfi_reg regs[CFI_NUM_REGS]; + int stack_size; + bool bp_scratch; + bool drap; + int drap_reg; +}; + +struct instruction { + struct list_head list; + struct hlist_node hash; + struct section *sec; + unsigned long offset; + unsigned int len; + unsigned char type; + unsigned long immediate; + bool alt_group, visited, dead_end, ignore; + struct symbol *call_dest; + struct instruction *jump_dest; + struct list_head alts; + struct symbol *func; + struct stack_op stack_op; + struct insn_state state; +}; + +struct objtool_file { + struct elf *elf; + struct list_head insn_list; + DECLARE_HASHTABLE(insn_hash, 16); + struct section *rodata, *whitelist; + bool ignore_unreachables, c_file; +}; + +int check(const char *objname, bool nofp); + +#define for_each_insn(file, insn) \ + list_for_each_entry(insn, &file->insn_list, list) + +#endif /* _CHECK_H */ diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index d897702ce742..1a7e8aa2af58 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -37,6 +37,9 @@ #define ELF_C_READ_MMAP ELF_C_READ #endif +#define WARN_ELF(format, ...) \ + WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1)) + struct section *find_section_by_name(struct elf *elf, const char *name) { struct section *sec; @@ -139,12 +142,12 @@ static int read_sections(struct elf *elf) int i; if (elf_getshdrnum(elf->elf, §ions_nr)) { - perror("elf_getshdrnum"); + WARN_ELF("elf_getshdrnum"); return -1; } if (elf_getshdrstrndx(elf->elf, &shstrndx)) { - perror("elf_getshdrstrndx"); + WARN_ELF("elf_getshdrstrndx"); return -1; } @@ -165,37 +168,36 @@ static int read_sections(struct elf *elf) s = elf_getscn(elf->elf, i); if (!s) { - perror("elf_getscn"); + WARN_ELF("elf_getscn"); return -1; } sec->idx = elf_ndxscn(s); if (!gelf_getshdr(s, &sec->sh)) { - perror("gelf_getshdr"); + WARN_ELF("gelf_getshdr"); return -1; } sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name); if (!sec->name) { - perror("elf_strptr"); + WARN_ELF("elf_strptr"); return -1; } - sec->elf_data = elf_getdata(s, NULL); - if (!sec->elf_data) { - perror("elf_getdata"); + sec->data = elf_getdata(s, NULL); + if (!sec->data) { + WARN_ELF("elf_getdata"); return -1; } - if (sec->elf_data->d_off != 0 || - sec->elf_data->d_size != sec->sh.sh_size) { + if (sec->data->d_off != 0 || + sec->data->d_size != sec->sh.sh_size) { WARN("unexpected data attributes for %s", sec->name); return -1; } - sec->data = (unsigned long)sec->elf_data->d_buf; - sec->len = sec->elf_data->d_size; + sec->len = sec->data->d_size; } /* sanity check, one more call to elf_nextscn() should return NULL */ @@ -232,15 +234,15 @@ static int read_symbols(struct elf *elf) sym->idx = i; - if (!gelf_getsym(symtab->elf_data, i, &sym->sym)) { - perror("gelf_getsym"); + if (!gelf_getsym(symtab->data, i, &sym->sym)) { + WARN_ELF("gelf_getsym"); goto err; } sym->name = elf_strptr(elf->elf, symtab->sh.sh_link, sym->sym.st_name); if (!sym->name) { - perror("elf_strptr"); + WARN_ELF("elf_strptr"); goto err; } @@ -322,8 +324,8 @@ static int read_relas(struct elf *elf) } memset(rela, 0, sizeof(*rela)); - if (!gelf_getrela(sec->elf_data, i, &rela->rela)) { - perror("gelf_getrela"); + if (!gelf_getrela(sec->data, i, &rela->rela)) { + WARN_ELF("gelf_getrela"); return -1; } @@ -362,12 +364,6 @@ struct elf *elf_open(const char *name) INIT_LIST_HEAD(&elf->sections); - elf->name = strdup(name); - if (!elf->name) { - perror("strdup"); - goto err; - } - elf->fd = open(name, O_RDONLY); if (elf->fd == -1) { perror("open"); @@ -376,12 +372,12 @@ struct elf *elf_open(const char *name) elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL); if (!elf->elf) { - perror("elf_begin"); + WARN_ELF("elf_begin"); goto err; } if (!gelf_getehdr(elf->elf, &elf->ehdr)) { - perror("gelf_getehdr"); + WARN_ELF("gelf_getehdr"); goto err; } @@ -407,6 +403,12 @@ void elf_close(struct elf *elf) struct symbol *sym, *tmpsym; struct rela *rela, *tmprela; + if (elf->elf) + elf_end(elf->elf); + + if (elf->fd > 0) + close(elf->fd); + list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) { list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) { list_del(&sym->list); @@ -421,11 +423,6 @@ void elf_close(struct elf *elf) list_del(&sec->list); free(sec); } - if (elf->name) - free(elf->name); - if (elf->fd > 0) - close(elf->fd); - if (elf->elf) - elf_end(elf->elf); + free(elf); } diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 731973e1a3f5..343968b778cb 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -37,10 +37,9 @@ struct section { DECLARE_HASHTABLE(rela_hash, 16); struct section *base, *rela; struct symbol *sym; - Elf_Data *elf_data; + Elf_Data *data; char *name; int idx; - unsigned long data; unsigned int len; }; @@ -86,6 +85,7 @@ struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, struct symbol *find_containing_func(struct section *sec, unsigned long offset); void elf_close(struct elf *elf); - +#define for_each_sec(file, sec) \ + list_for_each_entry(sec, &file->elf->sections, list) #endif /* _OBJTOOL_ELF_H */ diff --git a/tools/objtool/special.c b/tools/objtool/special.c index bff8abb3a4aa..84f001d52322 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -91,16 +91,16 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, alt->jump_or_nop = entry->jump_or_nop; if (alt->group) { - alt->orig_len = *(unsigned char *)(sec->data + offset + + alt->orig_len = *(unsigned char *)(sec->data->d_buf + offset + entry->orig_len); - alt->new_len = *(unsigned char *)(sec->data + offset + + alt->new_len = *(unsigned char *)(sec->data->d_buf + offset + entry->new_len); } if (entry->feature) { unsigned short feature; - feature = *(unsigned short *)(sec->data + offset + + feature = *(unsigned short *)(sec->data->d_buf + offset + entry->feature); /* diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h index ac7e07523e84..afd9f7a05f6d 100644 --- a/tools/objtool/warn.h +++ b/tools/objtool/warn.h @@ -18,6 +18,13 @@ #ifndef _WARN_H #define _WARN_H +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "elf.h" + extern const char *objname; static inline char *offstr(struct section *sec, unsigned long offset) @@ -57,4 +64,7 @@ static inline char *offstr(struct section *sec, unsigned long offset) free(_str); \ }) +#define WARN_ELF(format, ...) \ + WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1)) + #endif /* _WARN_H */ diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index b0b3007d3c9c..4b6cdbf8f935 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -108,6 +108,9 @@ approach is available to export the data to a postgresql database. Refer to script export-to-postgresql.py for more details, and to script call-graph-from-postgresql.py for an example of using the database. +There is also script intel-pt-events.py which provides an example of how to +unpack the raw data for power events and PTWRITE. + As mentioned above, it is easy to capture too much data. One way to limit the data captured is to use 'snapshot' mode which is explained further below. Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. @@ -364,6 +367,42 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc CYC packets are not requested by default. +pt Specifies pass-through which enables the 'branch' config term. + + The default config selects 'pt' if it is available, so a user will + never need to specify this term. + +branch Enable branch tracing. Branch tracing is enabled by default so to + disable branch tracing use 'branch=0'. + + The default config selects 'branch' if it is available. + +ptw Enable PTWRITE packets which are produced when a ptwrite instruction + is executed. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/ptwrite + + which contains "1" if the feature is supported and + "0" otherwise. + +fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet + provides the address of the ptwrite instruction. In the absence of + fup_on_ptw, the decoder will use the address of the previous branch + if branch tracing is enabled, otherwise the address will be zero. + Note that fup_on_ptw will work even when branch tracing is disabled. + +pwr_evt Enable power events. The power events provide information about + changes to the CPU C-state. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/power_event_trace + + which contains "1" if the feature is supported and + "0" otherwise. + new snapshot option ------------------- @@ -674,13 +713,15 @@ Having no option is the same as which, in turn, is the same as - --itrace=ibxe + --itrace=ibxwpe The letters are: i synthesize "instructions" events b synthesize "branches" events x synthesize "transactions" events + w synthesize "ptwrite" events + p synthesize "power" events c synthesize branches events (calls only) r synthesize branches events (returns only) e synthesize tracing error events @@ -699,7 +740,40 @@ and "r" can be combined to get calls and returns. 'flags' field can be used in perf script to determine whether the event is a tranasaction start, commit or abort. -Error events are new. They show where the decoder lost the trace. Error events +Note that "instructions", "branches" and "transactions" events depend on code +flow packets which can be disabled by using the config term "branch=0". Refer +to the config terms section above. + +"ptwrite" events record the payload of the ptwrite instruction and whether +"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are +recorded only if the "ptw" config term was used. Refer to the config terms +section above. perf script "synth" field displays "ptwrite" information like +this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was +used. + +"Power" events correspond to power event packets and CBR (core-to-bus ratio) +packets. While CBR packets are always recorded when tracing is enabled, power +event packets are recorded only if the "pwr_evt" config term was used. Refer to +the config terms section above. The power events record information about +C-state changes, whereas CBR is indicative of CPU frequency. perf script +"event,synth" fields display information like this: + cbr: cbr: 22 freq: 2189 MHz (200%) + mwait: hints: 0x60 extensions: 0x1 + pwre: hw: 0 cstate: 2 sub-cstate: 0 + exstop: ip: 1 + pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4 +Where: + "cbr" includes the frequency and the percentage of maximum non-turbo + "mwait" shows mwait hints and extensions + "pwre" shows C-state transitions (to a C-state deeper than C0) and + whether initiated by hardware + "exstop" indicates execution stopped and whether the IP was recorded + exactly, + "pwrx" indicates return to C0 +For more details refer to the Intel 64 and IA-32 Architectures Software +Developer Manuals. + +Error events show where the decoder lost the trace. Error events are quite important. Users must know if what they are seeing is a complete picture or not. diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index e2a4c5e0dbe5..a3abe04c779d 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -3,13 +3,15 @@ c synthesize branches events (calls only) r synthesize branches events (returns only) x synthesize transactions events + w synthesize ptwrite events + p synthesize power events e synthesize error events d create a debug log g synthesize a call chain (use with i or x) l synthesize last branch entries (use with i or x) s skip initial number of events - The default is all events i.e. the same as --itrace=ibxe + The default is all events i.e. the same as --itrace=ibxwpe In addition, the period (default 100000) for instructions events can be specified in units of: @@ -26,8 +28,8 @@ Also the number of last branch entries (default 64, max. 1024) for instructions or transactions events can be specified. - It is also possible to skip events generated (instructions, branches, transactions) - at the beginning. This is useful to ignore initialization code. + It is also possible to skip events generated (instructions, branches, transactions, + ptwrite, power) at the beginning. This is useful to ignore initialization code. --itrace=i0nss1000000 diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 6e6a8b22c859..721a447f046e 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -48,6 +48,39 @@ OPTIONS Ranges of CPUs are specified with -: 0-2. Default is to trace on all online CPUs. +-T:: +--trace-funcs=:: + Only trace functions given by the argument. Multiple functions + can be given by using this option more than once. The function + argument also can be a glob pattern. It will be passed to + 'set_ftrace_filter' in tracefs. + +-N:: +--notrace-funcs=:: + Do not trace functions given by the argument. Like -T option, + this can be used more than once to specify multiple functions + (or glob patterns). It will be passed to 'set_ftrace_notrace' + in tracefs. + +-G:: +--graph-funcs=:: + Set graph filter on the given function (or a glob pattern). + This is useful for the function_graph tracer only and enables + tracing for functions executed from the given function. + This can be used more than once to specify multiple functions. + It will be passed to 'set_graph_function' in tracefs. + +-g:: +--nograph-funcs=:: + Set graph notrace filter on the given function (or a glob pattern). + Like -G option, this is useful for the function_graph tracer only + and disables tracing for function executed from the given function. + This can be used more than once to specify multiple functions. + It will be passed to 'set_graph_notrace' in tracefs. + +-D:: +--graph-depth=:: + Set max depth for function graph tracer to follow SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index e6c9902c6d82..165c2b1d4317 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -240,9 +240,13 @@ Add a probe on schedule() function 12th line with recording cpu local variable: or ./perf probe --add='schedule:12 cpu' - this will add one or more probes which has the name start with "schedule". +Add one or more probes which has the name start with "schedule". - Add probes on lines in schedule() function which calls update_rq_clock(). + ./perf probe schedule* + or + ./perf probe --add='schedule*' + +Add probes on lines in schedule() function which calls update_rq_clock(). ./perf probe 'schedule;update_rq_clock*' or diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index dfbb506d2c34..142606c0ec9c 100644 --- a/tools/perf/Documentation/perf-script-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -39,7 +39,7 @@ EVENT HANDLERS When perf script is invoked using a trace script, a user-defined 'handler function' is called for each event in the trace. If there's no handler function defined for a given event type, the event is -ignored (or passed to a 'trace_handled' function, see below) and the +ignored (or passed to a 'trace_unhandled' function, see below) and the next event is processed. Most of the event's field values are passed as arguments to the diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 54acba221558..51ec2d20068a 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -149,10 +149,8 @@ def raw_syscalls__sys_enter(event_name, context, common_cpu, print "id=%d, args=%s\n" % \ (id, args), -def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs, - common_pid, common_comm): - print_header(event_name, common_cpu, common_secs, common_nsecs, - common_pid, common_comm) +def trace_unhandled(event_name, context, event_fields_dict): + print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) def print_header(event_name, cpu, secs, nsecs, pid, comm): print "%-20s %5u %05u.%09u %8u %-20s " % \ @@ -321,7 +319,7 @@ So those are the essential steps in writing and running a script. The process can be generalized to any tracepoint or set of tracepoints you're interested in - basically find the tracepoint(s) you're interested in by looking at the list of available events shown by -'perf list' and/or look in /sys/kernel/debug/tracing events for +'perf list' and/or look in /sys/kernel/debug/tracing/events/ for detailed event and field info, record the corresponding trace data using 'perf record', passing it the list of interesting events, generate a skeleton script using 'perf script -g python' and modify the @@ -334,7 +332,7 @@ right place, you can have your script listed alongside the other scripts listed by the 'perf script -l' command e.g.: ---- -root@tropicana:~# perf script -l +# perf script -l List of available trace scripts: wakeup-latency system-wide min/max/avg wakeup latency rw-by-file <comm> r/w activity for a program, by file @@ -383,8 +381,6 @@ source tree: ---- # ls -al kernel-source/tools/perf/scripts/python - -root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python total 32 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. @@ -399,7 +395,7 @@ otherwise your script won't show up at run-time), 'perf script -l' should show a new entry for your script: ---- -root@tropicana:~# perf script -l +# perf script -l List of available trace scripts: wakeup-latency system-wide min/max/avg wakeup latency rw-by-file <comm> r/w activity for a program, by file @@ -437,7 +433,7 @@ EVENT HANDLERS When perf script is invoked using a trace script, a user-defined 'handler function' is called for each event in the trace. If there's no handler function defined for a given event type, the event is -ignored (or passed to a 'trace_handled' function, see below) and the +ignored (or passed to a 'trace_unhandled' function, see below) and the next event is processed. Most of the event's field values are passed as arguments to the @@ -532,7 +528,7 @@ can implement a set of optional functions: gives scripts a chance to do setup tasks: ---- -def trace_begin: +def trace_begin(): pass ---- @@ -541,7 +537,7 @@ def trace_begin: as display results: ---- -def trace_end: +def trace_end(): pass ---- @@ -550,8 +546,7 @@ def trace_end: of common arguments are passed into it: ---- -def trace_unhandled(event_name, context, common_cpu, common_secs, - common_nsecs, common_pid, common_comm): +def trace_unhandled(event_name, context, event_fields_dict): pass ---- diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index cb0eda3925e6..5ee8796be96e 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,8 +116,9 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw, + srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, + callindent, insn, insnlen, synth. + Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -130,6 +131,14 @@ OPTIONS i.e., the specified fields apply to all event types if the type string is not given. + In addition to overriding fields, it is also possible to add or remove + fields from the defaults. For example + + -F -cpu,+insn + + removes the cpu field and adds the insn field. Adding/removing fields + cannot be mixed with normal overriding. + The arguments are processed in the order received. A later usage can reset a prior request. e.g.: @@ -185,6 +194,9 @@ OPTIONS instruction bytes and the instruction length of the current instruction. + The synth field is used by synthesized events which may be created when + Instruction Trace decoding. + Finally, a user may not set fields to none for all event types. i.e., -F "" is not allowed. @@ -203,6 +215,8 @@ OPTIONS is printed. This is the full execution path leading to the sample. This is only supported when the sample was recorded with perf record -b or -j any. + The brstackoff field will print an offset into a specific dso/binary. + -k:: --vmlinux=<file>:: vmlinux pathname @@ -311,6 +325,10 @@ include::itrace.txt[] Set the maximum number of program blocks to print with brstackasm for each sample. +--inline:: + If a callgraph address belongs to an inlined function, the inline stack + will be printed. Each entry has function name and file/line. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index bd0e4417f2be..698076313606 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -239,6 +239,20 @@ taskset. --no-merge:: Do not merge results from same PMUs. +--smi-cost:: +Measure SMI cost if msr/aperf/ and msr/smi/ events are supported. + +During the measurement, the /sys/device/cpu/freeze_on_smi will be set to +freeze core counters on SMI. +The aperf counter will not be effected by the setting. +The cost of SMI can be measured by (aperf - unhalted core cycles). + +In practice, the percentages of SMI cycles is very useful for performance +oriented analysis. --metric_only will be applied by default. +The output is SMI cycles%, equals to (aperf - unhalted core cycles) / aperf + +Users who wants to get the actual value can apply --no-metric-only. + EXAMPLES -------- diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 8354d04b392f..bdf0e87f9b29 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(srctree)/tools/scripts/Makefile.arch -$(call detected_var,ARCH) +$(call detected_var,SRCARCH) NO_PERF_REGS := 1 # Additional ARCH settings for ppc -ifeq ($(ARCH),powerpc) +ifeq ($(SRCARCH),powerpc) NO_PERF_REGS := 0 LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 endif # Additional ARCH settings for x86 -ifeq ($(ARCH),x86) +ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated @@ -43,12 +43,12 @@ ifeq ($(ARCH),x86) NO_PERF_REGS := 0 endif -ifeq ($(ARCH),arm) +ifeq ($(SRCARCH),arm) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-arm endif -ifeq ($(ARCH),arm64) +ifeq ($(SRCARCH),arm64) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif @@ -61,7 +61,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. -ifneq ($(ARCH),$(filter $(ARCH),x86 arm)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc)) NO_LIBDW_DWARF_UNWIND := 1 endif @@ -115,9 +115,9 @@ endif FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf -FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi # include ARCH specific config --include $(src-perf)/arch/$(ARCH)/Makefile +-include $(src-perf)/arch/$(SRCARCH)/Makefile ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET @@ -228,12 +228,12 @@ ifeq ($(DEBUG),0) endif INC_FLAGS += -I$(src-perf)/util/include -INC_FLAGS += -I$(src-perf)/arch/$(ARCH)/include +INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include INC_FLAGS += -I$(srctree)/tools/include/uapi INC_FLAGS += -I$(srctree)/tools/include/ -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/ -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/ +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/ +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/ # $(obj-perf) for generated common-cmds.h # $(obj-perf)/util for generated bison/flex headers @@ -355,7 +355,7 @@ ifndef NO_LIBELF ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) - msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); + msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled); NO_DWARF := 1 else CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) @@ -380,7 +380,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_BPF_PROLOGUE $(call detected,CONFIG_BPF_PROLOGUE) else - msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset()); + msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset()); endif else msg := $(warning DWARF support is off, BPF prologue is disabled); @@ -406,7 +406,7 @@ ifdef PERF_HAVE_JITDUMP endif endif -ifeq ($(ARCH),powerpc) +ifeq ($(SRCARCH),powerpc) ifndef NO_DWARF CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX endif @@ -487,7 +487,7 @@ else endif ifndef NO_LOCAL_LIBUNWIND - ifeq ($(ARCH),$(filter $(ARCH),arm arm64)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64)) $(call feature_check,libunwind-debug-frame) ifneq ($(feature-libunwind-debug-frame), 1) msg := $(warning No debug_frame support found in libunwind); @@ -740,7 +740,7 @@ ifeq (${IS_64_BIT}, 1) NO_PERF_READ_VDSO32 := 1 endif endif - ifneq ($(ARCH), x86) + ifneq ($(SRCARCH), x86) NO_PERF_READ_VDSOX32 := 1 endif ifndef NO_PERF_READ_VDSOX32 @@ -769,7 +769,7 @@ ifdef LIBBABELTRACE endif ifndef NO_AUXTRACE - ifeq ($(ARCH),x86) + ifeq ($(SRCARCH),x86) ifeq ($(feature-get_cpuid), 0) msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); NO_AUXTRACE := 1 @@ -872,7 +872,7 @@ sysconfdir = $(prefix)/etc ETC_PERFCONFIG = etc/perfconfig endif ifndef lib -ifeq ($(ARCH)$(IS_64_BIT), x861) +ifeq ($(SRCARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 79fe31f20a17..5008f51a08a2 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -226,7 +226,7 @@ endif ifeq ($(config),0) include $(srctree)/tools/scripts/Makefile.arch --include arch/$(ARCH)/Makefile +-include arch/$(SRCARCH)/Makefile endif # The FEATURE_DUMP_EXPORT holds location of the actual diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build index 109eb75cf7de..d9b6af837c7d 100644 --- a/tools/perf/arch/Build +++ b/tools/perf/arch/Build @@ -1,2 +1,2 @@ libperf-y += common.o -libperf-y += $(ARCH)/ +libperf-y += $(SRCARCH)/ diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 29361d9b635a..7ce3d1a25133 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -17,6 +17,7 @@ #include <api/fs/fs.h> #include <linux/bitops.h> +#include <linux/compiler.h> #include <linux/coresight-pmu.h> #include <linux/kernel.h> #include <linux/log2.h> @@ -202,19 +203,18 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME, opts->auxtrace_snapshot_size); - if (cs_etm_evsel) { - /* - * To obtain the auxtrace buffer file descriptor, the auxtrace - * event must come first. - */ - perf_evlist__to_front(evlist, cs_etm_evsel); - /* - * In the case of per-cpu mmaps, we need the CPU on the - * AUX event. - */ - if (!cpu_map__empty(cpus)) - perf_evsel__set_sample_bit(cs_etm_evsel, CPU); - } + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace + * event must come first. + */ + perf_evlist__to_front(evlist, cs_etm_evsel); + + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. + */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(cs_etm_evsel, CPU); /* Add dummy event to keep tracking */ if (opts->full_auxtrace) { @@ -583,8 +583,7 @@ static FILE *cs_device__open_file(const char *name) } -static __attribute__((format(printf, 2, 3))) -int cs_device__print_file(const char *name, const char *fmt, ...) +static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...) { va_list args; FILE *file; diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 837067f48a4c..6b40e9f01740 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -26,6 +26,7 @@ const char *const arm64_triplets[] = { const char *const powerpc_triplets[] = { "powerpc-unknown-linux-gnu-", + "powerpc-linux-gnu-", "powerpc64-unknown-linux-gnu-", "powerpc64-linux-gnu-", "powerpc64le-linux-gnu-", diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 90ad64b231cd..2e6595310420 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -5,4 +5,6 @@ libperf-y += perf_regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o + libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c new file mode 100644 index 000000000000..3a24b3c43273 --- /dev/null +++ b/tools/perf/arch/powerpc/util/unwind-libdw.c @@ -0,0 +1,73 @@ +#include <elfutils/libdwfl.h> +#include "../../util/unwind-libdw.h" +#include "../../util/perf_regs.h" +#include "../../util/event.h" + +/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */ +static const int special_regs[3][2] = { + { 65, PERF_REG_POWERPC_LINK }, + { 101, PERF_REG_POWERPC_XER }, + { 109, PERF_REG_POWERPC_CTR }, +}; + +bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) +{ + struct unwind_info *ui = arg; + struct regs_dump *user_regs = &ui->sample->user_regs; + Dwarf_Word dwarf_regs[32], dwarf_nip; + size_t i; + +#define REG(r) ({ \ + Dwarf_Word val = 0; \ + perf_reg_value(&val, user_regs, PERF_REG_POWERPC_##r); \ + val; \ +}) + + dwarf_regs[0] = REG(R0); + dwarf_regs[1] = REG(R1); + dwarf_regs[2] = REG(R2); + dwarf_regs[3] = REG(R3); + dwarf_regs[4] = REG(R4); + dwarf_regs[5] = REG(R5); + dwarf_regs[6] = REG(R6); + dwarf_regs[7] = REG(R7); + dwarf_regs[8] = REG(R8); + dwarf_regs[9] = REG(R9); + dwarf_regs[10] = REG(R10); + dwarf_regs[11] = REG(R11); + dwarf_regs[12] = REG(R12); + dwarf_regs[13] = REG(R13); + dwarf_regs[14] = REG(R14); + dwarf_regs[15] = REG(R15); + dwarf_regs[16] = REG(R16); + dwarf_regs[17] = REG(R17); + dwarf_regs[18] = REG(R18); + dwarf_regs[19] = REG(R19); + dwarf_regs[20] = REG(R20); + dwarf_regs[21] = REG(R21); + dwarf_regs[22] = REG(R22); + dwarf_regs[23] = REG(R23); + dwarf_regs[24] = REG(R24); + dwarf_regs[25] = REG(R25); + dwarf_regs[26] = REG(R26); + dwarf_regs[27] = REG(R27); + dwarf_regs[28] = REG(R28); + dwarf_regs[29] = REG(R29); + dwarf_regs[30] = REG(R30); + dwarf_regs[31] = REG(R31); + if (!dwfl_thread_state_registers(thread, 0, 32, dwarf_regs)) + return false; + + dwarf_nip = REG(NIP); + dwfl_thread_state_register_pc(thread, dwarf_nip); + for (i = 0; i < ARRAY_SIZE(special_regs); i++) { + Dwarf_Word val = 0; + perf_reg_value(&val, user_regs, special_regs[i][1]); + if (!dwfl_thread_state_registers(thread, + special_regs[i][0], 1, + &val)) + return false; + } + + return true; +} diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index 0f196eec9f48..3cbf6fad169f 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -1664,3 +1664,15 @@ "0f c7 1d 78 56 34 12 \txrstors 0x12345678",}, {{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",}, +{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "", +"f3 0f ae 20 \tptwritel (%eax)",}, +{{0xf3, 0x0f, 0xae, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f ae 25 78 56 34 12 \tptwritel 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%eax,%ecx,8)",}, +{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "", +"f3 0f ae 20 \tptwritel (%eax)",}, +{{0xf3, 0x0f, 0xae, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f ae 25 78 56 34 12 \tptwritel 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%eax,%ecx,8)",}, diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index af25bc8240d0..aa512fa944dd 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -1696,3 +1696,33 @@ "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",}, {{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", "41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",}, +{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "", +"f3 0f ae 20 \tptwritel (%rax)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0x20, }, 5, 0, "", "", +"f3 41 0f ae 20 \tptwritel (%r8)",}, +{{0xf3, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae 24 25 78 56 34 12 \tptwritel 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%rax,%rcx,8)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 41 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%r8,%rcx,8)",}, +{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "", +"f3 0f ae 20 \tptwritel (%rax)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0x20, }, 5, 0, "", "", +"f3 41 0f ae 20 \tptwritel (%r8)",}, +{{0xf3, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae 24 25 78 56 34 12 \tptwritel 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%rax,%rcx,8)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 41 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%r8,%rcx,8)",}, +{{0xf3, 0x48, 0x0f, 0xae, 0x20, }, 5, 0, "", "", +"f3 48 0f ae 20 \tptwriteq (%rax)",}, +{{0xf3, 0x49, 0x0f, 0xae, 0x20, }, 5, 0, "", "", +"f3 49 0f ae 20 \tptwriteq (%r8)",}, +{{0xf3, 0x48, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 48 0f ae 24 25 78 56 34 12 \tptwriteq 0x12345678",}, +{{0xf3, 0x48, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 48 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%rax,%rcx,8)",}, +{{0xf3, 0x49, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 49 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%r8,%rcx,8)",}, diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index 979487dae8d4..6cdb65d25b79 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -1343,6 +1343,26 @@ int main(void) asm volatile("xrstors 0x12345678(%rax,%rcx,8)"); asm volatile("xrstors 0x12345678(%r8,%rcx,8)"); + /* ptwrite */ + + asm volatile("ptwrite (%rax)"); + asm volatile("ptwrite (%r8)"); + asm volatile("ptwrite (0x12345678)"); + asm volatile("ptwrite 0x12345678(%rax,%rcx,8)"); + asm volatile("ptwrite 0x12345678(%r8,%rcx,8)"); + + asm volatile("ptwritel (%rax)"); + asm volatile("ptwritel (%r8)"); + asm volatile("ptwritel (0x12345678)"); + asm volatile("ptwritel 0x12345678(%rax,%rcx,8)"); + asm volatile("ptwritel 0x12345678(%r8,%rcx,8)"); + + asm volatile("ptwriteq (%rax)"); + asm volatile("ptwriteq (%r8)"); + asm volatile("ptwriteq (0x12345678)"); + asm volatile("ptwriteq 0x12345678(%rax,%rcx,8)"); + asm volatile("ptwriteq 0x12345678(%r8,%rcx,8)"); + #else /* #ifdef __x86_64__ */ /* bound r32, mem (same op code as EVEX prefix) */ @@ -2653,6 +2673,16 @@ int main(void) asm volatile("xrstors (0x12345678)"); asm volatile("xrstors 0x12345678(%eax,%ecx,8)"); + /* ptwrite */ + + asm volatile("ptwrite (%eax)"); + asm volatile("ptwrite (0x12345678)"); + asm volatile("ptwrite 0x12345678(%eax,%ecx,8)"); + + asm volatile("ptwritel (%eax)"); + asm volatile("ptwritel (0x12345678)"); + asm volatile("ptwritel 0x12345678(%eax,%ecx,8)"); + #endif /* #ifndef __x86_64__ */ /* Following line is a marker for the awk script - do not change */ diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index af2bce7a2cd6..781df40b2966 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -35,10 +35,6 @@ #define KiB_MASK(x) (KiB(x) - 1) #define MiB_MASK(x) (MiB(x) - 1) -#define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4) - -#define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60) - struct intel_bts_snapshot_ref { void *ref_buf; size_t ref_offset; diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index f630de0206a1..9535be57033f 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -40,10 +40,6 @@ #define KiB_MASK(x) (KiB(x) - 1) #define MiB_MASK(x) (MiB(x) - 1) -#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4) - -#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60) - #define INTEL_PT_PSB_PERIOD_NEAR 256 struct intel_pt_snapshot_ref { @@ -196,6 +192,7 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) int psb_cyc, psb_periods, psb_period; int pos = 0; u64 config; + char c; pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); @@ -229,6 +226,10 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) } } + if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 && + perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1) + pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch"); + pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 27de0c8c5c19..469d65b21122 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -700,7 +700,7 @@ static inline uint32_t lfsr_32(uint32_t lfsr) * kernel (KSM, zero page, etc.) cannot optimize away RAM * accesses: */ -static inline u64 access_data(u64 *data __attribute__((unused)), u64 val) +static inline u64 access_data(u64 *data, u64 val) { if (g->p.data_reads) val += *data; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 620a467ee304..475999e48f66 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1725,10 +1725,10 @@ static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name) tok; tok = strtok_r(NULL, ", ", &tmp)) { \ ret = _fn(hpp_list, tok); \ if (ret == -EINVAL) { \ - error("Invalid --fields key: `%s'", tok); \ + pr_err("Invalid --fields key: `%s'", tok); \ break; \ } else if (ret == -ESRCH) { \ - error("Unknown --fields key: `%s'", tok); \ + pr_err("Unknown --fields key: `%s'", tok); \ break; \ } \ } \ diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index 80668fa7556e..ece45582a48d 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -156,7 +156,7 @@ static int parse_config_arg(char *arg, char **var, char **value) int cmd_config(int argc, const char **argv) { - int i, ret = 0; + int i, ret = -1; struct perf_config_set *set; char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); const char *config_filename; @@ -186,10 +186,8 @@ int cmd_config(int argc, const char **argv) * because of reinitializing with options config file location. */ set = perf_config_set__new(); - if (!set) { - ret = -1; + if (!set) goto out_err; - } switch (actions) { case ACTION_LIST: @@ -197,41 +195,54 @@ int cmd_config(int argc, const char **argv) pr_err("Error: takes no arguments\n"); parse_options_usage(config_usage, config_options, "l", 1); } else { - ret = show_config(set); - if (ret < 0) + if (show_config(set) < 0) { pr_err("Nothing configured, " "please check your %s \n", config_filename); + goto out_err; + } } break; default: - if (argc) { - for (i = 0; argv[i]; i++) { - char *var, *value; - char *arg = strdup(argv[i]); - - if (!arg) { - pr_err("%s: strdup failed\n", __func__); - ret = -1; - break; - } + if (!argc) { + usage_with_options(config_usage, config_options); + break; + } - if (parse_config_arg(arg, &var, &value) < 0) { - free(arg); - ret = -1; - break; - } + for (i = 0; argv[i]; i++) { + char *var, *value; + char *arg = strdup(argv[i]); + + if (!arg) { + pr_err("%s: strdup failed\n", __func__); + goto out_err; + } - if (value == NULL) - ret = show_spec_config(set, var); - else - ret = set_config(set, config_filename, var, value); + if (parse_config_arg(arg, &var, &value) < 0) { free(arg); + goto out_err; } - } else - usage_with_options(config_usage, config_options); + + if (value == NULL) { + if (show_spec_config(set, var) < 0) { + pr_err("%s is not configured: %s\n", + var, config_filename); + free(arg); + goto out_err; + } + } else { + if (set_config(set, config_filename, var, value) < 0) { + pr_err("Failed to set '%s=%s' on %s\n", + var, value, config_filename); + free(arg); + goto out_err; + } + } + free(arg); + } } - perf_config_set__delete(set); + ret = 0; out_err: + perf_config_set__delete(set); return ret; } diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index eec5df80f5a3..0cd4cf6a344b 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1302,7 +1302,10 @@ static int diff__config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "diff.order")) { - sort_compute = perf_config_int(var, value); + int ret; + if (perf_config_int(&ret, var, value) < 0) + return -1; + sort_compute = ret; return 0; } if (!strcmp(var, "diff.compute")) { diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 9e0b35cd0eea..dd26c62c9893 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -28,9 +28,19 @@ #define DEFAULT_TRACER "function_graph" struct perf_ftrace { - struct perf_evlist *evlist; - struct target target; - const char *tracer; + struct perf_evlist *evlist; + struct target target; + const char *tracer; + struct list_head filters; + struct list_head notrace; + struct list_head graph_funcs; + struct list_head nograph_funcs; + int graph_depth; +}; + +struct filter_entry { + struct list_head list; + char name[]; }; static bool done; @@ -61,6 +71,7 @@ static int __write_tracing_file(const char *name, const char *val, bool append) int fd, ret = -1; ssize_t size = strlen(val); int flags = O_WRONLY; + char errbuf[512]; file = get_tracing_file(name); if (!file) { @@ -75,14 +86,16 @@ static int __write_tracing_file(const char *name, const char *val, bool append) fd = open(file, flags); if (fd < 0) { - pr_debug("cannot open tracing file: %s\n", name); + pr_debug("cannot open tracing file: %s: %s\n", + name, str_error_r(errno, errbuf, sizeof(errbuf))); goto out; } if (write(fd, val, size) == size) ret = 0; else - pr_debug("write '%s' to tracing/%s failed\n", val, name); + pr_debug("write '%s' to tracing/%s failed: %s\n", + val, name, str_error_r(errno, errbuf, sizeof(errbuf))); close(fd); out: @@ -101,6 +114,7 @@ static int append_tracing_file(const char *name, const char *val) } static int reset_tracing_cpu(void); +static void reset_tracing_filters(void); static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) { @@ -116,6 +130,10 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) if (reset_tracing_cpu() < 0) return -1; + if (write_tracing_file("max_graph_depth", "0") < 0) + return -1; + + reset_tracing_filters(); return 0; } @@ -181,6 +199,68 @@ static int reset_tracing_cpu(void) return ret; } +static int __set_tracing_filter(const char *filter_file, struct list_head *funcs) +{ + struct filter_entry *pos; + + list_for_each_entry(pos, funcs, list) { + if (append_tracing_file(filter_file, pos->name) < 0) + return -1; + } + + return 0; +} + +static int set_tracing_filters(struct perf_ftrace *ftrace) +{ + int ret; + + ret = __set_tracing_filter("set_ftrace_filter", &ftrace->filters); + if (ret < 0) + return ret; + + ret = __set_tracing_filter("set_ftrace_notrace", &ftrace->notrace); + if (ret < 0) + return ret; + + ret = __set_tracing_filter("set_graph_function", &ftrace->graph_funcs); + if (ret < 0) + return ret; + + /* old kernels do not have this filter */ + __set_tracing_filter("set_graph_notrace", &ftrace->nograph_funcs); + + return ret; +} + +static void reset_tracing_filters(void) +{ + write_tracing_file("set_ftrace_filter", " "); + write_tracing_file("set_ftrace_notrace", " "); + write_tracing_file("set_graph_function", " "); + write_tracing_file("set_graph_notrace", " "); +} + +static int set_tracing_depth(struct perf_ftrace *ftrace) +{ + char buf[16]; + + if (ftrace->graph_depth == 0) + return 0; + + if (ftrace->graph_depth < 0) { + pr_err("invalid graph depth: %d\n", ftrace->graph_depth); + return -1; + } + + snprintf(buf, sizeof(buf), "%d", ftrace->graph_depth); + + if (write_tracing_file("max_graph_depth", buf) < 0) + return -1; + + return 0; +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; @@ -223,11 +303,23 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_reset; } + if (set_tracing_filters(ftrace) < 0) { + pr_err("failed to set tracing filters\n"); + goto out_reset; + } + + if (set_tracing_depth(ftrace) < 0) { + pr_err("failed to set graph depth\n"); + goto out_reset; + } + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { pr_err("failed to set current_tracer to %s\n", ftrace->tracer); goto out_reset; } + setup_pager(); + trace_file = get_tracing_file("trace_pipe"); if (!trace_file) { pr_err("failed to open trace_pipe\n"); @@ -251,8 +343,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_close_fd; } - setup_pager(); - perf_evlist__start_workload(ftrace->evlist); while (!done) { @@ -307,6 +397,32 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb) return -1; } +static int parse_filter_func(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct list_head *head = opt->value; + struct filter_entry *entry; + + entry = malloc(sizeof(*entry) + strlen(str) + 1); + if (entry == NULL) + return -ENOMEM; + + strcpy(entry->name, str); + list_add_tail(&entry->list, head); + + return 0; +} + +static void delete_filter_func(struct list_head *head) +{ + struct filter_entry *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, head, list) { + list_del(&pos->list); + free(pos); + } +} + int cmd_ftrace(int argc, const char **argv) { int ret; @@ -330,9 +446,24 @@ int cmd_ftrace(int argc, const char **argv) "system-wide collection from all CPUs"), OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", "list of cpus to monitor"), + OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", + "trace given functions only", parse_filter_func), + OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func", + "do not trace given functions", parse_filter_func), + OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func", + "Set graph filter on given functions", parse_filter_func), + OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func", + "Set nograph filter on given functions", parse_filter_func), + OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth, + "Max depth for function graph tracer"), OPT_END() }; + INIT_LIST_HEAD(&ftrace.filters); + INIT_LIST_HEAD(&ftrace.notrace); + INIT_LIST_HEAD(&ftrace.graph_funcs); + INIT_LIST_HEAD(&ftrace.nograph_funcs); + ret = perf_config(perf_ftrace_config, &ftrace); if (ret < 0) return -1; @@ -348,12 +479,14 @@ int cmd_ftrace(int argc, const char **argv) target__strerror(&ftrace.target, ret, errbuf, 512); pr_err("%s\n", errbuf); - return -EINVAL; + goto out_delete_filters; } ftrace.evlist = perf_evlist__new(); - if (ftrace.evlist == NULL) - return -ENOMEM; + if (ftrace.evlist == NULL) { + ret = -ENOMEM; + goto out_delete_filters; + } ret = perf_evlist__create_maps(ftrace.evlist, &ftrace.target); if (ret < 0) @@ -364,5 +497,11 @@ int cmd_ftrace(int argc, const char **argv) out_delete_evlist: perf_evlist__delete(ftrace.evlist); +out_delete_filters: + delete_filter_func(&ftrace.filters); + delete_filter_func(&ftrace.notrace); + delete_filter_func(&ftrace.graph_funcs); + delete_filter_func(&ftrace.nograph_funcs); + return ret; } diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 492f8e14ab09..530a7f2fa0f3 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -108,10 +108,14 @@ out: return ret; } -static void exec_woman_emacs(const char *path, const char *page) +static void exec_failed(const char *cmd) { char sbuf[STRERR_BUFSIZE]; + pr_warning("failed to exec '%s': %s", cmd, str_error_r(errno, sbuf, sizeof(sbuf))); +} +static void exec_woman_emacs(const char *path, const char *page) +{ if (!check_emacsclient_version()) { /* This works only with emacsclient version >= 22. */ char *man_page; @@ -122,8 +126,7 @@ static void exec_woman_emacs(const char *path, const char *page) execlp(path, "emacsclient", "-e", man_page, NULL); free(man_page); } - warning("failed to exec '%s': %s", path, - str_error_r(errno, sbuf, sizeof(sbuf))); + exec_failed(path); } } @@ -134,7 +137,6 @@ static void exec_man_konqueror(const char *path, const char *page) if (display && *display) { char *man_page; const char *filename = "kfmclient"; - char sbuf[STRERR_BUFSIZE]; /* It's simpler to launch konqueror using kfmclient. */ if (path) { @@ -155,33 +157,27 @@ static void exec_man_konqueror(const char *path, const char *page) execlp(path, filename, "newTab", man_page, NULL); free(man_page); } - warning("failed to exec '%s': %s", path, - str_error_r(errno, sbuf, sizeof(sbuf))); + exec_failed(path); } } static void exec_man_man(const char *path, const char *page) { - char sbuf[STRERR_BUFSIZE]; - if (!path) path = "man"; execlp(path, "man", page, NULL); - warning("failed to exec '%s': %s", path, - str_error_r(errno, sbuf, sizeof(sbuf))); + exec_failed(path); } static void exec_man_cmd(const char *cmd, const char *page) { - char sbuf[STRERR_BUFSIZE]; char *shell_cmd; if (asprintf(&shell_cmd, "%s %s", cmd, page) > 0) { execl("/bin/sh", "sh", "-c", shell_cmd, NULL); free(shell_cmd); } - warning("failed to exec '%s': %s", cmd, - str_error_r(errno, sbuf, sizeof(sbuf))); + exec_failed(cmd); } static void add_man_viewer(const char *name) @@ -214,6 +210,12 @@ static void do_add_man_viewer_info(const char *name, man_viewer_info_list = new; } +static void unsupported_man_viewer(const char *name, const char *var) +{ + pr_warning("'%s': path for unsupported man viewer.\n" + "Please consider using 'man.<tool>.%s' instead.", name, var); +} + static int add_man_viewer_path(const char *name, size_t len, const char *value) @@ -221,9 +223,7 @@ static int add_man_viewer_path(const char *name, if (supported_man_viewer(name, len)) do_add_man_viewer_info(name, len, value); else - warning("'%s': path for unsupported man viewer.\n" - "Please consider using 'man.<tool>.cmd' instead.", - name); + unsupported_man_viewer(name, "cmd"); return 0; } @@ -233,9 +233,7 @@ static int add_man_viewer_cmd(const char *name, const char *value) { if (supported_man_viewer(name, len)) - warning("'%s': cmd for supported man viewer.\n" - "Please consider using 'man.<tool>.path' instead.", - name); + unsupported_man_viewer(name, "path"); else do_add_man_viewer_info(name, len, value); @@ -247,8 +245,10 @@ static int add_man_viewer_info(const char *var, const char *value) const char *name = var + 4; const char *subkey = strrchr(name, '.'); - if (!subkey) - return error("Config with no key for man viewer: %s", name); + if (!subkey) { + pr_err("Config with no key for man viewer: %s", name); + return -1; + } if (!strcmp(subkey, ".path")) { if (!value) @@ -261,7 +261,7 @@ static int add_man_viewer_info(const char *var, const char *value) return add_man_viewer_cmd(name, subkey - name, value); } - warning("'%s': unsupported man viewer sub key.", subkey); + pr_warning("'%s': unsupported man viewer sub key.", subkey); return 0; } @@ -332,7 +332,7 @@ static void setup_man_path(void) setenv("MANPATH", new_path, 1); free(new_path); } else { - error("Unable to setup man path"); + pr_err("Unable to setup man path"); } } @@ -349,7 +349,7 @@ static void exec_viewer(const char *name, const char *page) else if (info) exec_man_cmd(info, page); else - warning("'%s': unknown man viewer.", name); + pr_warning("'%s': unknown man viewer.", name); } static int show_man_page(const char *perf_cmd) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 9409c9464667..0a8a1c45af87 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1715,7 +1715,7 @@ static int setup_slab_sorting(struct list_head *sort_list, const char *arg) if (!tok) break; if (slab_sort_dimension__add(tok, sort_list) < 0) { - error("Unknown slab --sort key: '%s'", tok); + pr_err("Unknown slab --sort key: '%s'", tok); free(str); return -1; } @@ -1741,7 +1741,7 @@ static int setup_page_sorting(struct list_head *sort_list, const char *arg) if (!tok) break; if (page_sort_dimension__add(tok, sort_list) < 0) { - error("Unknown page --sort key: '%s'", tok); + pr_err("Unknown page --sort key: '%s'", tok); free(str); return -1; } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ee7d0a82ccd0..17a14bcce34a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -453,7 +453,7 @@ try_again: } if (perf_evlist__apply_filters(evlist, &pos)) { - error("failed to set filter \"%s\" on event %s with %d (%s)\n", + pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", pos->filter, perf_evsel__name(pos), errno, str_error_r(errno, msg, sizeof(msg))); rc = -1; @@ -461,7 +461,7 @@ try_again: } if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) { - error("failed to set config \"%s\" on event %s with %d (%s)\n", + pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", err_term->val.drv_cfg, perf_evsel__name(pos), errno, str_error_r(errno, msg, sizeof(msg))); rc = -1; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 22478ff2b706..79a33eb1a10d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -94,10 +94,9 @@ static int report__config(const char *var, const char *value, void *cb) symbol_conf.cumulate_callchain = perf_config_bool(var, value); return 0; } - if (!strcmp(var, "report.queue-size")) { - rep->queue_size = perf_config_u64(var, value); - return 0; - } + if (!strcmp(var, "report.queue-size")) + return perf_config_u64(&rep->queue_size, var, value); + if (!strcmp(var, "report.sort_order")) { default_sort_order = strdup(value); return 0; @@ -558,6 +557,7 @@ static int __cmd_report(struct report *rep) ui__error("failed to set cpu bitmap\n"); return ret; } + session->itrace_synth_opts->cpu_bitmap = rep->cpu_bitmap; } if (rep->show_threads) { diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 39996c53995a..322b4def8411 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2066,7 +2066,7 @@ static void save_task_callchain(struct perf_sched *sched, if (thread__resolve_callchain(thread, cursor, evsel, sample, NULL, NULL, sched->max_stack + 2) != 0) { if (verbose > 0) - error("Failed to resolve callchain. Skipping\n"); + pr_err("Failed to resolve callchain. Skipping\n"); return; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index d05aec491cff..83cdc0a61fd6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -85,6 +85,8 @@ enum perf_output_field { PERF_OUTPUT_INSN = 1U << 21, PERF_OUTPUT_INSNLEN = 1U << 22, PERF_OUTPUT_BRSTACKINSN = 1U << 23, + PERF_OUTPUT_BRSTACKOFF = 1U << 24, + PERF_OUTPUT_SYNTH = 1U << 25, }; struct output_option { @@ -115,6 +117,13 @@ struct output_option { {.str = "insn", .field = PERF_OUTPUT_INSN}, {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, + {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, + {.str = "synth", .field = PERF_OUTPUT_SYNTH}, +}; + +enum { + OUTPUT_TYPE_SYNTH = PERF_TYPE_MAX, + OUTPUT_TYPE_MAX }; /* default set to maintain compatibility with current format */ @@ -124,7 +133,7 @@ static struct { unsigned int print_ip_opts; u64 fields; u64 invalid_fields; -} output[PERF_TYPE_MAX] = { +} output[OUTPUT_TYPE_MAX] = { [PERF_TYPE_HARDWARE] = { .user_set = false, @@ -182,12 +191,44 @@ static struct { .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, + + [OUTPUT_TYPE_SYNTH] = { + .user_set = false, + + .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | + PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | + PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | + PERF_OUTPUT_SYNTH, + + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, + }, }; +static inline int output_type(unsigned int type) +{ + switch (type) { + case PERF_TYPE_SYNTH: + return OUTPUT_TYPE_SYNTH; + default: + return type; + } +} + +static inline unsigned int attr_type(unsigned int type) +{ + switch (type) { + case OUTPUT_TYPE_SYNTH: + return PERF_TYPE_SYNTH; + default: + return type; + } +} + static bool output_set_by_user(void) { int j; - for (j = 0; j < PERF_TYPE_MAX; ++j) { + for (j = 0; j < OUTPUT_TYPE_MAX; ++j) { if (output[j].user_set) return true; } @@ -208,7 +249,7 @@ static const char *output_field2str(enum perf_output_field field) return str; } -#define PRINT_FIELD(x) (output[attr->type].fields & PERF_OUTPUT_##x) +#define PRINT_FIELD(x) (output[output_type(attr->type)].fields & PERF_OUTPUT_##x) static int perf_evsel__do_check_stype(struct perf_evsel *evsel, u64 sample_type, const char *sample_msg, @@ -216,7 +257,7 @@ static int perf_evsel__do_check_stype(struct perf_evsel *evsel, bool allow_user_set) { struct perf_event_attr *attr = &evsel->attr; - int type = attr->type; + int type = output_type(attr->type); const char *evname; if (attr->sample_type & sample_type) @@ -298,10 +339,10 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "selected.\n"); return -EINVAL; } - if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { - pr_err("Display of DSO requested but neither sample IP nor " - "sample address\nis selected. Hence, no addresses to convert " - "to DSO.\n"); + if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) && + !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) { + pr_err("Display of DSO requested but no address to convert. Select\n" + "sample IP, sample address, brstack, brstacksym, or brstackoff.\n"); return -EINVAL; } if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) { @@ -346,7 +387,7 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, static void set_print_ip_opts(struct perf_event_attr *attr) { - unsigned int type = attr->type; + unsigned int type = output_type(attr->type); output[type].print_ip_opts = 0; if (PRINT_FIELD(IP)) @@ -374,16 +415,17 @@ static int perf_session__check_output_opt(struct perf_session *session) unsigned int j; struct perf_evsel *evsel; - for (j = 0; j < PERF_TYPE_MAX; ++j) { - evsel = perf_session__find_first_evtype(session, j); + for (j = 0; j < OUTPUT_TYPE_MAX; ++j) { + evsel = perf_session__find_first_evtype(session, attr_type(j)); /* * even if fields is set to 0 (ie., show nothing) event must * exist if user explicitly includes it on the command line */ - if (!evsel && output[j].user_set && !output[j].wildcard_set) { + if (!evsel && output[j].user_set && !output[j].wildcard_set && + j != OUTPUT_TYPE_SYNTH) { pr_err("%s events do not exist. " - "Remove corresponding -f option to proceed.\n", + "Remove corresponding -F option to proceed.\n", event_type(j)); return -1; } @@ -514,18 +556,43 @@ mispred_str(struct branch_entry *br) return br->flags.predicted ? 'P' : 'M'; } -static void print_sample_brstack(struct perf_sample *sample) +static void print_sample_brstack(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr) { struct branch_stack *br = sample->branch_stack; - u64 i; + struct addr_location alf, alt; + u64 i, from, to; if (!(br && br->nr)) return; for (i = 0; i < br->nr; i++) { - printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ", - br->entries[i].from, - br->entries[i].to, + from = br->entries[i].from; + to = br->entries[i].to; + + if (PRINT_FIELD(DSO)) { + memset(&alf, 0, sizeof(alf)); + memset(&alt, 0, sizeof(alt)); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); + } + + printf("0x%"PRIx64, from); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } + + printf("/0x%"PRIx64, to); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } + + printf("/%c/%c/%c/%d ", mispred_str( br->entries + i), br->entries[i].flags.in_tx? 'X' : '-', br->entries[i].flags.abort? 'A' : '-', @@ -534,7 +601,8 @@ static void print_sample_brstack(struct perf_sample *sample) } static void print_sample_brstacksym(struct perf_sample *sample, - struct thread *thread) + struct thread *thread, + struct perf_event_attr *attr) { struct branch_stack *br = sample->branch_stack; struct addr_location alf, alt; @@ -559,8 +627,18 @@ static void print_sample_brstacksym(struct perf_sample *sample, alt.sym = map__find_symbol(alt.map, alt.addr); symbol__fprintf_symname_offs(alf.sym, &alf, stdout); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } putchar('/'); symbol__fprintf_symname_offs(alt.sym, &alt, stdout); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } printf("/%c/%c/%c/%d ", mispred_str( br->entries + i), br->entries[i].flags.in_tx? 'X' : '-', @@ -569,6 +647,51 @@ static void print_sample_brstacksym(struct perf_sample *sample, } } +static void print_sample_brstackoff(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr) +{ + struct branch_stack *br = sample->branch_stack; + struct addr_location alf, alt; + u64 i, from, to; + + if (!(br && br->nr)) + return; + + for (i = 0; i < br->nr; i++) { + + memset(&alf, 0, sizeof(alf)); + memset(&alt, 0, sizeof(alt)); + from = br->entries[i].from; + to = br->entries[i].to; + + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); + if (alf.map && !alf.map->dso->adjust_symbols) + from = map__map_ip(alf.map, from); + + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); + if (alt.map && !alt.map->dso->adjust_symbols) + to = map__map_ip(alt.map, to); + + printf("0x%"PRIx64, from); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alf.map, stdout); + printf(")"); + } + printf("/0x%"PRIx64, to); + if (PRINT_FIELD(DSO)) { + printf("("); + map__fprintf_dsoname(alt.map, stdout); + printf(")"); + } + printf("/%c/%c/%c/%d ", + mispred_str(br->entries + i), + br->entries[i].flags.in_tx ? 'X' : '-', + br->entries[i].flags.abort ? 'A' : '-', + br->entries[i].flags.cycles); + } +} #define MAXBB 16384UL static int grab_bb(u8 *buffer, u64 start, u64 end, @@ -906,6 +1029,7 @@ static void print_sample_bts(struct perf_sample *sample, struct machine *machine) { struct perf_event_attr *attr = &evsel->attr; + unsigned int type = output_type(attr->type); bool print_srcline_last = false; if (PRINT_FIELD(CALLINDENT)) @@ -913,7 +1037,7 @@ static void print_sample_bts(struct perf_sample *sample, /* print branch_from information */ if (PRINT_FIELD(IP)) { - unsigned int print_opts = output[attr->type].print_ip_opts; + unsigned int print_opts = output[type].print_ip_opts; struct callchain_cursor *cursor = NULL; if (symbol_conf.use_callchain && sample->callchain && @@ -936,7 +1060,7 @@ static void print_sample_bts(struct perf_sample *sample, /* print branch_to information */ if (PRINT_FIELD(ADDR) || ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && - !output[attr->type].user_set)) { + !output[type].user_set)) { printf(" => "); print_sample_addr(sample, thread, attr); } @@ -1079,6 +1203,127 @@ static void print_sample_bpf_output(struct perf_sample *sample) (char *)(sample->raw_data)); } +static void print_sample_spacing(int len, int spacing) +{ + if (len > 0 && len < spacing) + printf("%*s", spacing - len, ""); +} + +static void print_sample_pt_spacing(int len) +{ + print_sample_spacing(len, 34); +} + +static void print_sample_synth_ptwrite(struct perf_sample *sample) +{ + struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample); + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + len = printf(" IP: %u payload: %#" PRIx64 " ", + data->ip, le64_to_cpu(data->payload)); + print_sample_pt_spacing(len); +} + +static void print_sample_synth_mwait(struct perf_sample *sample) +{ + struct perf_synth_intel_mwait *data = perf_sample__synth_ptr(sample); + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + len = printf(" hints: %#x extensions: %#x ", + data->hints, data->extensions); + print_sample_pt_spacing(len); +} + +static void print_sample_synth_pwre(struct perf_sample *sample) +{ + struct perf_synth_intel_pwre *data = perf_sample__synth_ptr(sample); + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + len = printf(" hw: %u cstate: %u sub-cstate: %u ", + data->hw, data->cstate, data->subcstate); + print_sample_pt_spacing(len); +} + +static void print_sample_synth_exstop(struct perf_sample *sample) +{ + struct perf_synth_intel_exstop *data = perf_sample__synth_ptr(sample); + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + len = printf(" IP: %u ", data->ip); + print_sample_pt_spacing(len); +} + +static void print_sample_synth_pwrx(struct perf_sample *sample) +{ + struct perf_synth_intel_pwrx *data = perf_sample__synth_ptr(sample); + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + len = printf(" deepest cstate: %u last cstate: %u wake reason: %#x ", + data->deepest_cstate, data->last_cstate, + data->wake_reason); + print_sample_pt_spacing(len); +} + +static void print_sample_synth_cbr(struct perf_sample *sample) +{ + struct perf_synth_intel_cbr *data = perf_sample__synth_ptr(sample); + unsigned int percent, freq; + int len; + + if (perf_sample__bad_synth_size(sample, *data)) + return; + + freq = (le32_to_cpu(data->freq) + 500) / 1000; + len = printf(" cbr: %2u freq: %4u MHz ", data->cbr, freq); + if (data->max_nonturbo) { + percent = (5 + (1000 * data->cbr) / data->max_nonturbo) / 10; + len += printf("(%3u%%) ", percent); + } + print_sample_pt_spacing(len); +} + +static void print_sample_synth(struct perf_sample *sample, + struct perf_evsel *evsel) +{ + switch (evsel->attr.config) { + case PERF_SYNTH_INTEL_PTWRITE: + print_sample_synth_ptwrite(sample); + break; + case PERF_SYNTH_INTEL_MWAIT: + print_sample_synth_mwait(sample); + break; + case PERF_SYNTH_INTEL_PWRE: + print_sample_synth_pwre(sample); + break; + case PERF_SYNTH_INTEL_EXSTOP: + print_sample_synth_exstop(sample); + break; + case PERF_SYNTH_INTEL_PWRX: + print_sample_synth_pwrx(sample); + break; + case PERF_SYNTH_INTEL_CBR: + print_sample_synth_cbr(sample); + break; + default: + break; + } +} + struct perf_script { struct perf_tool tool; struct perf_session *session; @@ -1132,8 +1377,9 @@ static void process_event(struct perf_script *script, { struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; + unsigned int type = output_type(attr->type); - if (output[attr->type].fields == 0) + if (output[type].fields == 0) return; print_sample_start(sample, thread, evsel); @@ -1162,6 +1408,10 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(TRACE)) event_format__print(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size); + + if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH)) + print_sample_synth(sample, evsel); + if (PRINT_FIELD(ADDR)) print_sample_addr(sample, thread, attr); @@ -1180,16 +1430,18 @@ static void process_event(struct perf_script *script, cursor = &callchain_cursor; putchar(cursor ? '\n' : ' '); - sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout); + sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, stdout); } if (PRINT_FIELD(IREGS)) print_sample_iregs(sample, attr); if (PRINT_FIELD(BRSTACK)) - print_sample_brstack(sample); + print_sample_brstack(sample, thread, attr); else if (PRINT_FIELD(BRSTACKSYM)) - print_sample_brstacksym(sample, thread); + print_sample_brstacksym(sample, thread, attr); + else if (PRINT_FIELD(BRSTACKOFF)) + print_sample_brstackoff(sample, thread, attr); if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); @@ -1325,7 +1577,8 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, evlist = *pevlist; evsel = perf_evlist__last(*pevlist); - if (evsel->attr.type >= PERF_TYPE_MAX) + if (evsel->attr.type >= PERF_TYPE_MAX && + evsel->attr.type != PERF_TYPE_SYNTH) return 0; evlist__for_each_entry(evlist, pos) { @@ -1727,6 +1980,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused, int rc = 0; char *str = strdup(arg); int type = -1; + enum { DEFAULT, SET, ADD, REMOVE } change = DEFAULT; if (!str) return -ENOMEM; @@ -1749,6 +2003,8 @@ static int parse_output_fields(const struct option *opt __maybe_unused, type = PERF_TYPE_RAW; else if (!strcmp(str, "break")) type = PERF_TYPE_BREAKPOINT; + else if (!strcmp(str, "synth")) + type = OUTPUT_TYPE_SYNTH; else { fprintf(stderr, "Invalid event type in field string.\n"); rc = -EINVAL; @@ -1772,23 +2028,44 @@ static int parse_output_fields(const struct option *opt __maybe_unused, goto out; } + /* Don't override defaults for +- */ + if (strchr(str, '+') || strchr(str, '-')) + goto parse; + if (output_set_by_user()) pr_warning("Overriding previous field request for all events.\n"); - for (j = 0; j < PERF_TYPE_MAX; ++j) { + for (j = 0; j < OUTPUT_TYPE_MAX; ++j) { output[j].fields = 0; output[j].user_set = true; output[j].wildcard_set = true; } } +parse: for (tok = strtok_r(tok, ",", &strtok_saveptr); tok; tok = strtok_r(NULL, ",", &strtok_saveptr)) { + if (*tok == '+') { + if (change == SET) + goto out_badmix; + change = ADD; + tok++; + } else if (*tok == '-') { + if (change == SET) + goto out_badmix; + change = REMOVE; + tok++; + } else { + if (change != SET && change != DEFAULT) + goto out_badmix; + change = SET; + } + for (i = 0; i < imax; ++i) { if (strcmp(tok, all_output_options[i].str) == 0) break; } if (i == imax && strcmp(tok, "flags") == 0) { - print_flags = true; + print_flags = change == REMOVE ? false : true; continue; } if (i == imax) { @@ -1801,12 +2078,16 @@ static int parse_output_fields(const struct option *opt __maybe_unused, /* add user option to all events types for * which it is valid */ - for (j = 0; j < PERF_TYPE_MAX; ++j) { + for (j = 0; j < OUTPUT_TYPE_MAX; ++j) { if (output[j].invalid_fields & all_output_options[i].field) { pr_warning("\'%s\' not valid for %s events. Ignoring.\n", all_output_options[i].str, event_type(j)); - } else - output[j].fields |= all_output_options[i].field; + } else { + if (change == REMOVE) + output[j].fields &= ~all_output_options[i].field; + else + output[j].fields |= all_output_options[i].field; + } } } else { if (output[type].invalid_fields & all_output_options[i].field) { @@ -1826,7 +2107,11 @@ static int parse_output_fields(const struct option *opt __maybe_unused, "Events will not be displayed.\n", event_type(type)); } } + goto out; +out_badmix: + fprintf(stderr, "Cannot mix +-field with overridden fields\n"); + rc = -EINVAL; out: free(str); return rc; @@ -2444,10 +2729,11 @@ int cmd_script(int argc, const char **argv) symbol__config_symfs), OPT_CALLBACK('F', "fields", NULL, "str", "comma separated output fields prepend with 'type:'. " - "Valid types: hw,sw,trace,raw. " + "+field to add and -field to remove." + "Valid types: hw,sw,trace,raw,synth. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,period,iregs,brstack,brstacksym,flags," - "bpf-output,callindent,insn,insnlen,brstackinsn", + "bpf-output,callindent,insn,insnlen,brstackinsn,synth", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), @@ -2494,6 +2780,8 @@ int cmd_script(int argc, const char **argv) "Enable kernel symbol demangling"), OPT_STRING(0, "time", &script.time_str, "str", "Time span of interest (start,stop)"), + OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, + "Show inline function"), OPT_END() }; const char * const script_subcommands[] = { "record", "report", NULL }; @@ -2704,6 +2992,7 @@ int cmd_script(int argc, const char **argv) err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); if (err < 0) goto out_delete; + itrace_synth_opts.cpu_bitmap = cpu_bitmap; } if (!no_callchain) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a935b5023732..48ac53b199fc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -86,6 +86,7 @@ #define DEFAULT_SEPARATOR " " #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" +#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" static void print_counters(struct timespec *ts, int argc, const char **argv); @@ -122,6 +123,14 @@ static const char * topdown_attrs[] = { NULL, }; +static const char *smi_cost_attrs = { + "{" + "msr/aperf/," + "msr/smi/," + "cycles" + "}" +}; + static struct perf_evlist *evsel_list; static struct target target = { @@ -137,6 +146,8 @@ static bool null_run = false; static int detailed_run = 0; static bool transaction_run; static bool topdown_run = false; +static bool smi_cost = false; +static bool smi_reset = false; static bool big_num = true; static int big_num_opt = -1; static const char *csv_sep = NULL; @@ -625,14 +636,14 @@ try_again: } if (perf_evlist__apply_filters(evsel_list, &counter)) { - error("failed to set filter \"%s\" on event %s with %d (%s)\n", + pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", counter->filter, perf_evsel__name(counter), errno, str_error_r(errno, msg, sizeof(msg))); return -1; } if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { - error("failed to set config \"%s\" on event %s with %d (%s)\n", + pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", err_term->val.drv_cfg, perf_evsel__name(counter), errno, str_error_r(errno, msg, sizeof(msg))); return -1; @@ -1578,6 +1589,7 @@ static void print_header(int argc, const char **argv) static void print_footer(void) { FILE *output = stat_config.output; + int n; if (!null_run) fprintf(output, "\n"); @@ -1590,7 +1602,9 @@ static void print_footer(void) } fprintf(output, "\n\n"); - if (print_free_counters_hint) + if (print_free_counters_hint && + sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && + n > 0) fprintf(output, "Some events weren't counted. Try disabling the NMI watchdog:\n" " echo 0 > /proc/sys/kernel/nmi_watchdog\n" @@ -1779,6 +1793,8 @@ static const struct option stat_options[] = { "Only print computed metrics. No raw values", enable_metric_only), OPT_BOOLEAN(0, "topdown", &topdown_run, "measure topdown level 1 statistics"), + OPT_BOOLEAN(0, "smi-cost", &smi_cost, + "measure SMI cost"), OPT_END() }; @@ -2157,6 +2173,39 @@ static int add_default_attributes(void) return 0; } + if (smi_cost) { + int smi; + + if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { + fprintf(stderr, "freeze_on_smi is not supported.\n"); + return -1; + } + + if (!smi) { + if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) { + fprintf(stderr, "Failed to set freeze_on_smi.\n"); + return -1; + } + smi_reset = true; + } + + if (pmu_have_event("msr", "aperf") && + pmu_have_event("msr", "smi")) { + if (!force_metric_only) + metric_only = true; + err = parse_events(evsel_list, smi_cost_attrs, NULL); + } else { + fprintf(stderr, "To measure SMI cost, it needs " + "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); + return -1; + } + if (err) { + fprintf(stderr, "Cannot set up SMI cost events\n"); + return -1; + } + return 0; + } + if (topdown_run) { char *str = NULL; bool warn = false; @@ -2739,6 +2788,9 @@ int cmd_stat(int argc, const char **argv) perf_stat__exit_aggr_mode(); perf_evlist__free_stats(evsel_list); out: + if (smi_cost && smi_reset) + sysfs__write_int(FREEZE_ON_SMI_PATH, 0); + perf_evlist__delete(evsel_list); return status; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 10b6362ca0bf..6052376634c0 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__disassemble(sym, map, NULL, 0); + err = symbol__disassemble(sym, map, NULL, 0, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; @@ -958,7 +958,7 @@ static int __cmd_top(struct perf_top *top) ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term); if (ret) { - error("failed to set config \"%s\" on event %s with %d (%s)\n", + pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", err_term->val.drv_cfg, perf_evsel__name(pos), errno, str_error_r(errno, msg, sizeof(msg))); goto out_delete; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d014350adc52..4b2a5d298197 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -681,6 +681,10 @@ static struct syscall_fmt { { .name = "mlockall", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mmap", .hexret = true, +/* The standard mmap maps to old_mmap on s390x */ +#if defined(__s390x__) + .alias = "old_mmap", +#endif .arg_scnprintf = { [0] = SCA_HEX, /* addr */ [2] = SCA_MMAP_PROT, /* prot */ [3] = SCA_MMAP_FLAGS, /* flags */ }, }, diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index e9651a9d670e..cf36de7ea255 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -304,7 +304,7 @@ jvmti_close(void *agent) FILE *fp = agent; if (!fp) { - warnx("jvmti: incalid fd in close_agent"); + warnx("jvmti: invalid fd in close_agent"); return -1; } diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h index bedf5d0ba9ff..c53a41f48b63 100644 --- a/tools/perf/jvmti/jvmti_agent.h +++ b/tools/perf/jvmti/jvmti_agent.h @@ -5,8 +5,6 @@ #include <stdint.h> #include <jvmti.h> -#define __unused __attribute__((unused)) - #if defined(__cplusplus) extern "C" { #endif diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c index 5612641c69b4..6d710904c837 100644 --- a/tools/perf/jvmti/libjvmti.c +++ b/tools/perf/jvmti/libjvmti.c @@ -1,3 +1,4 @@ +#include <linux/compiler.h> #include <sys/types.h> #include <stdio.h> #include <string.h> @@ -238,7 +239,7 @@ code_generated_cb(jvmtiEnv *jvmti, } JNIEXPORT jint JNICALL -Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused) +Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __maybe_unused) { jvmtiEventCallbacks cb; jvmtiCapabilities caps1; @@ -313,7 +314,7 @@ Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused) } JNIEXPORT void JNICALL -Agent_OnUnload(JavaVM *jvm __unused) +Agent_OnUnload(JavaVM *jvm __maybe_unused) { int ret; diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 9213a1273697..999a4e878162 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -2,7 +2,7 @@ hostprogs := jevents jevents-y += json.o jsmn.o jevents.o pmu-events-y += pmu-events.o -JDIR = pmu-events/arch/$(ARCH) +JDIR = pmu-events/arch/$(SRCARCH) JSON = $(shell [ -d $(JDIR) ] && \ find $(JDIR) -name '*.json' -o -name 'mapfile.csv') # @@ -10,4 +10,4 @@ JSON = $(shell [ -d $(JDIR) ] && \ # directory and create tables in pmu-events.c. # $(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS) - $(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) + $(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index baa073f38334..bd0aabb2bd0f 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -48,10 +48,6 @@ #include "json.h" #include "jevents.h" -#ifndef __maybe_unused -#define __maybe_unused __attribute__((unused)) -#endif - int verbose; char *prog; diff --git a/tools/perf/scripts/python/bin/intel-pt-events-record b/tools/perf/scripts/python/bin/intel-pt-events-record new file mode 100644 index 000000000000..10fe2b6977d4 --- /dev/null +++ b/tools/perf/scripts/python/bin/intel-pt-events-record @@ -0,0 +1,13 @@ +#!/bin/bash + +# +# print Intel PT Power Events and PTWRITE. The intel_pt PMU event needs +# to be specified with appropriate config terms. +# +if ! echo "$@" | grep -q intel_pt ; then + echo "Options must include the Intel PT event e.g. -e intel_pt/pwr_evt,ptw/" + echo "and for power events it probably needs to be system wide i.e. -a option" + echo "For example: -a -e intel_pt/pwr_evt,branch=0/ sleep 1" + exit 1 +fi +perf record $@ diff --git a/tools/perf/scripts/python/bin/intel-pt-events-report b/tools/perf/scripts/python/bin/intel-pt-events-report new file mode 100644 index 000000000000..9a9c92fcd026 --- /dev/null +++ b/tools/perf/scripts/python/bin/intel-pt-events-report @@ -0,0 +1,3 @@ +#!/bin/bash +# description: print Intel PT Power Events and PTWRITE +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/intel-pt-events.py
\ No newline at end of file diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py new file mode 100644 index 000000000000..b19172d673af --- /dev/null +++ b/tools/perf/scripts/python/intel-pt-events.py @@ -0,0 +1,128 @@ +# intel-pt-events.py: Print Intel PT Power Events and PTWRITE +# Copyright (c) 2017, Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. + +import os +import sys +import struct + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +# These perf imports are not used at present +#from perf_trace_context import * +#from Core import * + +def trace_begin(): + print "Intel PT Power Events and PTWRITE" + +def trace_end(): + print "End" + +def trace_unhandled(event_name, context, event_fields_dict): + print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) + +def print_ptwrite(raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + flags = data[0] + payload = data[1] + exact_ip = flags & 1 + print "IP: %u payload: %#x" % (exact_ip, payload), + +def print_cbr(raw_buf): + data = struct.unpack_from("<BBBBII", raw_buf) + cbr = data[0] + f = (data[4] + 500) / 1000 + p = ((cbr * 1000 / data[2]) + 5) / 10 + print "%3u freq: %4u MHz (%3u%%)" % (cbr, f, p), + +def print_mwait(raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + payload = data[1] + hints = payload & 0xff + extensions = (payload >> 32) & 0x3 + print "hints: %#x extensions: %#x" % (hints, extensions), + +def print_pwre(raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + payload = data[1] + hw = (payload >> 7) & 1 + cstate = (payload >> 12) & 0xf + subcstate = (payload >> 8) & 0xf + print "hw: %u cstate: %u sub-cstate: %u" % (hw, cstate, subcstate), + +def print_exstop(raw_buf): + data = struct.unpack_from("<I", raw_buf) + flags = data[0] + exact_ip = flags & 1 + print "IP: %u" % (exact_ip), + +def print_pwrx(raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + payload = data[1] + deepest_cstate = payload & 0xf + last_cstate = (payload >> 4) & 0xf + wake_reason = (payload >> 8) & 0xf + print "deepest cstate: %u last cstate: %u wake reason: %#x" % (deepest_cstate, last_cstate, wake_reason), + +def print_common_start(comm, sample, name): + ts = sample["time"] + cpu = sample["cpu"] + pid = sample["pid"] + tid = sample["tid"] + print "%16s %5u/%-5u [%03u] %9u.%09u %7s:" % (comm, pid, tid, cpu, ts / 1000000000, ts %1000000000, name), + +def print_common_ip(sample, symbol, dso): + ip = sample["ip"] + print "%16x %s (%s)" % (ip, symbol, dso) + +def process_event(param_dict): + event_attr = param_dict["attr"] + sample = param_dict["sample"] + raw_buf = param_dict["raw_buf"] + comm = param_dict["comm"] + name = param_dict["ev_name"] + + # Symbol and dso info are not always resolved + if (param_dict.has_key("dso")): + dso = param_dict["dso"] + else: + dso = "[unknown]" + + if (param_dict.has_key("symbol")): + symbol = param_dict["symbol"] + else: + symbol = "[unknown]" + + if name == "ptwrite": + print_common_start(comm, sample, name) + print_ptwrite(raw_buf) + print_common_ip(sample, symbol, dso) + elif name == "cbr": + print_common_start(comm, sample, name) + print_cbr(raw_buf) + print_common_ip(sample, symbol, dso) + elif name == "mwait": + print_common_start(comm, sample, name) + print_mwait(raw_buf) + print_common_ip(sample, symbol, dso) + elif name == "pwre": + print_common_start(comm, sample, name) + print_pwre(raw_buf) + print_common_ip(sample, symbol, dso) + elif name == "exstop": + print_common_start(comm, sample, name) + print_exstop(raw_buf) + print_common_ip(sample, symbol, dso) + elif name == "pwrx": + print_common_start(comm, sample, name) + print_pwrx(raw_buf) + print_common_ip(sample, symbol, dso) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index af58ebc243ef..84222bdb8689 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -75,7 +75,7 @@ $(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/B $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ $(Q)echo ';' >> $@ -ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc)) +ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 0dd77494bb58..0e77b2cf61ec 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -18,6 +18,7 @@ * permissions. All the event text files are stored there. */ +#include <debug.h> #include <errno.h> #include <inttypes.h> #include <stdlib.h> @@ -29,14 +30,11 @@ #include <sys/stat.h> #include <unistd.h> #include "../perf.h" -#include "util.h" #include <subcmd/exec-cmd.h> #include "tests.h" #define ENV "PERF_TEST_ATTR" -extern int verbose; - static char *dir; void test_attr__init(void) @@ -138,8 +136,10 @@ void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, { int errno_saved = errno; - if (store_event(attr, pid, cpu, fd, group_fd, flags)) - die("test attr FAILED"); + if (store_event(attr, pid, cpu, fd, group_fd, flags)) { + pr_err("test attr FAILED"); + exit(128); + } errno = errno_saved; } diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index 1091bd47adfd..cdf21a9d0c35 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -16,6 +16,13 @@ class Fail(Exception): def getMsg(self): return '\'%s\' - %s' % (self.test.path, self.msg) +class Notest(Exception): + def __init__(self, test, arch): + self.arch = arch + self.test = test + def getMsg(self): + return '[%s] \'%s\'' % (self.arch, self.test.path) + class Unsup(Exception): def __init__(self, test): self.test = test @@ -112,6 +119,9 @@ class Event(dict): # 'command' - perf command name # 'args' - special command arguments # 'ret' - expected command return value (0 by default) +# 'arch' - architecture specific test (optional) +# comma separated list, ! at the beginning +# negates it. # # [eventX:base] # - one or multiple instances in file @@ -134,6 +144,12 @@ class Test(object): except: self.ret = 0 + try: + self.arch = parser.get('config', 'arch') + log.warning("test limitation '%s'" % self.arch) + except: + self.arch = '' + self.expect = {} self.result = {} log.debug(" loading expected events"); @@ -145,6 +161,31 @@ class Test(object): else: return True + def skip_test(self, myarch): + # If architecture not set always run test + if self.arch == '': + # log.warning("test for arch %s is ok" % myarch) + return False + + # Allow multiple values in assignment separated by ',' + arch_list = self.arch.split(',') + + # Handle negated list such as !s390x,ppc + if arch_list[0][0] == '!': + arch_list[0] = arch_list[0][1:] + log.warning("excluded architecture list %s" % arch_list) + for arch_item in arch_list: + # log.warning("test for %s arch is %s" % (arch_item, myarch)) + if arch_item == myarch: + return True + return False + + for arch_item in arch_list: + # log.warning("test for architecture '%s' current '%s'" % (arch_item, myarch)) + if arch_item == myarch: + return False + return True + def load_events(self, path, events): parser_event = ConfigParser.SafeConfigParser() parser_event.read(path) @@ -168,6 +209,11 @@ class Test(object): events[section] = e def run_cmd(self, tempdir): + junk1, junk2, junk3, junk4, myarch = (os.uname()) + + if self.skip_test(myarch): + raise Notest(self, myarch) + cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir, self.perf, self.command, tempdir, self.args) ret = os.WEXITSTATUS(os.system(cmd)) @@ -265,6 +311,8 @@ def run_tests(options): Test(f, options).run() except Unsup, obj: log.warning("unsupp %s" % obj.getMsg()) + except Notest, obj: + log.warning("skipped %s" % obj.getMsg()) def setup_log(verbose): global log diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index e7664fe3bd33..39bbb97cd30a 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -62,8 +62,7 @@ static void __test_function(volatile long *ptr) } #endif -__attribute__ ((noinline)) -static int test_function(void) +static noinline int test_function(void) { __test_function(&the_var); the_var++; @@ -288,3 +287,17 @@ int test__bp_signal(int subtest __maybe_unused) return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ? TEST_OK : TEST_FAIL; } + +bool test__bp_signal_is_supported(void) +{ +/* + * The powerpc so far does not have support to even create + * instruction breakpoint using the perf event interface. + * Once it's there we can release this. + */ +#ifdef __powerpc__ + return false; +#else + return true; +#endif +} diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c index 89f92fa67cc4..3b1ac6f31b15 100644 --- a/tools/perf/tests/bp_signal_overflow.c +++ b/tools/perf/tests/bp_signal_overflow.c @@ -28,8 +28,7 @@ static int overflows; -__attribute__ ((noinline)) -static int test_function(void) +static noinline int test_function(void) { return time(NULL); } diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c index 7230e62c70fc..b4ebc75e25ae 100644 --- a/tools/perf/tests/bpf-script-test-prologue.c +++ b/tools/perf/tests/bpf-script-test-prologue.c @@ -10,6 +10,15 @@ #include <uapi/linux/fs.h> +/* + * If CONFIG_PROFILE_ALL_BRANCHES is selected, + * 'if' is redefined after include kernel header. + * Recover 'if' for BPF object code. + */ +#ifdef if +# undef if +#endif + #define FMODE_READ 0x1 #define FMODE_WRITE 0x2 diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 9e08d297f1a9..3ccfd58a8c3c 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -97,10 +97,12 @@ static struct test generic_tests[] = { { .desc = "Breakpoint overflow signal handler", .func = test__bp_signal, + .is_supported = test__bp_signal_is_supported, }, { .desc = "Breakpoint overflow sampling", .func = test__bp_signal_overflow, + .is_supported = test__bp_signal_is_supported, }, { .desc = "Number of exit events of a simple workload", @@ -401,6 +403,11 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) if (!perf_test__matches(t, curr, argc, argv)) continue; + if (t->is_supported && !t->is_supported()) { + pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc); + continue; + } + pr_info("%2d: %-*s:", i, width, t->desc); if (intlist__find(skiplist, i)) { diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 1f14e7612cbb..94b7c7b02bde 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -229,6 +229,8 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, unsigned char buf2[BUFSZ]; size_t ret_len; u64 objdump_addr; + const char *objdump_name; + char decomp_name[KMOD_DECOMP_LEN]; int ret; pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr); @@ -289,9 +291,25 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, state->done[state->done_cnt++] = al.map->start; } + objdump_name = al.map->dso->long_name; + if (dso__needs_decompress(al.map->dso)) { + if (dso__decompress_kmodule_path(al.map->dso, objdump_name, + decomp_name, + sizeof(decomp_name)) < 0) { + pr_debug("decompression failed\n"); + return -1; + } + + objdump_name = decomp_name; + } + /* Read the object code using objdump */ objdump_addr = map__rip_2objdump(al.map, al.addr); - ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len); + ret = read_via_objdump(objdump_name, objdump_addr, buf2, len); + + if (dso__needs_decompress(al.map->dso)) + unlink(objdump_name); + if (ret > 0) { /* * The kernel maps are inaccurate - assume objdump is right in diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index dfe5c89e2049..3e56d08f7995 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -76,8 +76,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return strcmp((const char *) symbol, funcs[idx]); } -__attribute__ ((noinline)) -static int unwind_thread(struct thread *thread) +static noinline int unwind_thread(struct thread *thread) { struct perf_sample sample; unsigned long cnt = 0; @@ -108,8 +107,7 @@ static int unwind_thread(struct thread *thread) static int global_unwind_retval = -INT_MAX; -__attribute__ ((noinline)) -static int compare(void *p1, void *p2) +static noinline int compare(void *p1, void *p2) { /* Any possible value should be 'thread' */ struct thread *thread = *(struct thread **)p1; @@ -128,8 +126,7 @@ static int compare(void *p1, void *p2) return p1 - p2; } -__attribute__ ((noinline)) -static int krava_3(struct thread *thread) +static noinline int krava_3(struct thread *thread) { struct thread *array[2] = {thread, thread}; void *fp = &bsearch; @@ -147,14 +144,12 @@ static int krava_3(struct thread *thread) return global_unwind_retval; } -__attribute__ ((noinline)) -static int krava_2(struct thread *thread) +static noinline int krava_2(struct thread *thread) { return krava_3(thread); } -__attribute__ ((noinline)) -static int krava_1(struct thread *thread) +static noinline int krava_1(struct thread *thread) { return krava_2(thread); } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 7fad885491c5..812a053d1941 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1810,17 +1810,6 @@ static int test_pmu_events(void) return ret; } -static void debug_warn(const char *warn, va_list params) -{ - char msg[1024]; - - if (verbose <= 0) - return; - - vsnprintf(msg, sizeof(msg), warn, params); - fprintf(stderr, " Warning: %s\n", msg); -} - int test__parse_events(int subtest __maybe_unused) { int ret1, ret2 = 0; @@ -1832,8 +1821,6 @@ do { \ ret2 = ret1; \ } while (0) - set_warning_routine(debug_warn); - TEST_EVENTS(test__events); if (test_pmu()) diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 32873ec91a4e..cf00ebad2ef5 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -83,7 +83,7 @@ int test__task_exit(int subtest __maybe_unused) evsel = perf_evlist__first(evlist); evsel->attr.task = 1; - evsel->attr.sample_freq = 0; + evsel->attr.sample_freq = 1; evsel->attr.inherit = 0; evsel->attr.watermark = 0; evsel->attr.wakeup_events = 1; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 631859629403..577363809c9b 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -34,6 +34,7 @@ struct test { int (*get_nr)(void); const char *(*get_desc)(int subtest); } subtest; + bool (*is_supported)(void); }; /* Tests */ @@ -99,6 +100,8 @@ const char *test__clang_subtest_get_desc(int subtest); int test__clang_subtest_get_nr(void); int test__unit_number__scnprint(int subtest); +bool test__bp_signal_is_supported(void); + #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d990ad08a3c6..27f41f28dcb4 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -46,12 +46,15 @@ static struct annotate_browser_opt { .jump_arrows = true, }; +struct arch; + struct annotate_browser { struct ui_browser b; struct rb_root entries; struct rb_node *curr_hot; struct disasm_line *selection; struct disasm_line **offsets; + struct arch *arch; int nr_events; u64 start; int nr_asm_entries; @@ -125,43 +128,57 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int int i, pcnt_width = annotate_browser__pcnt_width(ab); double percent_max = 0.0; char bf[256]; + bool show_title = false; for (i = 0; i < ab->nr_events; i++) { if (bdl->samples[i].percent > percent_max) percent_max = bdl->samples[i].percent; } + if ((row == 0) && (dl->offset == -1 || percent_max == 0.0)) { + if (ab->have_cycles) { + if (dl->ipc == 0.0 && dl->cycles == 0) + show_title = true; + } else + show_title = true; + } + if (dl->offset != -1 && percent_max != 0.0) { - if (percent_max != 0.0) { - for (i = 0; i < ab->nr_events; i++) { - ui_browser__set_percent_color(browser, - bdl->samples[i].percent, - current_entry); - if (annotate_browser__opts.show_total_period) { - ui_browser__printf(browser, "%6" PRIu64 " ", - bdl->samples[i].nr); - } else { - ui_browser__printf(browser, "%6.2f ", - bdl->samples[i].percent); - } + for (i = 0; i < ab->nr_events; i++) { + ui_browser__set_percent_color(browser, + bdl->samples[i].percent, + current_entry); + if (annotate_browser__opts.show_total_period) { + ui_browser__printf(browser, "%6" PRIu64 " ", + bdl->samples[i].nr); + } else { + ui_browser__printf(browser, "%6.2f ", + bdl->samples[i].percent); } - } else { - ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); } } else { ui_browser__set_percent_color(browser, 0, current_entry); - ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); + + if (!show_title) + ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); + else + ui_browser__printf(browser, "%*s", 7, "Percent"); } if (ab->have_cycles) { if (dl->ipc) ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc); - else + else if (!show_title) ui_browser__write_nstring(browser, " ", IPC_WIDTH); + else + ui_browser__printf(browser, "%*s ", IPC_WIDTH - 1, "IPC"); + if (dl->cycles) ui_browser__printf(browser, "%*" PRIu64 " ", CYCLES_WIDTH - 1, dl->cycles); - else + else if (!show_title) ui_browser__write_nstring(browser, " ", CYCLES_WIDTH); + else + ui_browser__printf(browser, "%*s ", CYCLES_WIDTH - 1, "Cycle"); } SLsmg_write_char(' '); @@ -1056,7 +1073,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, (nr_pcnt - 1); } - err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), sizeof_bdl); + err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), + sizeof_bdl, &browser.arch); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index e99ba86158d2..d903fd493416 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -168,7 +168,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0); + err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), + 0, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 59addd52d9cd..ddb2c6fbdf91 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -210,6 +210,8 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, return 0; ret = b->callchain->max_depth - a->callchain->max_depth; + if (callchain_param.order == ORDER_CALLER) + ret = -ret; } return ret; } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 683f8340460c..be1caabb9290 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -239,10 +239,20 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op const char *s = strchr(ops->raw, '+'); const char *c = strchr(ops->raw, ','); - if (c++ != NULL) + /* + * skip over possible up to 2 operands to get to address, e.g.: + * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> + */ + if (c++ != NULL) { ops->target.addr = strtoull(c, NULL, 16); - else + if (!ops->target.addr) { + c = strchr(c, ','); + if (c++ != NULL) + ops->target.addr = strtoull(c, NULL, 16); + } + } else { ops->target.addr = strtoull(ops->raw, NULL, 16); + } if (s++ != NULL) { ops->target.offset = strtoull(s, NULL, 16); @@ -257,10 +267,27 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op static int jump__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { + const char *c = strchr(ops->raw, ','); + if (!ops->target.addr || ops->target.offset < 0) return ins__raw_scnprintf(ins, bf, size, ops); - return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset); + if (c != NULL) { + const char *c2 = strchr(c + 1, ','); + + /* check for 3-op insn */ + if (c2 != NULL) + c = c2; + c++; + + /* mirror arch objdump's space-after-comma style */ + if (*c == ' ') + c++; + } + + return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64, + ins->name, c ? c - ops->raw : 0, ops->raw, + ops->target.offset); } static struct ins_ops jump_ops = { @@ -1294,6 +1321,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil char linkname[PATH_MAX]; char *build_id_filename; char *build_id_path = NULL; + char *pos; if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && !dso__is_kcore(dso)) @@ -1313,7 +1341,14 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil if (!build_id_path) return -1; - dirname(build_id_path); + /* + * old style build-id cache has name of XX/XXXXXXX.. while + * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. + * extract the build-id part of dirname in the new style only. + */ + pos = strrchr(build_id_path, '/'); + if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) + dirname(build_id_path); if (dso__is_kcore(dso) || readlink(build_id_path, linkname, sizeof(linkname)) < 0 || @@ -1344,7 +1379,9 @@ static const char *annotate__norm_arch(const char *arch_name) return normalize_arch((char *)arch_name); } -int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize) +int symbol__disassemble(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; @@ -1370,6 +1407,9 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na if (arch == NULL) return -ENOTSUP; + if (parch) + *parch = arch; + if (arch->init) { err = arch->init(arch); if (err) { @@ -1396,31 +1436,10 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na sizeof(symfs_filename)); } } else if (dso__needs_decompress(dso)) { - char tmp[PATH_MAX]; - struct kmod_path m; - int fd; - bool ret; - - if (kmod_path__parse_ext(&m, symfs_filename)) - goto out; - - snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX"); - - fd = mkstemp(tmp); - if (fd < 0) { - free(m.ext); - goto out; - } - - ret = decompress_to_file(m.ext, symfs_filename, fd); - - if (ret) - pr_err("Cannot decompress %s %s\n", m.ext, symfs_filename); - - free(m.ext); - close(fd); + char tmp[KMOD_DECOMP_LEN]; - if (!ret) + if (dso__decompress_kmodule_path(dso, symfs_filename, + tmp, sizeof(tmp)) < 0) goto out; strcpy(symfs_filename, tmp); @@ -1429,7 +1448,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na snprintf(command, sizeof(command), "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s -C %s 2>/dev/null|grep -v %s:|expand", + " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", objdump_path ? objdump_path : "objdump", disassembler_style ? "-M " : "", disassembler_style ? disassembler_style : "", @@ -1887,7 +1906,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0) + if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), + 0, NULL) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 948aa8e6fd39..21055034aedd 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -158,7 +158,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize); +int symbol__disassemble(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 0daf63b9ee3e..5547457566a7 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -322,6 +322,13 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues, return auxtrace_queues__add_buffer(queues, idx, buffer); } +static bool filter_cpu(struct perf_session *session, int cpu) +{ + unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap; + + return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap); +} + int auxtrace_queues__add_event(struct auxtrace_queues *queues, struct perf_session *session, union perf_event *event, off_t data_offset, @@ -331,6 +338,9 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues, unsigned int idx; int err; + if (filter_cpu(session, event->auxtrace.cpu)) + return 0; + buffer = zalloc(sizeof(struct auxtrace_buffer)); if (!buffer) return -ENOMEM; @@ -947,6 +957,8 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) synth_opts->instructions = true; synth_opts->branches = true; synth_opts->transactions = true; + synth_opts->ptwrites = true; + synth_opts->pwr_events = true; synth_opts->errors = true; synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; @@ -1030,6 +1042,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'x': synth_opts->transactions = true; break; + case 'w': + synth_opts->ptwrites = true; + break; + case 'p': + synth_opts->pwr_events = true; + break; case 'e': synth_opts->errors = true; break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 9f0de72d58e2..33b5e6cdf38c 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -59,6 +59,8 @@ enum itrace_period_type { * @instructions: whether to synthesize 'instructions' events * @branches: whether to synthesize 'branches' events * @transactions: whether to synthesize events for transactions + * @ptwrites: whether to synthesize events for ptwrites + * @pwr_events: whether to synthesize power events * @errors: whether to synthesize decoder error events * @dont_decode: whether to skip decoding entirely * @log: write a decoding log @@ -72,6 +74,7 @@ enum itrace_period_type { * @period: 'instructions' events period * @period_type: 'instructions' events period type * @initial_skip: skip N events at the beginning. + * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all */ struct itrace_synth_opts { bool set; @@ -79,6 +82,8 @@ struct itrace_synth_opts { bool instructions; bool branches; bool transactions; + bool ptwrites; + bool pwr_events; bool errors; bool dont_decode; bool log; @@ -92,6 +97,7 @@ struct itrace_synth_opts { unsigned long long period; enum itrace_period_type period_type; unsigned long initial_skip; + unsigned long *cpu_bitmap; }; /** diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 168cc49654e7..e0148b081bdf 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -278,51 +278,6 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return bf; } -bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) -{ - char *id_name = NULL, *ch; - struct stat sb; - char sbuild_id[SBUILD_ID_SIZE]; - - if (!dso->has_build_id) - goto err; - - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); - id_name = build_id_cache__linkname(sbuild_id, NULL, 0); - if (!id_name) - goto err; - if (access(id_name, F_OK)) - goto err; - if (lstat(id_name, &sb) == -1) - goto err; - if ((size_t)sb.st_size > size - 1) - goto err; - if (readlink(id_name, bf, size - 1) < 0) - goto err; - - bf[sb.st_size] = '\0'; - - /* - * link should be: - * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92 - */ - ch = strrchr(bf, '/'); - if (!ch) - goto err; - if (ch - 3 < bf) - goto err; - - free(id_name); - return strncmp(".ko", ch - 3, 3) == 0; -err: - pr_err("Invalid build id: %s\n", id_name ? : - dso->long_name ? : - dso->short_name ? : - "[unknown]"); - free(id_name); - return false; -} - #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 8a89b195c1fc..96690a55c62c 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -17,7 +17,6 @@ char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, size_t size); char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); -bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 0328f297a748..0175765c05b9 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -5,6 +5,7 @@ #include <subcmd/pager.h> #include "../ui/ui.h" +#include <linux/compiler.h> #include <linux/string.h> #define CMD_EXEC_PATH "--exec-path" @@ -24,6 +25,6 @@ static inline int is_absolute_path(const char *path) return path[0] == '/'; } -char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +char *mkpath(const char *fmt, ...) __printf(1, 2); #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 81fc29ac798f..b4204b43ed58 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -621,14 +621,19 @@ enum match_result { static enum match_result match_chain_srcline(struct callchain_cursor_node *node, struct callchain_list *cnode) { - char *left = get_srcline(cnode->ms.map->dso, + char *left = NULL; + char *right = NULL; + enum match_result ret = MATCH_EQ; + int cmp; + + if (cnode->ms.map) + left = get_srcline(cnode->ms.map->dso, map__rip_2objdump(cnode->ms.map, cnode->ip), cnode->ms.sym, true, false); - char *right = get_srcline(node->map->dso, + if (node->map) + right = get_srcline(node->map->dso, map__rip_2objdump(node->map, node->ip), node->sym, true, false); - enum match_result ret = MATCH_EQ; - int cmp; if (left && right) cmp = strcmp(left, right); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 8d724f0fa5a8..31a7dea248d0 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -335,32 +335,42 @@ static int perf_parse_long(const char *value, long *ret) return 0; } -static void die_bad_config(const char *name) +static void bad_config(const char *name) { if (config_file_name) - die("bad config value for '%s' in %s", name, config_file_name); - die("bad config value for '%s'", name); + pr_warning("bad config value for '%s' in %s, ignoring...\n", name, config_file_name); + else + pr_warning("bad config value for '%s', ignoring...\n", name); } -u64 perf_config_u64(const char *name, const char *value) +int perf_config_u64(u64 *dest, const char *name, const char *value) { long long ret = 0; - if (!perf_parse_llong(value, &ret)) - die_bad_config(name); - return (u64) ret; + if (!perf_parse_llong(value, &ret)) { + bad_config(name); + return -1; + } + + *dest = ret; + return 0; } -int perf_config_int(const char *name, const char *value) +int perf_config_int(int *dest, const char *name, const char *value) { long ret = 0; - if (!perf_parse_long(value, &ret)) - die_bad_config(name); - return ret; + if (!perf_parse_long(value, &ret)) { + bad_config(name); + return -1; + } + *dest = ret; + return 0; } static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool) { + int ret; + *is_bool = 1; if (!value) return 1; @@ -371,7 +381,7 @@ static int perf_config_bool_or_int(const char *name, const char *value, int *is_ if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off")) return 0; *is_bool = 0; - return perf_config_int(name, value); + return perf_config_int(&ret, name, value) < 0 ? -1 : ret; } int perf_config_bool(const char *name, const char *value) @@ -657,8 +667,7 @@ static int perf_config_set__init(struct perf_config_set *set) user_config = strdup(mkpath("%s/.perfconfig", home)); if (user_config == NULL) { - warning("Not enough memory to process %s/.perfconfig, " - "ignoring it.", home); + pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home); goto out; } @@ -671,8 +680,7 @@ static int perf_config_set__init(struct perf_config_set *set) ret = 0; if (st.st_uid && (st.st_uid != geteuid())) { - warning("File %s not owned by current user or root, " - "ignoring it.", user_config); + pr_warning("File %s not owned by current user or root, ignoring it.", user_config); goto out_free; } @@ -795,7 +803,8 @@ void perf_config_set__delete(struct perf_config_set *set) */ int config_error_nonbool(const char *var) { - return error("Missing value for '%s'", var); + pr_err("Missing value for '%s'", var); + return -1; } void set_buildid_dir(const char *dir) diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 1a59a6b43f8b..b6bb11f3f165 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -27,8 +27,8 @@ extern const char *config_exclusive_filename; typedef int (*config_fn_t)(const char *, const char *, void *); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); -int perf_config_int(const char *, const char *); -u64 perf_config_u64(const char *, const char *); +int perf_config_int(int *dest, const char *, const char *); +int perf_config_u64(u64 *dest, const char *, const char *); int perf_config_bool(const char *, const char *); int config_error_nonbool(const char *); const char *perf_etc_perfconfig(void); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 89d50318833d..3149b70799fd 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1444,10 +1444,8 @@ static int convert__config(const char *var, const char *value, void *cb) { struct convert *c = cb; - if (!strcmp(var, "convert.queue-size")) { - c->queue_size = perf_config_u64(var, value); - return 0; - } + if (!strcmp(var, "convert.queue-size")) + return perf_config_u64(&c->queue_size, var, value); return 0; } diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 8a23ea1a71c7..c818bdb1c1ab 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -4,6 +4,7 @@ #include <stdbool.h> #include <string.h> +#include <linux/compiler.h> #include "event.h" #include "../ui/helpline.h" #include "../ui/progress.h" @@ -40,16 +41,16 @@ extern int debug_data_convert; #define STRERR_BUFSIZE 128 /* For the buffer size of str_error_r */ -int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +int dump_printf(const char *fmt, ...) __printf(1, 2); void trace_event(union perf_event *event); -int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2))); -int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2))); +int ui__error(const char *format, ...) __printf(1, 2); +int ui__warning(const char *format, ...) __printf(1, 2); void pr_stat(const char *fmt, ...); -int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4))); -int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5))); +int eprintf(int level, int var, const char *fmt, ...) __printf(3, 4); +int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5); int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index a96a99d2369f..4e7ab611377a 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -248,6 +248,64 @@ bool dso__needs_decompress(struct dso *dso) dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; } +static int decompress_kmodule(struct dso *dso, const char *name, char *tmpbuf) +{ + int fd = -1; + struct kmod_path m; + + if (!dso__needs_decompress(dso)) + return -1; + + if (kmod_path__parse_ext(&m, dso->long_name)) + return -1; + + if (!m.comp) + goto out; + + fd = mkstemp(tmpbuf); + if (fd < 0) { + dso->load_errno = errno; + goto out; + } + + if (!decompress_to_file(m.ext, name, fd)) { + dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; + close(fd); + fd = -1; + } + +out: + free(m.ext); + return fd; +} + +int dso__decompress_kmodule_fd(struct dso *dso, const char *name) +{ + char tmpbuf[] = KMOD_DECOMP_NAME; + int fd; + + fd = decompress_kmodule(dso, name, tmpbuf); + unlink(tmpbuf); + return fd; +} + +int dso__decompress_kmodule_path(struct dso *dso, const char *name, + char *pathname, size_t len) +{ + char tmpbuf[] = KMOD_DECOMP_NAME; + int fd; + + fd = decompress_kmodule(dso, name, tmpbuf); + if (fd < 0) { + unlink(tmpbuf); + return -1; + } + + strncpy(pathname, tmpbuf, len); + close(fd); + return 0; +} + /* * Parses kernel module specified in @path and updates * @m argument like: @@ -335,6 +393,21 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, return 0; } +void dso__set_module_info(struct dso *dso, struct kmod_path *m, + struct machine *machine) +{ + if (machine__is_host(machine)) + dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; + else + dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + + /* _KMODULE_COMP should be next to _KMODULE */ + if (m->kmod && m->comp) + dso->symtab_type++; + + dso__set_short_name(dso, strdup(m->name), true); +} + /* * Global list of open DSOs and the counter. */ @@ -381,7 +454,7 @@ static int do_open(char *name) static int __open_dso(struct dso *dso, struct machine *machine) { - int fd; + int fd = -EINVAL; char *root_dir = (char *)""; char *name = malloc(PATH_MAX); @@ -392,15 +465,30 @@ static int __open_dso(struct dso *dso, struct machine *machine) root_dir = machine->root_dir; if (dso__read_binary_type_filename(dso, dso->binary_type, - root_dir, name, PATH_MAX)) { - free(name); - return -EINVAL; - } + root_dir, name, PATH_MAX)) + goto out; if (!is_regular_file(name)) - return -EINVAL; + goto out; + + if (dso__needs_decompress(dso)) { + char newpath[KMOD_DECOMP_LEN]; + size_t len = sizeof(newpath); + + if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) { + fd = -dso->load_errno; + goto out; + } + + strcpy(name, newpath); + } fd = do_open(name); + + if (dso__needs_decompress(dso)) + unlink(name); + +out: free(name); return fd; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 12350b171727..bd061ba7b47c 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -244,6 +244,12 @@ bool is_supported_compression(const char *ext); bool is_kernel_module(const char *pathname, int cpumode); bool decompress_to_file(const char *ext, const char *filename, int output_fd); bool dso__needs_decompress(struct dso *dso); +int dso__decompress_kmodule_fd(struct dso *dso, const char *name); +int dso__decompress_kmodule_path(struct dso *dso, const char *name, + char *pathname, size_t len); + +#define KMOD_DECOMP_NAME "/tmp/perf-kmod-XXXXXX" +#define KMOD_DECOMP_LEN sizeof(KMOD_DECOMP_NAME) struct kmod_path { char *name; @@ -259,6 +265,9 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, #define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false) #define kmod_path__parse_ext(__m, __p) __kmod_path__parse(__m, __p, false, true) +void dso__set_module_info(struct dso *dso, struct kmod_path *m, + struct machine *machine); + /* * The dso__data_* external interface provides following functions: * dso__data_get_fd diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 7c3fa1c8cbcd..9967c87af7a6 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -252,6 +252,127 @@ enum auxtrace_error_type { PERF_AUXTRACE_ERROR_MAX }; +/* Attribute type for custom synthesized events */ +#define PERF_TYPE_SYNTH (INT_MAX + 1U) + +/* Attribute config for custom synthesized events */ +enum perf_synth_id { + PERF_SYNTH_INTEL_PTWRITE, + PERF_SYNTH_INTEL_MWAIT, + PERF_SYNTH_INTEL_PWRE, + PERF_SYNTH_INTEL_EXSTOP, + PERF_SYNTH_INTEL_PWRX, + PERF_SYNTH_INTEL_CBR, +}; + +/* + * Raw data formats for synthesized events. Note that 4 bytes of padding are + * present to match the 'size' member of PERF_SAMPLE_RAW data which is always + * 8-byte aligned. That means we must dereference raw_data with an offset of 4. + * Refer perf_sample__synth_ptr() and perf_synth__raw_data(). It also means the + * structure sizes are 4 bytes bigger than the raw_size, refer + * perf_synth__raw_size(). + */ + +struct perf_synth_intel_ptwrite { + u32 padding; + union { + struct { + u32 ip : 1, + reserved : 31; + }; + u32 flags; + }; + u64 payload; +}; + +struct perf_synth_intel_mwait { + u32 padding; + u32 reserved; + union { + struct { + u64 hints : 8, + reserved1 : 24, + extensions : 2, + reserved2 : 30; + }; + u64 payload; + }; +}; + +struct perf_synth_intel_pwre { + u32 padding; + u32 reserved; + union { + struct { + u64 reserved1 : 7, + hw : 1, + subcstate : 4, + cstate : 4, + reserved2 : 48; + }; + u64 payload; + }; +}; + +struct perf_synth_intel_exstop { + u32 padding; + union { + struct { + u32 ip : 1, + reserved : 31; + }; + u32 flags; + }; +}; + +struct perf_synth_intel_pwrx { + u32 padding; + u32 reserved; + union { + struct { + u64 deepest_cstate : 4, + last_cstate : 4, + wake_reason : 4, + reserved1 : 52; + }; + u64 payload; + }; +}; + +struct perf_synth_intel_cbr { + u32 padding; + union { + struct { + u32 cbr : 8, + reserved1 : 8, + max_nonturbo : 8, + reserved2 : 8; + }; + u32 flags; + }; + u32 freq; + u32 reserved3; +}; + +/* + * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get + * 8-byte alignment. + */ +static inline void *perf_sample__synth_ptr(struct perf_sample *sample) +{ + return sample->raw_data - 4; +} + +static inline void *perf_synth__raw_data(void *p) +{ + return p + 4; +} + +#define perf_synth__raw_size(d) (sizeof(d) - 4) + +#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4) + /* * The kernel collects the number of events it couldn't send in a stretch and * when possible sends this number in a PERF_RECORD_LOST event. The number of diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 94cea4398a13..8d601fbdd8d6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -1,6 +1,7 @@ #ifndef __PERF_EVLIST_H #define __PERF_EVLIST_H 1 +#include <linux/compiler.h> #include <linux/kernel.h> #include <linux/refcount.h> #include <linux/list.h> @@ -34,7 +35,7 @@ struct perf_mmap { refcount_t refcnt; u64 prev; struct auxtrace_mmap auxtrace_mmap; - char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; static inline size_t diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e4f7902d5afa..6f4882f8d61f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -11,13 +11,17 @@ #include <errno.h> #include <inttypes.h> #include <linux/bitops.h> +#include <api/fs/fs.h> #include <api/fs/tracing_path.h> #include <traceevent/event-parse.h> #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> +#include <linux/compiler.h> #include <linux/err.h> #include <sys/ioctl.h> #include <sys/resource.h> +#include <sys/types.h> +#include <dirent.h> #include "asm/bug.h" #include "callchain.h" #include "cgroup.h" @@ -273,8 +277,20 @@ struct perf_evsel *perf_evsel__new_cycles(void) struct perf_evsel *evsel; event_attr_init(&attr); + /* + * Unnamed union member, not supported as struct member named + * initializer in older compilers such as gcc 4.4.7 + * + * Just for probing the precise_ip: + */ + attr.sample_period = 1; perf_event_attr__set_max_precise_ip(&attr); + /* + * Now let the usual logic to set up the perf_event_attr defaults + * to kick in when we return and before perf_evsel__open() is called. + */ + attr.sample_period = 0; evsel = perf_evsel__new(&attr); if (evsel == NULL) @@ -1429,7 +1445,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, } static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, - void *priv __attribute__((unused))) + void *priv __maybe_unused) { return fprintf(fp, " %-32s %s\n", name, val); } @@ -2459,6 +2475,42 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err, return false; } +static bool find_process(const char *name) +{ + size_t len = strlen(name); + DIR *dir; + struct dirent *d; + int ret = -1; + + dir = opendir(procfs__mountpoint()); + if (!dir) + return false; + + /* Walk through the directory. */ + while (ret && (d = readdir(dir)) != NULL) { + char path[PATH_MAX]; + char *data; + size_t size; + + if ((d->d_type != DT_DIR) || + !strcmp(".", d->d_name) || + !strcmp("..", d->d_name)) + continue; + + scnprintf(path, sizeof(path), "%s/%s/comm", + procfs__mountpoint(), d->d_name); + + if (filename__read_str(path, &data, &size)) + continue; + + ret = strncmp(name, data, len); + free(data); + } + + closedir(dir); + return ret ? false : true; +} + int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, int err, char *msg, size_t size) { diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index e415aee6a245..583f3a602506 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -7,6 +7,7 @@ #include "map.h" #include "strlist.h" #include "symbol.h" +#include "srcline.h" static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) { @@ -168,6 +169,38 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!print_oneline) printed += fprintf(fp, "\n"); + if (symbol_conf.inline_name && node->map) { + struct inline_node *inode; + + addr = map__rip_2objdump(node->map, node->ip), + inode = dso__parse_addr_inlines(node->map->dso, addr); + + if (inode) { + struct inline_list *ilist; + + list_for_each_entry(ilist, &inode->val, list) { + if (print_arrow) + printed += fprintf(fp, " <-"); + + /* IP is same, just skip it */ + if (print_ip) + printed += fprintf(fp, "%c%16s", + s, ""); + if (print_sym) + printed += fprintf(fp, " %s", + ilist->funcname); + if (print_srcline) + printed += fprintf(fp, "\n %s:%d", + ilist->filename, + ilist->line_nr); + if (!print_oneline) + printed += fprintf(fp, "\n"); + } + + inline_node__delete(inode); + } + } + if (symbol_conf.bt_stop_list && node->sym && strlist__has_entry(symbol_conf.bt_stop_list, diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c index 5980f7d256b1..40789d8603d0 100644 --- a/tools/perf/util/genelf_debug.c +++ b/tools/perf/util/genelf_debug.c @@ -11,6 +11,7 @@ * @remark Copyright 2007 OProfile authors * @author Philippe Elie */ +#include <linux/compiler.h> #include <sys/types.h> #include <stdio.h> #include <getopt.h> @@ -125,7 +126,7 @@ struct debug_line_header { * and filesize, last entry is followed by en empty string. */ /* follow the first program statement */ -} __attribute__((packed)); +} __packed; /* DWARF 2 spec talk only about one possible compilation unit header while * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not @@ -138,7 +139,7 @@ struct compilation_unit_header { uhalf version; uword debug_abbrev_offset; ubyte pointer_size; -} __attribute__((packed)); +} __packed; #define DW_LNS_num_opcode (DW_LNS_set_isa + 1) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 314a07151fb7..76ed7d03e500 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -8,6 +8,7 @@ #include <unistd.h> #include <stdio.h> #include <stdlib.h> +#include <linux/compiler.h> #include <linux/list.h> #include <linux/kernel.h> #include <linux/bitops.h> @@ -841,7 +842,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused, /* * default get_cpuid(): nothing gets recorded - * actual implementation must be in arch/$(ARCH)/util/header.c + * actual implementation must be in arch/$(SRCARCH)/util/header.c */ int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) { @@ -1274,7 +1275,7 @@ error: } static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val, - void *priv __attribute__((unused))) + void *priv __maybe_unused) { return fprintf(fp, ", %s = %s", name, val); } @@ -1469,8 +1470,16 @@ static int __event_process_build_id(struct build_id_event *bev, dso__set_build_id(dso, &bev->build_id); - if (!is_kernel_module(filename, cpumode)) - dso->kernel = dso_type; + if (dso_type != DSO_TYPE_USER) { + struct kmod_path m = { .name = NULL, }; + + if (!kmod_path__parse_name(&m, filename) && m.kmod) + dso__set_module_info(dso, &m, machine); + else + dso->kernel = dso_type; + + free(m.name); + } build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index 1c88ad6425b8..15b95300d7f3 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -12,7 +12,7 @@ static int perf_unknown_cmd_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "help.autocorrect")) - autocorrect = perf_config_int(var,value); + return perf_config_int(&autocorrect, var,value); return 0; } diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index b2834ac7b1f5..218ee2bac9a5 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -866,8 +866,6 @@ static void intel_bts_print_info(u64 *arr, int start, int finish) fprintf(stdout, intel_bts_info_fmts[i], arr[i]); } -u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE]; - int intel_bts_process_auxtrace_info(union perf_event *event, struct perf_session *session) { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 7cf7f7aca4d2..aa1593ce551d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -64,6 +64,25 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_FUP_NO_TIP, }; +static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) +{ + switch (pkt_state) { + case INTEL_PT_STATE_NO_PSB: + case INTEL_PT_STATE_NO_IP: + case INTEL_PT_STATE_ERR_RESYNC: + case INTEL_PT_STATE_IN_SYNC: + case INTEL_PT_STATE_TNT: + return true; + case INTEL_PT_STATE_TIP: + case INTEL_PT_STATE_TIP_PGD: + case INTEL_PT_STATE_FUP: + case INTEL_PT_STATE_FUP_NO_TIP: + return false; + default: + return true; + }; +} + #ifdef INTEL_PT_STRICT #define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB #define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB @@ -87,11 +106,13 @@ struct intel_pt_decoder { const unsigned char *buf; size_t len; bool return_compression; + bool branch_enable; bool mtc_insn; bool pge; bool have_tma; bool have_cyc; bool fixup_last_mtc; + bool have_last_ip; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -99,6 +120,7 @@ struct intel_pt_decoder { uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; + uint64_t sample_timestamp; uint64_t ret_addr; uint64_t ctc_timestamp; uint64_t ctc_delta; @@ -119,6 +141,7 @@ struct intel_pt_decoder { int pkt_len; int last_packet_type; unsigned int cbr; + unsigned int cbr_seen; unsigned int max_non_turbo_ratio; double max_non_turbo_ratio_fp; double cbr_cyc_to_tsc; @@ -136,9 +159,18 @@ struct intel_pt_decoder { bool continuous_period; bool overflow; bool set_fup_tx_flags; + bool set_fup_ptw; + bool set_fup_mwait; + bool set_fup_pwre; + bool set_fup_exstop; unsigned int fup_tx_flags; unsigned int tx_flags; + uint64_t fup_ptw_payload; + uint64_t fup_mwait_payload; + uint64_t fup_pwre_payload; + uint64_t cbr_payload; uint64_t timestamp_insn_cnt; + uint64_t sample_insn_cnt; uint64_t stuck_ip; int no_progress; int stuck_ip_prd; @@ -192,6 +224,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->pgd_ip = params->pgd_ip; decoder->data = params->data; decoder->return_compression = params->return_compression; + decoder->branch_enable = params->branch_enable; decoder->period = params->period; decoder->period_type = params->period_type; @@ -398,6 +431,7 @@ static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet, static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) { decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip); + decoder->have_last_ip = true; } static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) @@ -635,6 +669,8 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) case INTEL_PT_PAD: case INTEL_PT_VMCS: case INTEL_PT_MNT: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: return 0; case INTEL_PT_MTC: @@ -675,6 +711,12 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) break; case INTEL_PT_TSC: + /* + * For now, do not support using TSC packets - refer + * intel_pt_calc_cyc_to_tsc(). + */ + if (data->from_mtc) + return 1; timestamp = pkt_info->packet.payload | (data->timestamp & (0xffULL << 56)); if (data->from_mtc && timestamp < data->timestamp && @@ -733,6 +775,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) case INTEL_PT_TIP_PGD: case INTEL_PT_TRACESTOP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: case INTEL_PT_OVF: case INTEL_PT_BAD: /* Does not happen */ default: @@ -787,6 +834,14 @@ static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder, .cbr_cyc_to_tsc = 0, }; + /* + * For now, do not support using TSC packets for at least the reasons: + * 1) timing might have stopped + * 2) TSC packets within PSB+ can slip against CYC packets + */ + if (!from_mtc) + return; + intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data); } @@ -898,6 +953,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, decoder->tot_insn_cnt += insn_cnt; decoder->timestamp_insn_cnt += insn_cnt; + decoder->sample_insn_cnt += insn_cnt; decoder->period_insn_cnt += insn_cnt; if (err) { @@ -990,6 +1046,57 @@ out_no_progress: return err; } +static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) +{ + bool ret = false; + + if (decoder->set_fup_tx_flags) { + decoder->set_fup_tx_flags = false; + decoder->tx_flags = decoder->fup_tx_flags; + decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.flags = decoder->fup_tx_flags; + return true; + } + if (decoder->set_fup_ptw) { + decoder->set_fup_ptw = false; + decoder->state.type = INTEL_PT_PTW; + decoder->state.flags |= INTEL_PT_FUP_IP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.ptw_payload = decoder->fup_ptw_payload; + return true; + } + if (decoder->set_fup_mwait) { + decoder->set_fup_mwait = false; + decoder->state.type = INTEL_PT_MWAIT_OP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.mwait_payload = decoder->fup_mwait_payload; + ret = true; + } + if (decoder->set_fup_pwre) { + decoder->set_fup_pwre = false; + decoder->state.type |= INTEL_PT_PWR_ENTRY; + decoder->state.type &= ~INTEL_PT_BRANCH; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.pwre_payload = decoder->fup_pwre_payload; + ret = true; + } + if (decoder->set_fup_exstop) { + decoder->set_fup_exstop = false; + decoder->state.type |= INTEL_PT_EX_STOP; + decoder->state.type &= ~INTEL_PT_BRANCH; + decoder->state.flags |= INTEL_PT_FUP_IP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + ret = true; + } + return ret; +} + static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) { struct intel_pt_insn intel_pt_insn; @@ -1003,15 +1110,8 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) if (err == INTEL_PT_RETURN) return 0; if (err == -EAGAIN) { - if (decoder->set_fup_tx_flags) { - decoder->set_fup_tx_flags = false; - decoder->tx_flags = decoder->fup_tx_flags; - decoder->state.type = INTEL_PT_TRANSACTION; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - decoder->state.flags = decoder->fup_tx_flags; + if (intel_pt_fup_event(decoder)) return 0; - } return err; } decoder->set_fup_tx_flags = false; @@ -1360,7 +1460,9 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) { - unsigned int cbr = decoder->packet.payload; + unsigned int cbr = decoder->packet.payload & 0xff; + + decoder->cbr_payload = decoder->packet.payload; if (decoder->cbr == cbr) return; @@ -1417,6 +1519,13 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_TRACESTOP: case INTEL_PT_BAD: case INTEL_PT_PSB: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); return -EAGAIN; @@ -1446,7 +1555,8 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - intel_pt_set_last_ip(decoder); + if (decoder->packet.count) + intel_pt_set_last_ip(decoder); break; case INTEL_PT_MODE_TSX: @@ -1497,6 +1607,13 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) case INTEL_PT_MODE_TSX: case INTEL_PT_BAD: case INTEL_PT_PSBEND: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: intel_pt_log("ERROR: Missing TIP after FUP\n"); decoder->pkt_state = INTEL_PT_STATE_ERR3; return -ENOENT; @@ -1625,6 +1742,15 @@ next: break; } intel_pt_set_last_ip(decoder); + if (!decoder->branch_enable) { + decoder->ip = decoder->last_ip; + if (intel_pt_fup_event(decoder)) + return 0; + no_tip = false; + break; + } + if (decoder->set_fup_mwait) + no_tip = true; err = intel_pt_walk_fup(decoder); if (err != -EAGAIN) { if (err) @@ -1650,6 +1776,8 @@ next: break; case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psbend(decoder); if (err == -EAGAIN) @@ -1696,6 +1824,16 @@ next: case INTEL_PT_CBR: intel_pt_calc_cbr(decoder); + if (!decoder->branch_enable && + decoder->cbr != decoder->cbr_seen) { + decoder->cbr_seen = decoder->cbr; + decoder->state.type = INTEL_PT_CBR_CHG; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.cbr_payload = + decoder->packet.payload; + return 0; + } break; case INTEL_PT_MODE_EXEC: @@ -1722,6 +1860,71 @@ next: case INTEL_PT_PAD: break; + case INTEL_PT_PTWRITE_IP: + decoder->fup_ptw_payload = decoder->packet.payload; + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->set_fup_ptw = true; + no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after PTWRITE", + decoder->pos); + } + goto next; + + case INTEL_PT_PTWRITE: + decoder->state.type = INTEL_PT_PTW; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.ptw_payload = decoder->packet.payload; + return 0; + + case INTEL_PT_MWAIT: + decoder->fup_mwait_payload = decoder->packet.payload; + decoder->set_fup_mwait = true; + break; + + case INTEL_PT_PWRE: + if (decoder->set_fup_mwait) { + decoder->fup_pwre_payload = + decoder->packet.payload; + decoder->set_fup_pwre = true; + break; + } + decoder->state.type = INTEL_PT_PWR_ENTRY; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.pwrx_payload = decoder->packet.payload; + return 0; + + case INTEL_PT_EXSTOP_IP: + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->set_fup_exstop = true; + no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after EXSTOP", + decoder->pos); + } + goto next; + + case INTEL_PT_EXSTOP: + decoder->state.type = INTEL_PT_EX_STOP; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return 0; + + case INTEL_PT_PWRX: + decoder->state.type = INTEL_PT_PWR_EXIT; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.pwrx_payload = decoder->packet.payload; + return 0; + default: return intel_pt_bug(decoder); } @@ -1730,8 +1933,9 @@ next: static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder) { - return decoder->last_ip || decoder->packet.count == 0 || - decoder->packet.count == 3 || decoder->packet.count == 6; + return decoder->packet.count && + (decoder->have_last_ip || decoder->packet.count == 3 || + decoder->packet.count == 6); } /* Walk PSB+ packets to get in sync. */ @@ -1750,6 +1954,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) __fallthrough; case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: intel_pt_log("ERROR: Unexpected packet\n"); return -ENOENT; @@ -1854,14 +2065,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_FUP: - if (decoder->overflow) { - if (intel_pt_have_ip(decoder)) - intel_pt_set_ip(decoder); - if (decoder->ip) - return 0; - } - if (decoder->packet.count) - intel_pt_set_last_ip(decoder); + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (decoder->ip) + return 0; break; case INTEL_PT_MTC: @@ -1910,6 +2117,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; + intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psb(decoder); if (err) return err; @@ -1925,6 +2135,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_VMCS: case INTEL_PT_MNT: case INTEL_PT_PAD: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: default: break; } @@ -1935,6 +2152,19 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) { int err; + decoder->set_fup_tx_flags = false; + decoder->set_fup_ptw = false; + decoder->set_fup_mwait = false; + decoder->set_fup_pwre = false; + decoder->set_fup_exstop = false; + + if (!decoder->branch_enable) { + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->overflow = false; + decoder->state.type = 0; /* Do not have a sample */ + return 0; + } + intel_pt_log("Scanning for full IP\n"); err = intel_pt_walk_to_ip(decoder); if (err) @@ -2043,6 +2273,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) decoder->pge = false; decoder->continuous_period = false; + decoder->have_last_ip = false; decoder->last_ip = 0; decoder->ip = 0; intel_pt_clear_stack(&decoder->stack); @@ -2051,6 +2282,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) if (err) return err; + decoder->have_last_ip = true; decoder->pkt_state = INTEL_PT_STATE_NO_IP; err = intel_pt_walk_psb(decoder); @@ -2069,7 +2301,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) { - uint64_t est = decoder->timestamp_insn_cnt << 1; + uint64_t est = decoder->sample_insn_cnt << 1; if (!decoder->cbr || !decoder->max_non_turbo_ratio) goto out; @@ -2077,7 +2309,7 @@ static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) est *= decoder->max_non_turbo_ratio; est /= decoder->cbr; out: - return decoder->timestamp + est; + return decoder->sample_timestamp + est; } const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) @@ -2093,8 +2325,10 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) err = intel_pt_sync(decoder); break; case INTEL_PT_STATE_NO_IP: + decoder->have_last_ip = false; decoder->last_ip = 0; - /* Fall through */ + decoder->ip = 0; + __fallthrough; case INTEL_PT_STATE_ERR_RESYNC: err = intel_pt_sync_ip(decoder); break; @@ -2130,15 +2364,29 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) } } while (err == -ENOLINK); - decoder->state.err = err ? intel_pt_ext_err(err) : 0; - decoder->state.timestamp = decoder->timestamp; + if (err) { + decoder->state.err = intel_pt_ext_err(err); + decoder->state.from_ip = decoder->ip; + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + } else { + decoder->state.err = 0; + if (decoder->cbr != decoder->cbr_seen && decoder->state.type) { + decoder->cbr_seen = decoder->cbr; + decoder->state.type |= INTEL_PT_CBR_CHG; + decoder->state.cbr_payload = decoder->cbr_payload; + } + if (intel_pt_sample_time(decoder->pkt_state)) { + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + } + } + + decoder->state.timestamp = decoder->sample_timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; - if (err) - decoder->state.from_ip = decoder->ip; - return &decoder->state; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index e90619a43c0c..921b22e8ca0e 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -25,11 +25,18 @@ #define INTEL_PT_IN_TX (1 << 0) #define INTEL_PT_ABORT_TX (1 << 1) #define INTEL_PT_ASYNC (1 << 2) +#define INTEL_PT_FUP_IP (1 << 3) enum intel_pt_sample_type { INTEL_PT_BRANCH = 1 << 0, INTEL_PT_INSTRUCTION = 1 << 1, INTEL_PT_TRANSACTION = 1 << 2, + INTEL_PT_PTW = 1 << 3, + INTEL_PT_MWAIT_OP = 1 << 4, + INTEL_PT_PWR_ENTRY = 1 << 5, + INTEL_PT_EX_STOP = 1 << 6, + INTEL_PT_PWR_EXIT = 1 << 7, + INTEL_PT_CBR_CHG = 1 << 8, }; enum intel_pt_period_type { @@ -63,6 +70,11 @@ struct intel_pt_state { uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; + uint64_t ptw_payload; + uint64_t mwait_payload; + uint64_t pwre_payload; + uint64_t pwrx_payload; + uint64_t cbr_payload; uint32_t flags; enum intel_pt_insn_op insn_op; int insn_len; @@ -87,6 +99,7 @@ struct intel_pt_params { bool (*pgd_ip)(uint64_t ip, void *data); void *data; bool return_compression; + bool branch_enable; uint64_t period; enum intel_pt_period_type period_type; unsigned max_non_turbo_ratio; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index debe751dc3d6..45b64f93f358 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -16,6 +16,7 @@ #ifndef INCLUDE__INTEL_PT_LOG_H__ #define INCLUDE__INTEL_PT_LOG_H__ +#include <linux/compiler.h> #include <stdint.h> #include <inttypes.h> @@ -34,8 +35,7 @@ void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip); -__attribute__((format(printf, 1, 2))) -void __intel_pt_log(const char *fmt, ...); +void __intel_pt_log(const char *fmt, ...) __printf(1, 2); #define intel_pt_log(fmt, ...) \ do { \ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 7528ae4f7e28..ba4c9dd18643 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -64,6 +64,13 @@ static const char * const packet_name[] = { [INTEL_PT_PIP] = "PIP", [INTEL_PT_OVF] = "OVF", [INTEL_PT_MNT] = "MNT", + [INTEL_PT_PTWRITE] = "PTWRITE", + [INTEL_PT_PTWRITE_IP] = "PTWRITE", + [INTEL_PT_EXSTOP] = "EXSTOP", + [INTEL_PT_EXSTOP_IP] = "EXSTOP", + [INTEL_PT_MWAIT] = "MWAIT", + [INTEL_PT_PWRE] = "PWRE", + [INTEL_PT_PWRX] = "PWRX", }; const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) @@ -123,7 +130,7 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len, if (len < 4) return INTEL_PT_NEED_MORE_BYTES; packet->type = INTEL_PT_CBR; - packet->payload = buf[2]; + packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2)); return 4; } @@ -217,12 +224,80 @@ static int intel_pt_get_3byte(const unsigned char *buf, size_t len, } } +static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + packet->count = (buf[1] >> 5) & 0x3; + packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP : + INTEL_PT_PTWRITE; + + switch (packet->count) { + case 0: + if (len < 6) + return INTEL_PT_NEED_MORE_BYTES; + packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2)); + return 6; + case 1: + if (len < 10) + return INTEL_PT_NEED_MORE_BYTES; + packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2)); + return 10; + default: + return INTEL_PT_BAD_PACKET; + } +} + +static int intel_pt_get_exstop(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_EXSTOP; + return 2; +} + +static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_EXSTOP_IP; + return 2; +} + +static int intel_pt_get_mwait(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 10) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_MWAIT; + packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2)); + return 10; +} + +static int intel_pt_get_pwre(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 4) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_PWRE; + memcpy_le64(&packet->payload, buf + 2, 2); + return 4; +} + +static int intel_pt_get_pwrx(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 7) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_PWRX; + memcpy_le64(&packet->payload, buf + 2, 5); + return 7; +} + static int intel_pt_get_ext(const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { if (len < 2) return INTEL_PT_NEED_MORE_BYTES; + if ((buf[1] & 0x1f) == 0x12) + return intel_pt_get_ptwrite(buf, len, packet); + switch (buf[1]) { case 0xa3: /* Long TNT */ return intel_pt_get_long_tnt(buf, len, packet); @@ -244,6 +319,16 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len, return intel_pt_get_tma(buf, len, packet); case 0xC3: /* 3-byte header */ return intel_pt_get_3byte(buf, len, packet); + case 0x62: /* EXSTOP no IP */ + return intel_pt_get_exstop(packet); + case 0xE2: /* EXSTOP with IP */ + return intel_pt_get_exstop_ip(packet); + case 0xC2: /* MWAIT */ + return intel_pt_get_mwait(buf, len, packet); + case 0x22: /* PWRE */ + return intel_pt_get_pwre(buf, len, packet); + case 0xA2: /* PWRX */ + return intel_pt_get_pwrx(buf, len, packet); default: return INTEL_PT_BAD_PACKET; } @@ -522,6 +607,29 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)", name, payload, nr); return ret; + case INTEL_PT_PTWRITE: + return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload); + case INTEL_PT_PTWRITE_IP: + return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload); + case INTEL_PT_EXSTOP: + return snprintf(buf, buf_len, "%s IP:0", name); + case INTEL_PT_EXSTOP_IP: + return snprintf(buf, buf_len, "%s IP:1", name); + case INTEL_PT_MWAIT: + return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x", + name, payload, (unsigned int)(payload & 0xff), + (unsigned int)((payload >> 32) & 0x3)); + case INTEL_PT_PWRE: + return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u", + name, payload, !!(payload & 0x80), + (unsigned int)((payload >> 12) & 0xf), + (unsigned int)((payload >> 8) & 0xf)); + case INTEL_PT_PWRX: + return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x", + name, payload, + (unsigned int)((payload >> 4) & 0xf), + (unsigned int)(payload & 0xf), + (unsigned int)((payload >> 8) & 0xf)); default: break; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h index 781bb79883bd..73ddc3a88d07 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h @@ -52,6 +52,13 @@ enum intel_pt_pkt_type { INTEL_PT_PIP, INTEL_PT_OVF, INTEL_PT_MNT, + INTEL_PT_PTWRITE, + INTEL_PT_PTWRITE_IP, + INTEL_PT_EXSTOP, + INTEL_PT_EXSTOP_IP, + INTEL_PT_MWAIT, + INTEL_PT_PWRE, + INTEL_PT_PWRX, }; struct intel_pt_pkt { diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt index 767be7c76034..12e377184ee4 100644 --- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -1009,7 +1009,7 @@ GrpTable: Grp15 1: fxstor | RDGSBASE Ry (F3),(11B) 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) -4: XSAVE +4: XSAVE | ptwrite Ey (F3),(11B) 5: XRSTOR | lfence (11B) 6: XSAVEOPT | clwb (66) | mfence (11B) 7: clflush | clflushopt (66) | sfence (11B) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 4c7718f87a08..b58f9fd1e2ee 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -81,7 +81,6 @@ struct intel_pt { bool sample_instructions; u64 instructions_sample_type; - u64 instructions_sample_period; u64 instructions_id; bool sample_branches; @@ -93,6 +92,18 @@ struct intel_pt { u64 transactions_sample_type; u64 transactions_id; + bool sample_ptwrites; + u64 ptwrites_sample_type; + u64 ptwrites_id; + + bool sample_pwr_events; + u64 pwr_events_sample_type; + u64 mwait_id; + u64 pwre_id; + u64 exstop_id; + u64 pwrx_id; + u64 cbr_id; + bool synth_needs_swap; u64 tsc_bit; @@ -103,6 +114,7 @@ struct intel_pt { u64 cyc_bit; u64 noretcomp_bit; unsigned max_non_turbo_ratio; + unsigned cbr2khz; unsigned long num_events; @@ -668,6 +680,19 @@ static bool intel_pt_return_compression(struct intel_pt *pt) return true; } +static bool intel_pt_branch_enable(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config) && + (config & 1) && !(config & 0x2000)) + return false; + } + return true; +} + static unsigned int intel_pt_mtc_period(struct intel_pt *pt) { struct perf_evsel *evsel; @@ -799,6 +824,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.walk_insn = intel_pt_walk_next_insn; params.data = ptq; params.return_compression = intel_pt_return_compression(pt); + params.branch_enable = intel_pt_branch_enable(pt); params.max_non_turbo_ratio = pt->max_non_turbo_ratio; params.mtc_period = intel_pt_mtc_period(pt); params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; @@ -1044,6 +1070,36 @@ static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) bs->nr += 1; } +static inline bool intel_pt_skip_event(struct intel_pt *pt) +{ + return pt->synth_opts.initial_skip && + pt->num_events++ < pt->synth_opts.initial_skip; +} + +static void intel_pt_prep_b_sample(struct intel_pt *pt, + struct intel_pt_queue *ptq, + union perf_event *event, + struct perf_sample *sample) +{ + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample->cpumode = PERF_RECORD_MISC_USER; + sample->ip = ptq->state->from_ip; + sample->pid = ptq->pid; + sample->tid = ptq->tid; + sample->addr = ptq->state->to_ip; + sample->period = 1; + sample->cpu = ptq->cpu; + sample->flags = ptq->flags; + sample->insn_len = ptq->insn_len; + memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); +} + static int intel_pt_inject_event(union perf_event *event, struct perf_sample *sample, u64 type, bool swapped) @@ -1052,9 +1108,35 @@ static int intel_pt_inject_event(union perf_event *event, return perf_event__synthesize_sample(event, type, 0, sample, swapped); } -static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) +static inline int intel_pt_opt_inject(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample, u64 type) +{ + if (!pt->synth_opts.inject) + return 0; + + return intel_pt_inject_event(event, sample, type, pt->synth_needs_swap); +} + +static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample, u64 type) { int ret; + + ret = intel_pt_opt_inject(pt, event, sample, type); + if (ret) + return ret; + + ret = perf_session__deliver_synth_event(pt->session, event, sample); + if (ret) + pr_err("Intel PT: failed to deliver event, error %d\n", ret); + + return ret; +} + +static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) +{ struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; @@ -1066,29 +1148,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) return 0; - if (pt->synth_opts.initial_skip && - pt->num_events++ < pt->synth_opts.initial_skip) + if (intel_pt_skip_event(pt)) return 0; - event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; - event->sample.header.size = sizeof(struct perf_event_header); + intel_pt_prep_b_sample(pt, ptq, event, &sample); - if (!pt->timeless_decoding) - sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); - - sample.cpumode = PERF_RECORD_MISC_USER; - sample.ip = ptq->state->from_ip; - sample.pid = ptq->pid; - sample.tid = ptq->tid; - sample.addr = ptq->state->to_ip; sample.id = ptq->pt->branches_id; sample.stream_id = ptq->pt->branches_id; - sample.period = 1; - sample.cpu = ptq->cpu; - sample.flags = ptq->flags; - sample.insn_len = ptq->insn_len; - memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); /* * perf report cannot handle events without a branch stack when using @@ -1105,144 +1171,251 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } - if (pt->synth_opts.inject) { - ret = intel_pt_inject_event(event, &sample, - pt->branches_sample_type, - pt->synth_needs_swap); - if (ret) - return ret; + return intel_pt_deliver_synth_b_event(pt, event, &sample, + pt->branches_sample_type); +} + +static void intel_pt_prep_sample(struct intel_pt *pt, + struct intel_pt_queue *ptq, + union perf_event *event, + struct perf_sample *sample) +{ + intel_pt_prep_b_sample(pt, ptq, event, sample); + + if (pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->chain, + pt->synth_opts.callchain_sz, sample->ip); + sample->callchain = ptq->chain; } - ret = perf_session__deliver_synth_event(pt->session, event, &sample); - if (ret) - pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n", - ret); + if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample->branch_stack = ptq->last_branch; + } +} + +static inline int intel_pt_deliver_synth_event(struct intel_pt *pt, + struct intel_pt_queue *ptq, + union perf_event *event, + struct perf_sample *sample, + u64 type) +{ + int ret; + + ret = intel_pt_deliver_synth_b_event(pt, event, sample, type); + + if (pt->synth_opts.last_branch) + intel_pt_reset_last_branch_rb(ptq); return ret; } static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) { - int ret; struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; - if (pt->synth_opts.initial_skip && - pt->num_events++ < pt->synth_opts.initial_skip) + if (intel_pt_skip_event(pt)) return 0; - event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; - event->sample.header.size = sizeof(struct perf_event_header); - - if (!pt->timeless_decoding) - sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + intel_pt_prep_sample(pt, ptq, event, &sample); - sample.cpumode = PERF_RECORD_MISC_USER; - sample.ip = ptq->state->from_ip; - sample.pid = ptq->pid; - sample.tid = ptq->tid; - sample.addr = ptq->state->to_ip; sample.id = ptq->pt->instructions_id; sample.stream_id = ptq->pt->instructions_id; sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; - sample.cpu = ptq->cpu; - sample.flags = ptq->flags; - sample.insn_len = ptq->insn_len; - memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - if (pt->synth_opts.callchain) { - thread_stack__sample(ptq->thread, ptq->chain, - pt->synth_opts.callchain_sz, sample.ip); - sample.callchain = ptq->chain; - } + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->instructions_sample_type); +} - if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); - sample.branch_stack = ptq->last_branch; - } +static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; - if (pt->synth_opts.inject) { - ret = intel_pt_inject_event(event, &sample, - pt->instructions_sample_type, - pt->synth_needs_swap); - if (ret) - return ret; - } + if (intel_pt_skip_event(pt)) + return 0; - ret = perf_session__deliver_synth_event(pt->session, event, &sample); - if (ret) - pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", - ret); + intel_pt_prep_sample(pt, ptq, event, &sample); - if (pt->synth_opts.last_branch) - intel_pt_reset_last_branch_rb(ptq); + sample.id = ptq->pt->transactions_id; + sample.stream_id = ptq->pt->transactions_id; - return ret; + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->transactions_sample_type); } -static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) +static void intel_pt_prep_p_sample(struct intel_pt *pt, + struct intel_pt_queue *ptq, + union perf_event *event, + struct perf_sample *sample) +{ + intel_pt_prep_sample(pt, ptq, event, sample); + + /* + * Zero IP is used to mean "trace start" but that is not the case for + * power or PTWRITE events with no IP, so clear the flags. + */ + if (!sample->ip) + sample->flags = 0; +} + +static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq) { - int ret; struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_ptwrite raw; - if (pt->synth_opts.initial_skip && - pt->num_events++ < pt->synth_opts.initial_skip) + if (intel_pt_skip_event(pt)) return 0; - event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; - event->sample.header.size = sizeof(struct perf_event_header); + intel_pt_prep_p_sample(pt, ptq, event, &sample); - if (!pt->timeless_decoding) - sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + sample.id = ptq->pt->ptwrites_id; + sample.stream_id = ptq->pt->ptwrites_id; - sample.cpumode = PERF_RECORD_MISC_USER; - sample.ip = ptq->state->from_ip; - sample.pid = ptq->pid; - sample.tid = ptq->tid; - sample.addr = ptq->state->to_ip; - sample.id = ptq->pt->transactions_id; - sample.stream_id = ptq->pt->transactions_id; - sample.period = 1; - sample.cpu = ptq->cpu; - sample.flags = ptq->flags; - sample.insn_len = ptq->insn_len; - memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); + raw.flags = 0; + raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); + raw.payload = cpu_to_le64(ptq->state->ptw_payload); - if (pt->synth_opts.callchain) { - thread_stack__sample(ptq->thread, ptq->chain, - pt->synth_opts.callchain_sz, sample.ip); - sample.callchain = ptq->chain; - } + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); - if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); - sample.branch_stack = ptq->last_branch; - } + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->ptwrites_sample_type); +} - if (pt->synth_opts.inject) { - ret = intel_pt_inject_event(event, &sample, - pt->transactions_sample_type, - pt->synth_needs_swap); - if (ret) - return ret; - } +static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_cbr raw; + u32 flags; - ret = perf_session__deliver_synth_event(pt->session, event, &sample); - if (ret) - pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", - ret); + if (intel_pt_skip_event(pt)) + return 0; - if (pt->synth_opts.last_branch) - intel_pt_reset_last_branch_rb(ptq); + intel_pt_prep_p_sample(pt, ptq, event, &sample); - return ret; + sample.id = ptq->pt->cbr_id; + sample.stream_id = ptq->pt->cbr_id; + + flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16); + raw.flags = cpu_to_le32(flags); + raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz); + raw.reserved3 = 0; + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->pwr_events_sample_type); +} + +static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_mwait raw; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->mwait_id; + sample.stream_id = ptq->pt->mwait_id; + + raw.reserved = 0; + raw.payload = cpu_to_le64(ptq->state->mwait_payload); + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->pwr_events_sample_type); +} + +static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_pwre raw; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->pwre_id; + sample.stream_id = ptq->pt->pwre_id; + + raw.reserved = 0; + raw.payload = cpu_to_le64(ptq->state->pwre_payload); + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->pwr_events_sample_type); +} + +static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_exstop raw; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->exstop_id; + sample.stream_id = ptq->pt->exstop_id; + + raw.flags = 0; + raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->pwr_events_sample_type); +} + +static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct perf_synth_intel_pwrx raw; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_p_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->pwrx_id; + sample.stream_id = ptq->pt->pwrx_id; + + raw.reserved = 0; + raw.payload = cpu_to_le64(ptq->state->pwrx_payload); + + sample.raw_size = perf_synth__raw_size(raw); + sample.raw_data = perf_synth__raw_data(&raw); + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + pt->pwr_events_sample_type); } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, @@ -1296,6 +1469,10 @@ static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); } +#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ + INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \ + INTEL_PT_CBR_CHG) + static int intel_pt_sample(struct intel_pt_queue *ptq) { const struct intel_pt_state *state = ptq->state; @@ -1307,24 +1484,52 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - if (pt->sample_instructions && - (state->type & INTEL_PT_INSTRUCTION) && - (!pt->synth_opts.initial_skip || - pt->num_events++ >= pt->synth_opts.initial_skip)) { + if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { + if (state->type & INTEL_PT_CBR_CHG) { + err = intel_pt_synth_cbr_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_MWAIT_OP) { + err = intel_pt_synth_mwait_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_PWR_ENTRY) { + err = intel_pt_synth_pwre_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_EX_STOP) { + err = intel_pt_synth_exstop_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_PWR_EXIT) { + err = intel_pt_synth_pwrx_sample(ptq); + if (err) + return err; + } + } + + if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { err = intel_pt_synth_instruction_sample(ptq); if (err) return err; } - if (pt->sample_transactions && - (state->type & INTEL_PT_TRANSACTION) && - (!pt->synth_opts.initial_skip || - pt->num_events++ >= pt->synth_opts.initial_skip)) { + if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) { err = intel_pt_synth_transaction_sample(ptq); if (err) return err; } + if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) { + err = intel_pt_synth_ptwrite_sample(ptq); + if (err) + return err; + } + if (!(state->type & INTEL_PT_BRANCH)) return 0; @@ -1925,36 +2130,65 @@ static int intel_pt_event_synth(struct perf_tool *tool, NULL); } -static int intel_pt_synth_event(struct perf_session *session, +static int intel_pt_synth_event(struct perf_session *session, const char *name, struct perf_event_attr *attr, u64 id) { struct intel_pt_synth intel_pt_synth; + int err; + + pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + name, id, (u64)attr->sample_type); memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); intel_pt_synth.session = session; - return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, - &id, intel_pt_event_synth); + err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, + &id, intel_pt_event_synth); + if (err) + pr_err("%s: failed to synthesize '%s' event type\n", + __func__, name); + + return err; } -static int intel_pt_synth_events(struct intel_pt *pt, - struct perf_session *session) +static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id, + const char *name) { - struct perf_evlist *evlist = session->evlist; struct perf_evsel *evsel; - struct perf_event_attr attr; - bool found = false; - u64 id; - int err; evlist__for_each_entry(evlist, evsel) { - if (evsel->attr.type == pt->pmu_type && evsel->ids) { - found = true; + if (evsel->id && evsel->id[0] == id) { + if (evsel->name) + zfree(&evsel->name); + evsel->name = strdup(name); break; } } +} - if (!found) { +static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt, + struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->attr.type == pt->pmu_type && evsel->ids) + return evsel; + } + + return NULL; +} + +static int intel_pt_synth_events(struct intel_pt *pt, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel = intel_pt_evsel(pt, evlist); + struct perf_event_attr attr; + u64 id; + int err; + + if (!evsel) { pr_debug("There are no selected events with Intel Processor Trace data\n"); return 0; } @@ -1983,6 +2217,25 @@ static int intel_pt_synth_events(struct intel_pt *pt, if (!id) id = 1; + if (pt->synth_opts.branches) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + err = intel_pt_synth_event(session, "branches", &attr, id); + if (err) + return err; + pt->sample_branches = true; + pt->branches_sample_type = attr.sample_type; + pt->branches_id = id; + id += 1; + attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; + } + + if (pt->synth_opts.callchain) + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + if (pt->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + if (pt->synth_opts.instructions) { attr.config = PERF_COUNT_HW_INSTRUCTIONS; if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) @@ -1990,70 +2243,90 @@ static int intel_pt_synth_events(struct intel_pt *pt, intel_pt_ns_to_ticks(pt, pt->synth_opts.period); else attr.sample_period = pt->synth_opts.period; - pt->instructions_sample_period = attr.sample_period; - if (pt->synth_opts.callchain) - attr.sample_type |= PERF_SAMPLE_CALLCHAIN; - if (pt->synth_opts.last_branch) - attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; - pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", - id, (u64)attr.sample_type); - err = intel_pt_synth_event(session, &attr, id); - if (err) { - pr_err("%s: failed to synthesize 'instructions' event type\n", - __func__); + err = intel_pt_synth_event(session, "instructions", &attr, id); + if (err) return err; - } pt->sample_instructions = true; pt->instructions_sample_type = attr.sample_type; pt->instructions_id = id; id += 1; } + attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD; + attr.sample_period = 1; + if (pt->synth_opts.transactions) { attr.config = PERF_COUNT_HW_INSTRUCTIONS; - attr.sample_period = 1; - if (pt->synth_opts.callchain) - attr.sample_type |= PERF_SAMPLE_CALLCHAIN; - if (pt->synth_opts.last_branch) - attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; - pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", - id, (u64)attr.sample_type); - err = intel_pt_synth_event(session, &attr, id); - if (err) { - pr_err("%s: failed to synthesize 'transactions' event type\n", - __func__); + err = intel_pt_synth_event(session, "transactions", &attr, id); + if (err) return err; - } pt->sample_transactions = true; + pt->transactions_sample_type = attr.sample_type; pt->transactions_id = id; + intel_pt_set_event_name(evlist, id, "transactions"); id += 1; - evlist__for_each_entry(evlist, evsel) { - if (evsel->id && evsel->id[0] == pt->transactions_id) { - if (evsel->name) - zfree(&evsel->name); - evsel->name = strdup("transactions"); - break; - } - } } - if (pt->synth_opts.branches) { - attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; - attr.sample_period = 1; - attr.sample_type |= PERF_SAMPLE_ADDR; - attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; - attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK; - pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", - id, (u64)attr.sample_type); - err = intel_pt_synth_event(session, &attr, id); - if (err) { - pr_err("%s: failed to synthesize 'branches' event type\n", - __func__); + attr.type = PERF_TYPE_SYNTH; + attr.sample_type |= PERF_SAMPLE_RAW; + + if (pt->synth_opts.ptwrites) { + attr.config = PERF_SYNTH_INTEL_PTWRITE; + err = intel_pt_synth_event(session, "ptwrite", &attr, id); + if (err) return err; - } - pt->sample_branches = true; - pt->branches_sample_type = attr.sample_type; - pt->branches_id = id; + pt->sample_ptwrites = true; + pt->ptwrites_sample_type = attr.sample_type; + pt->ptwrites_id = id; + intel_pt_set_event_name(evlist, id, "ptwrite"); + id += 1; + } + + if (pt->synth_opts.pwr_events) { + pt->sample_pwr_events = true; + pt->pwr_events_sample_type = attr.sample_type; + + attr.config = PERF_SYNTH_INTEL_CBR; + err = intel_pt_synth_event(session, "cbr", &attr, id); + if (err) + return err; + pt->cbr_id = id; + intel_pt_set_event_name(evlist, id, "cbr"); + id += 1; + } + + if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) { + attr.config = PERF_SYNTH_INTEL_MWAIT; + err = intel_pt_synth_event(session, "mwait", &attr, id); + if (err) + return err; + pt->mwait_id = id; + intel_pt_set_event_name(evlist, id, "mwait"); + id += 1; + + attr.config = PERF_SYNTH_INTEL_PWRE; + err = intel_pt_synth_event(session, "pwre", &attr, id); + if (err) + return err; + pt->pwre_id = id; + intel_pt_set_event_name(evlist, id, "pwre"); + id += 1; + + attr.config = PERF_SYNTH_INTEL_EXSTOP; + err = intel_pt_synth_event(session, "exstop", &attr, id); + if (err) + return err; + pt->exstop_id = id; + intel_pt_set_event_name(evlist, id, "exstop"); + id += 1; + + attr.config = PERF_SYNTH_INTEL_PWRX; + err = intel_pt_synth_event(session, "pwrx", &attr, id); + if (err) + return err; + pt->pwrx_id = id; + intel_pt_set_event_name(evlist, id, "pwrx"); + id += 1; } pt->synth_needs_swap = evsel->needs_swap; @@ -2322,6 +2595,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); intel_pt_log("Maximum non-turbo ratio %u\n", pt->max_non_turbo_ratio); + pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; } if (pt->synth_opts.calls) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index d97e014c3df3..5de2b86b9880 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -572,16 +572,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, if (dso == NULL) goto out_unlock; - if (machine__is_host(machine)) - dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; - else - dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; - - /* _KMODULE_COMP should be next to _KMODULE */ - if (m->kmod && m->comp) - dso->symtab_type++; - - dso__set_short_name(dso, strdup(m->name), true); + dso__set_module_info(dso, m, machine); dso__set_long_name(dso, strdup(filename), true); } @@ -1218,10 +1209,12 @@ int machine__create_kernel_maps(struct machine *machine) */ map_groups__fixup_end(&machine->kmaps); - if (machine__get_running_kernel_start(machine, &name, &addr)) { - } else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { - machine__destroy_kernel_maps(machine); - return -1; + if (!machine__get_running_kernel_start(machine, &name, &addr)) { + if (name && + maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { + machine__destroy_kernel_maps(machine); + return -1; + } } return 0; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index ea7f450dc609..389e9729331f 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -2,6 +2,7 @@ #define __PMU_H #include <linux/bitmap.h> +#include <linux/compiler.h> #include <linux/perf_event.h> #include <stdbool.h> #include "evsel.h" @@ -83,8 +84,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet, bool long_desc, bool details_flag); bool pmu_have_event(const char *pname, const char *name); -int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, - ...) __attribute__((format(scanf, 3, 4))); +int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4); int perf_pmu__test(void); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 84e7e698411e..a2670e9d652d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -619,7 +619,7 @@ static int post_process_probe_trace_point(struct probe_trace_point *tp, struct map *map, unsigned long offs) { struct symbol *sym; - u64 addr = tp->address + tp->offset - offs; + u64 addr = tp->address - offs; sym = map__find_symbol(map, addr); if (!sym) diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 373842656fb6..5812947418dd 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -1,6 +1,7 @@ #ifndef _PROBE_EVENT_H #define _PROBE_EVENT_H +#include <linux/compiler.h> #include <stdbool.h> #include "intlist.h" @@ -171,8 +172,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, struct symbol *sym); /* If there is no space to write, returns -E2BIG. */ -int e_snprintf(char *str, size_t size, const char *format, ...) - __attribute__((format(printf, 3, 4))); +int e_snprintf(char *str, size_t size, const char *format, ...) __printf(3, 4); /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 9d92af7d0718..57b7a00e6f16 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -28,6 +28,7 @@ #include <stdbool.h> #include <errno.h> #include <linux/bitmap.h> +#include <linux/compiler.h> #include <linux/time64.h> #include "../../perf.h" @@ -84,7 +85,7 @@ struct tables { static struct tables tables_global; -static void handler_call_die(const char *handler_name) NORETURN; +static void handler_call_die(const char *handler_name) __noreturn; static void handler_call_die(const char *handler_name) { PyErr_Print(); @@ -1219,7 +1220,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "# be retrieved using Python functions of the form " "common_*(context).\n"); - fprintf(ofp, "# See the perf-trace-python Documentation for the list " + fprintf(ofp, "# See the perf-script-python Documentation for the list " "of available functions.\n\n"); fprintf(ofp, "import os\n"); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7dc1096264c5..d19c40a81040 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2035,7 +2035,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) { pr_err("File does not contain CPU events. " - "Remove -c option to proceed.\n"); + "Remove -C option to proceed.\n"); return -1; } } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5762ae4e9e91..8b327c955a4f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2532,12 +2532,12 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, ret = sort_dimension__add(list, tok, evlist, level); if (ret == -EINVAL) { if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok))) - error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); + pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); else - error("Invalid --sort key: `%s'", tok); + pr_err("Invalid --sort key: `%s'", tok); break; } else if (ret == -ESRCH) { - error("Unknown --sort key: `%s'", tok); + pr_err("Unknown --sort key: `%s'", tok); break; } } @@ -2594,7 +2594,7 @@ static int setup_sort_order(struct perf_evlist *evlist) return 0; if (sort_order[1] == '\0') { - error("Invalid --sort key: `+'"); + pr_err("Invalid --sort key: `+'"); return -EINVAL; } @@ -2604,7 +2604,7 @@ static int setup_sort_order(struct perf_evlist *evlist) */ if (asprintf(&new_sort_order, "%s,%s", get_default_sort_order(evlist), sort_order + 1) < 0) { - error("Not enough memory to set up --sort"); + pr_err("Not enough memory to set up --sort"); return -ENOMEM; } @@ -2668,7 +2668,7 @@ static int __setup_sorting(struct perf_evlist *evlist) str = strdup(sort_keys); if (str == NULL) { - error("Not enough memory to setup sort keys"); + pr_err("Not enough memory to setup sort keys"); return -ENOMEM; } @@ -2678,7 +2678,7 @@ static int __setup_sorting(struct perf_evlist *evlist) if (!is_strict_order(field_order)) { str = setup_overhead(str); if (str == NULL) { - error("Not enough memory to setup overhead keys"); + pr_err("Not enough memory to setup overhead keys"); return -ENOMEM; } } @@ -2834,10 +2834,10 @@ static int setup_output_list(struct perf_hpp_list *list, char *str) tok; tok = strtok_r(NULL, ", ", &tmp)) { ret = output_field_add(list, tok); if (ret == -EINVAL) { - error("Invalid --fields key: `%s'", tok); + pr_err("Invalid --fields key: `%s'", tok); break; } else if (ret == -ESRCH) { - error("Unknown --fields key: `%s'", tok); + pr_err("Unknown --fields key: `%s'", tok); break; } } @@ -2877,7 +2877,7 @@ static int __setup_output_field(void) strp = str = strdup(field_order); if (str == NULL) { - error("Not enough memory to setup output fields"); + pr_err("Not enough memory to setup output fields"); return -ENOMEM; } @@ -2885,7 +2885,7 @@ static int __setup_output_field(void) strp++; if (!strlen(strp)) { - error("Invalid --fields key: `+'"); + pr_err("Invalid --fields key: `+'"); goto out; } diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index df051a52393c..ebc88a74e67b 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -56,7 +56,10 @@ static int inline_list__append(char *filename, char *funcname, int line_nr, } } - list_add_tail(&ilist->list, &node->val); + if (callchain_param.order == ORDER_CALLEE) + list_add_tail(&ilist->list, &node->val); + else + list_add(&ilist->list, &node->val); return 0; } @@ -200,12 +203,14 @@ static void addr2line_cleanup(struct a2l_data *a2l) #define MAX_INLINE_NEST 1024 -static void inline_list__reverse(struct inline_node *node) +static int inline_list__append_dso_a2l(struct dso *dso, + struct inline_node *node) { - struct inline_list *ilist, *n; + struct a2l_data *a2l = dso->a2l; + char *funcname = a2l->funcname ? strdup(a2l->funcname) : NULL; + char *filename = a2l->filename ? strdup(a2l->filename) : NULL; - list_for_each_entry_safe_reverse(ilist, n, &node->val, list) - list_move_tail(&ilist->list, &node->val); + return inline_list__append(filename, funcname, a2l->line, node, dso); } static int addr2line(const char *dso_name, u64 addr, @@ -230,36 +235,36 @@ static int addr2line(const char *dso_name, u64 addr, bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l); - if (a2l->found && unwind_inlines) { + if (!a2l->found) + return 0; + + if (unwind_inlines) { int cnt = 0; + if (node && inline_list__append_dso_a2l(dso, node)) + return 0; + while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, &a2l->funcname, &a2l->line) && cnt++ < MAX_INLINE_NEST) { if (node != NULL) { - if (inline_list__append(strdup(a2l->filename), - strdup(a2l->funcname), - a2l->line, node, - dso) != 0) + if (inline_list__append_dso_a2l(dso, node)) return 0; + // found at least one inline frame + ret = 1; } } + } - if ((node != NULL) && - (callchain_param.order != ORDER_CALLEE)) { - inline_list__reverse(node); - } + if (file) { + *file = a2l->filename ? strdup(a2l->filename) : NULL; + ret = *file ? 1 : 0; } - if (a2l->found && a2l->filename) { - *file = strdup(a2l->filename); + if (line) *line = a2l->line; - if (*file) - ret = 1; - } - return ret; } @@ -278,8 +283,6 @@ void dso__free_a2l(struct dso *dso) static struct inline_node *addr2inlines(const char *dso_name, u64 addr, struct dso *dso) { - char *file = NULL; - unsigned int line = 0; struct inline_node *node; node = zalloc(sizeof(*node)); @@ -291,7 +294,7 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, INIT_LIST_HEAD(&node->val); node->addr = addr; - if (!addr2line(dso_name, addr, &file, &line, dso, TRUE, node)) + if (!addr2line(dso_name, addr, NULL, NULL, dso, TRUE, node)) goto out_free_inline_node; if (list_empty(&node->val)) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index ac10cc675d39..719d6cb86952 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; static struct rblist runtime_saved_values; static bool have_frontend_stalled; @@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void) memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); + memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); + memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); next = rb_first(&runtime_saved_values.entries); while (next) { @@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, SMI_NUM)) + update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, APERF)) + update_stats(&runtime_aperf_stats[ctx][cpu], count[0]); if (counter->collect_stat) { struct saved_value *v = saved_value_lookup(counter, cpu, ctx, @@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu) return sanitize_val(1.0 - sum); } +static void print_smi_cost(int cpu, struct perf_evsel *evsel, + struct perf_stat_output_ctx *out) +{ + double smi_num, aperf, cycles, cost = 0.0; + int ctx = evsel_context(evsel); + const char *color = NULL; + + smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); + aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); + cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); + + if ((cycles == 0) || (aperf == 0)) + return; + + if (smi_num) + cost = (aperf - cycles) / aperf * 100.00; + + if (cost > 10) + color = PERF_COLOR_RED; + out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost); + out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); +} + void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out) @@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, } snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); + } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { + print_smi_cost(cpu, evsel, out); } else { print_metric(ctxp, NULL, NULL, NULL, 0); } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c58174443dc1..53b9a994a3dc 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), + ID(SMI_NUM, msr/smi/), + ID(APERF, msr/aperf/), }; #undef ID diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0a65ae23f495..7522bf10b03e 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -22,6 +22,8 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED, PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES, PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES, + PERF_STAT_EVSEL_ID__SMI_NUM, + PERF_STAT_EVSEL_ID__APERF, PERF_STAT_EVSEL_ID__MAX, }; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 318424ea561d..802d743378af 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -42,6 +42,7 @@ #include <stdarg.h> #include <stddef.h> #include <string.h> +#include <linux/compiler.h> #include <sys/types.h> extern char strbuf_slopbuf[]; @@ -85,8 +86,7 @@ static inline int strbuf_addstr(struct strbuf *sb, const char *s) { return strbuf_add(sb, s, strlen(s)); } -__attribute__((format(printf,2,3))) -int strbuf_addf(struct strbuf *sb, const char *fmt, ...); +int strbuf_addf(struct strbuf *sb, const char *fmt, ...) __printf(2, 3); /* XXX: if read fails, any partial read is undone */ ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index e7ee47f7377a..502505cf236a 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -637,43 +637,6 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata) return 0; } -static int decompress_kmodule(struct dso *dso, const char *name, - enum dso_binary_type type) -{ - int fd = -1; - char tmpbuf[] = "/tmp/perf-kmod-XXXXXX"; - struct kmod_path m; - - if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && - type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP && - type != DSO_BINARY_TYPE__BUILD_ID_CACHE) - return -1; - - if (type == DSO_BINARY_TYPE__BUILD_ID_CACHE) - name = dso->long_name; - - if (kmod_path__parse_ext(&m, name) || !m.comp) - return -1; - - fd = mkstemp(tmpbuf); - if (fd < 0) { - dso->load_errno = errno; - goto out; - } - - if (!decompress_to_file(m.ext, name, fd)) { - dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; - close(fd); - fd = -1; - } - - unlink(tmpbuf); - -out: - free(m.ext); - return fd; -} - bool symsrc__possibly_runtime(struct symsrc *ss) { return ss->dynsym || ss->opdsec; @@ -705,9 +668,11 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, int fd; if (dso__needs_decompress(dso)) { - fd = decompress_kmodule(dso, name, type); + fd = dso__decompress_kmodule_fd(dso, name); if (fd < 0) return -1; + + type = dso->symtab_type; } else { fd = open(name, O_RDONLY); if (fd < 0) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8f2b068ff756..e7a98dbd2aed 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1562,10 +1562,6 @@ int dso__load(struct dso *dso, struct map *map) if (!runtime_ss && syms_ss) runtime_ss = syms_ss; - if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE) - if (dso__build_id_is_kmod(dso, name, PATH_MAX)) - kmod = true; - if (syms_ss) ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod); else diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 746bbee645d9..e0a6e9a6a053 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -24,7 +24,7 @@ #include <errno.h> #include "../perf.h" -#include "util.h" +#include "debug.h" #include "trace-event.h" #include "sane_ctype.h" @@ -150,7 +150,7 @@ void parse_ftrace_printk(struct pevent *pevent, while (line) { addr_str = strtok_r(line, ":", &fmt); if (!addr_str) { - warning("printk format with empty entry"); + pr_warning("printk format with empty entry"); break; } addr = strtoull(addr_str, NULL, 16); diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index f90e11a555b2..7755a5e0fe5e 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -39,6 +39,14 @@ static int __report_module(struct addr_location *al, u64 ip, return 0; mod = dwfl_addrmodule(ui->dwfl, ip); + if (mod) { + Dwarf_Addr s; + + dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); + if (s != al->map->start) + mod = 0; + } + if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1, al->map->start, @@ -168,12 +176,24 @@ frame_callback(Dwfl_Frame *state, void *arg) { struct unwind_info *ui = arg; Dwarf_Addr pc; + bool isactivation; if (!dwfl_frame_pc(state, &pc, NULL)) { pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } + // report the module before we query for isactivation + report_module(pc, ui); + + if (!dwfl_frame_pc(state, &pc, &isactivation)) { + pr_err("%s", dwfl_errmsg(-1)); + return DWARF_CB_ABORT; + } + + if (!isactivation) + --pc; + return entry(pc, ui) || !(--ui->max_stack) ? DWARF_CB_ABORT : DWARF_CB_OK; } @@ -220,7 +240,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui); - if (err && !ui->max_stack) + if (err && ui->max_stack != max_stack) err = 0; /* diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index f8455bed6e65..672c2ada9357 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -692,6 +692,17 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, while (!ret && (unw_step(&c) > 0) && i < max_stack) { unw_get_reg(&c, UNW_REG_IP, &ips[i]); + + /* + * Decrement the IP for any non-activation frames. + * this is required to properly find the srcline + * for caller frames. + * See also the documentation for dwfl_frame_pc(), + * which this code tries to replicate. + */ + if (unw_is_signal_frame(&c) <= 0) + --ips[i]; + ++i; } diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 996046a66fe5..6cc9d9888ce0 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -9,75 +9,17 @@ #include "util.h" #include "debug.h" -static void report(const char *prefix, const char *err, va_list params) -{ - char msg[1024]; - vsnprintf(msg, sizeof(msg), err, params); - fprintf(stderr, " %s%s\n", prefix, msg); -} - -static NORETURN void usage_builtin(const char *err) +static __noreturn void usage_builtin(const char *err) { fprintf(stderr, "\n Usage: %s\n", err); exit(129); } -static NORETURN void die_builtin(const char *err, va_list params) -{ - report(" Fatal: ", err, params); - exit(128); -} - -static void error_builtin(const char *err, va_list params) -{ - report(" Error: ", err, params); -} - -static void warn_builtin(const char *warn, va_list params) -{ - report(" Warning: ", warn, params); -} - /* If we are in a dlopen()ed .so write to a global variable would segfault * (ugh), so keep things static. */ -static void (*usage_routine)(const char *err) NORETURN = usage_builtin; -static void (*error_routine)(const char *err, va_list params) = error_builtin; -static void (*warn_routine)(const char *err, va_list params) = warn_builtin; - -void set_warning_routine(void (*routine)(const char *err, va_list params)) -{ - warn_routine = routine; -} +static void (*usage_routine)(const char *err) __noreturn = usage_builtin; void usage(const char *err) { usage_routine(err); } - -void die(const char *err, ...) -{ - va_list params; - - va_start(params, err); - die_builtin(err, params); - va_end(params); -} - -int error(const char *err, ...) -{ - va_list params; - - va_start(params, err); - error_routine(err, params); - va_end(params); - return -1; -} - -void warning(const char *warn, ...) -{ - va_list params; - - va_start(params, warn); - warn_routine(warn, params); - va_end(params); -} diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 28c9f335006c..988111e0bab5 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -343,43 +343,6 @@ int perf_event_paranoid(void) return value; } - -bool find_process(const char *name) -{ - size_t len = strlen(name); - DIR *dir; - struct dirent *d; - int ret = -1; - - dir = opendir(procfs__mountpoint()); - if (!dir) - return false; - - /* Walk through the directory. */ - while (ret && (d = readdir(dir)) != NULL) { - char path[PATH_MAX]; - char *data; - size_t size; - - if ((d->d_type != DT_DIR) || - !strcmp(".", d->d_name) || - !strcmp("..", d->d_name)) - continue; - - scnprintf(path, sizeof(path), "%s/%s/comm", - procfs__mountpoint(), d->d_name); - - if (filename__read_str(path, &data, &size)) - continue; - - ret = strncmp(name, data, len); - free(data); - } - - closedir(dir); - return ret ? false : true; -} - static int fetch_ubuntu_kernel_version(unsigned int *puint) { @@ -387,8 +350,12 @@ fetch_ubuntu_kernel_version(unsigned int *puint) size_t line_len = 0; char *ptr, *line = NULL; int version, patchlevel, sublevel, err; - FILE *vsig = fopen("/proc/version_signature", "r"); + FILE *vsig; + + if (!puint) + return 0; + vsig = fopen("/proc/version_signature", "r"); if (!vsig) { pr_debug("Open /proc/version_signature failed: %s\n", strerror(errno)); @@ -418,8 +385,7 @@ fetch_ubuntu_kernel_version(unsigned int *puint) goto errout; } - if (puint) - *puint = (version << 16) + (patchlevel << 8) + sublevel; + *puint = (version << 16) + (patchlevel << 8) + sublevel; err = 0; errout: free(line); @@ -446,6 +412,9 @@ fetch_kernel_version(unsigned int *puint, char *str, str[str_size - 1] = '\0'; } + if (!puint || int_ver_ready) + return 0; + err = sscanf(utsname.release, "%d.%d.%d", &version, &patchlevel, &sublevel); @@ -455,8 +424,7 @@ fetch_kernel_version(unsigned int *puint, char *str, return -1; } - if (puint && !int_ver_ready) - *puint = (version << 16) + (patchlevel << 8) + sublevel; + *puint = (version << 16) + (patchlevel << 8) + sublevel; return 0; } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 5dfb9bb6482d..2c9e58a45310 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -1,7 +1,6 @@ #ifndef GIT_COMPAT_UTIL_H #define GIT_COMPAT_UTIL_H -#define _ALL_SOURCE 1 #define _BSD_SOURCE 1 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ #define _DEFAULT_SOURCE 1 @@ -11,24 +10,12 @@ #include <stddef.h> #include <stdlib.h> #include <stdarg.h> +#include <linux/compiler.h> #include <linux/types.h> -#ifdef __GNUC__ -#define NORETURN __attribute__((__noreturn__)) -#else -#define NORETURN -#ifndef __attribute__ -#define __attribute__(x) -#endif -#endif - /* General helper functions */ -void usage(const char *err) NORETURN; -void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); -int error(const char *err, ...) __attribute__((format (printf, 1, 2))); -void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); - -void set_warning_routine(void (*routine)(const char *err, va_list params)); +void usage(const char *err) __noreturn; +void die(const char *err, ...) __noreturn __printf(1, 2); static inline void *zalloc(size_t size) { @@ -57,8 +44,6 @@ int hex2u64(const char *ptr, u64 *val); extern unsigned int page_size; extern int cacheline_size; -bool find_process(const char *name); - int fetch_kernel_version(unsigned int *puint, char *str, size_t str_sz); #define KVER_VERSION(x) (((x) >> 16) & 0xff) diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore new file mode 100644 index 000000000000..cba3d994995c --- /dev/null +++ b/tools/power/acpi/.gitignore @@ -0,0 +1,4 @@ +acpidbg +acpidump +ec +include diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index c04e8fea2c60..025c1b07049d 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -750,9 +750,9 @@ acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 msec_timeout) { acpi_status status = AE_OK; sem_t *sem = (sem_t *) handle; + int ret_val; #ifndef ACPI_USE_ALTERNATE_TIMEOUT struct timespec time; - int ret_val; #endif if (!sem) { @@ -778,7 +778,10 @@ acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 msec_timeout) case ACPI_WAIT_FOREVER: - if (sem_wait(sem)) { + while (((ret_val = sem_wait(sem)) == -1) && (errno == EINTR)) { + continue; /* Restart if interrupted */ + } + if (ret_val != 0) { status = (AE_TIME); } break; @@ -831,7 +834,8 @@ acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 msec_timeout) while (((ret_val = sem_timedwait(sem, &time)) == -1) && (errno == EINTR)) { - continue; + continue; /* Restart if interrupted */ + } if (ret_val != 0) { diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 6437ef39aeea..5fd5c5b8c7b8 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -26,6 +26,15 @@ union msr_pstate { unsigned res3:21; unsigned en:1; } bits; + struct { + unsigned fid:8; + unsigned did:6; + unsigned vid:8; + unsigned iddval:8; + unsigned idddiv:2; + unsigned res1:30; + unsigned en:1; + } fam17h_bits; unsigned long long val; }; @@ -35,6 +44,8 @@ static int get_did(int family, union msr_pstate pstate) if (family == 0x12) t = pstate.val & 0xf; + else if (family == 0x17) + t = pstate.fam17h_bits.did; else t = pstate.bits.did; @@ -44,16 +55,20 @@ static int get_did(int family, union msr_pstate pstate) static int get_cof(int family, union msr_pstate pstate) { int t; - int fid, did; + int fid, did, cof; did = get_did(family, pstate); - - t = 0x10; - fid = pstate.bits.fid; - if (family == 0x11) - t = 0x8; - - return (100 * (fid + t)) >> did; + if (family == 0x17) { + fid = pstate.fam17h_bits.fid; + cof = 200 * fid / did; + } else { + t = 0x10; + fid = pstate.bits.fid; + if (family == 0x11) + t = 0x8; + cof = (100 * (fid + t)) >> did; + } + return cof; } /* Needs: diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index afb66f80554e..799a18be60aa 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -70,6 +70,8 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL, #define CPUPOWER_CAP_IS_SNB 0x00000020 #define CPUPOWER_CAP_INTEL_IDA 0x00000040 +#define CPUPOWER_AMD_CPBDIS 0x02000000 + #define MAX_HW_PSTATES 10 struct cpupower_cpu_info { diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 1609243f5c64..601d719d4e60 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -2,11 +2,14 @@ #include "helpers/helpers.h" +#define MSR_AMD_HWCR 0xc0010015 + int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, int *states) { struct cpupower_cpu_info cpu_info; int ret; + unsigned long long val; *support = *active = *states = 0; @@ -16,10 +19,22 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_CBP) { *support = 1; - amd_pci_get_num_boost_states(active, states); - if (ret <= 0) - return ret; - *support = 1; + + /* AMD Family 0x17 does not utilize PCI D18F4 like prior + * families and has no fixed discrete boost states but + * has Hardware determined variable increments instead. + */ + + if (cpu_info.family == 0x17) { + if (!read_msr(cpu, MSR_AMD_HWCR, &val)) { + if (!(val & CPUPOWER_AMD_CPBDIS)) + *active = 1; + } + } else { + ret = amd_pci_get_num_boost_states(active, states); + if (ret) + return ret; + } } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA) *support = *active = 1; return 0; diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b11294730771..0dafba2c1e7d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -57,7 +57,6 @@ unsigned int list_header_only; unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; -unsigned int do_skl_residency; unsigned int do_slm_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; @@ -93,6 +92,7 @@ unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; +int do_migrate; double discover_bclk(unsigned int family, unsigned int model); unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ @@ -151,6 +151,8 @@ size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_COUNTERS 16 struct thread_data { + struct timeval tv_begin; + struct timeval tv_end; unsigned long long tsc; unsigned long long aperf; unsigned long long mperf; @@ -301,6 +303,9 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int cpu_migrate(int cpu) { + if (!do_migrate) + return 0; + CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) @@ -384,8 +389,14 @@ struct msr_counter bic[] = { { 0x0, "CPU" }, { 0x0, "Mod%c6" }, { 0x0, "sysfs" }, + { 0x0, "Totl%C0" }, + { 0x0, "Any%C0" }, + { 0x0, "GFX%C0" }, + { 0x0, "CPUGFX%" }, }; + + #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) #define BIC_Package (1ULL << 0) #define BIC_Avg_MHz (1ULL << 1) @@ -426,6 +437,10 @@ struct msr_counter bic[] = { #define BIC_CPU (1ULL << 36) #define BIC_Mod_c6 (1ULL << 37) #define BIC_sysfs (1ULL << 38) +#define BIC_Totl_c0 (1ULL << 39) +#define BIC_Any_c0 (1ULL << 40) +#define BIC_GFX_c0 (1ULL << 41) +#define BIC_CPUGFX (1ULL << 42) unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; unsigned long long bic_present = BIC_sysfs; @@ -521,6 +536,8 @@ void print_header(char *delim) struct msr_counter *mp; int printed = 0; + if (debug) + outp += sprintf(outp, "usec %s", delim); if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -599,12 +616,14 @@ void print_header(char *delim) if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); - if (do_skl_residency) { + if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); + if (DO_BIC(BIC_Any_c0)) outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); + if (DO_BIC(BIC_GFX_c0)) outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPUGFX)) outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); - } if (DO_BIC(BIC_Pkgpc2)) outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); @@ -771,6 +790,14 @@ int format_counters(struct thread_data *t, struct core_data *c, (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) return 0; + if (debug) { + /* on each row, print how many usec each timestamp took to gather */ + struct timeval tv; + + timersub(&t->tv_end, &t->tv_begin, &tv); + outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); + } + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; tsc = t->tsc * tsc_tweak; @@ -912,12 +939,14 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ - if (do_skl_residency) { + if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc); + if (DO_BIC(BIC_Any_c0)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc); + if (DO_BIC(BIC_GFX_c0)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc); + if (DO_BIC(BIC_CPUGFX)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc); - } if (DO_BIC(BIC_Pkgpc2)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc); @@ -1038,12 +1067,16 @@ delta_package(struct pkg_data *new, struct pkg_data *old) int i; struct msr_counter *mp; - if (do_skl_residency) { + + if (DO_BIC(BIC_Totl_c0)) old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; + if (DO_BIC(BIC_Any_c0)) old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; + if (DO_BIC(BIC_GFX_c0)) old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; + if (DO_BIC(BIC_CPUGFX)) old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; - } + old->pc2 = new->pc2 - old->pc2; if (DO_BIC(BIC_Pkgpc3)) old->pc3 = new->pc3 - old->pc3; @@ -1292,12 +1325,14 @@ int sum_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; - if (do_skl_residency) { + if (DO_BIC(BIC_Totl_c0)) average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; + if (DO_BIC(BIC_Any_c0)) average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; + if (DO_BIC(BIC_GFX_c0)) average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; + if (DO_BIC(BIC_CPUGFX)) average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; - } average.packages.pc2 += p->pc2; if (DO_BIC(BIC_Pkgpc3)) @@ -1357,12 +1392,14 @@ void compute_average(struct thread_data *t, struct core_data *c, average.cores.c7 /= topo.num_cores; average.cores.mc6_us /= topo.num_cores; - if (do_skl_residency) { + if (DO_BIC(BIC_Totl_c0)) average.packages.pkg_wtd_core_c0 /= topo.num_packages; + if (DO_BIC(BIC_Any_c0)) average.packages.pkg_any_core_c0 /= topo.num_packages; + if (DO_BIC(BIC_GFX_c0)) average.packages.pkg_any_gfxe_c0 /= topo.num_packages; + if (DO_BIC(BIC_CPUGFX)) average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages; - } average.packages.pc2 /= topo.num_packages; if (DO_BIC(BIC_Pkgpc3)) @@ -1482,6 +1519,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) struct msr_counter *mp; int i; + + gettimeofday(&t->tv_begin, (struct timezone *)NULL); + if (cpu_migrate(cpu)) { fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; @@ -1565,7 +1605,7 @@ retry: /* collect core counters only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) - return 0; + goto done; if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) @@ -1601,15 +1641,21 @@ retry: /* collect package counters only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) - return 0; + goto done; - if (do_skl_residency) { + if (DO_BIC(BIC_Totl_c0)) { if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) return -10; + } + if (DO_BIC(BIC_Any_c0)) { if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) return -11; + } + if (DO_BIC(BIC_GFX_c0)) { if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) return -12; + } + if (DO_BIC(BIC_CPUGFX)) { if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) return -13; } @@ -1688,6 +1734,8 @@ retry: if (get_mp(cpu, mp, &p->counter[i])) return -10; } +done: + gettimeofday(&t->tv_end, (struct timezone *)NULL); return 0; } @@ -3895,6 +3943,9 @@ void decode_misc_enable_msr(void) { unsigned long long msr; + if (!genuine_intel) + return; + if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", base_cpu, msr, @@ -4198,7 +4249,12 @@ void process_cpuid() BIC_PRESENT(BIC_Pkgpc10); } do_irtl_hsw = has_hsw_msrs(family, model); - do_skl_residency = has_skl_msrs(family, model); + if (has_skl_msrs(family, model)) { + BIC_PRESENT(BIC_Totl_c0); + BIC_PRESENT(BIC_Any_c0); + BIC_PRESENT(BIC_GFX_c0); + BIC_PRESENT(BIC_CPUGFX); + } do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); @@ -4578,7 +4634,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 17.04.12" + fprintf(outf, "turbostat version 17.06.23" " - Len Brown <lenb@kernel.org>\n"); } @@ -4951,6 +5007,7 @@ void cmdline(int argc, char **argv) {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, {"list", no_argument, 0, 'l'}, + {"migrate", no_argument, 0, 'm'}, {"out", required_argument, 0, 'o'}, {"quiet", no_argument, 0, 'q'}, {"show", required_argument, 0, 's'}, @@ -4962,7 +5019,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:Jmo:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -5005,6 +5062,9 @@ void cmdline(int argc, char **argv) list_header_only++; quiet++; break; + case 'm': + do_migrate = 1; + break; case 'o': outf = fopen_or_die(optarg, "w"); break; diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index 971c9ffdcb50..a711eec0c895 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -1,10 +1,27 @@ -DESTDIR ?= +CC = $(CROSS_COMPILE)gcc +BUILD_OUTPUT := $(CURDIR) +PREFIX := /usr +DESTDIR := + +ifeq ("$(origin O)", "command line") + BUILD_OUTPUT := $(O) +endif x86_energy_perf_policy : x86_energy_perf_policy.c +CFLAGS += -Wall +CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' + +%: %.c + @mkdir -p $(BUILD_OUTPUT) + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ +.PHONY : clean clean : - rm -f x86_energy_perf_policy + @rm -f $(BUILD_OUTPUT)/x86_energy_perf_policy + +install : x86_energy_perf_policy + install -d $(DESTDIR)$(PREFIX)/bin + install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy + install -d $(DESTDIR)$(PREFIX)/share/man/man8 + install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8 -install : - install x86_energy_perf_policy ${DESTDIR}/usr/bin/ - install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/ diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 index 8eaaad648cdb..17db1c3af4d0 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 @@ -1,104 +1,213 @@ -.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com> +.\" This page Copyright (C) 2010 - 2015 Len Brown <len.brown@intel.com> .\" Distributed under the GPL, Copyleft 1994. .TH X86_ENERGY_PERF_POLICY 8 .SH NAME -x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS +x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers .SH SYNOPSIS -.ft B .B x86_energy_perf_policy -.RB [ "\-c cpu" ] -.RB [ "\-v" ] -.RB "\-r" +.RB "[ options ] [ scope ] [field \ value]" .br -.B x86_energy_perf_policy -.RB [ "\-c cpu" ] -.RB [ "\-v" ] -.RB 'performance' +.RB "scope: \-\-cpu\ cpu-list | \-\-pkg\ pkg-list" .br -.B x86_energy_perf_policy -.RB [ "\-c cpu" ] -.RB [ "\-v" ] -.RB 'normal' +.RB "cpu-list, pkg-list: # | #,# | #-# | all" .br -.B x86_energy_perf_policy -.RB [ "\-c cpu" ] -.RB [ "\-v" ] -.RB 'powersave' +.RB "field: \-\-all | \-\-epb | \-\-hwp-epp | \-\-hwp-min | \-\-hwp-max | \-\-hwp-desired" .br -.B x86_energy_perf_policy -.RB [ "\-c cpu" ] -.RB [ "\-v" ] -.RB n +.RB "other: (\-\-force | \-\-hwp-enable | \-\-turbo-enable) value)" .br +.RB "value: # | default | performance | balance-performance | balance-power | power" .SH DESCRIPTION \fBx86_energy_perf_policy\fP -allows software to convey -its policy for the relative importance of performance -versus energy savings to the processor. +displays and updates energy-performance policy settings specific to +Intel Architecture Processors. Settings are accessed via Model Specific Register (MSR) +updates, no matter if the Linux cpufreq sub-system is enabled or not. -The processor uses this information in model-specific ways -when it must select trade-offs between performance and -energy efficiency. +Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB) +may affect a wide range of hardware decisions, +such as how aggressively the hardware enters and exits CPU idle states (C-states) +and Processor Performance States (P-states). +This policy hint does not replace explicit OS C-state and P-state selection. +Rather, it tells the hardware how aggressively to implement those selections. +Further, it allows the OS to influence energy/performance trade-offs where there +is no software interface, such as in the opportunistic "turbo-mode" P-state range. +Note that MSR_IA32_ENERGY_PERF_BIAS is defined per CPU, +but some implementations +share a single MSR among all CPUs in each processor package. +On those systems, a write to EPB on one processor will +be visible, and will have an effect, on all CPUs +in the same processor package. -This policy hint does not supersede Processor Performance states -(P-states) or CPU Idle power states (C-states), but allows -software to have influence where it would otherwise be unable -to express a preference. +Hardware P-States (HWP) are effectively an expansion of hardware +P-state control from the opportunistic turbo-mode P-state range +to include the entire range of available P-states. +On Broadwell Xeon, the initial HWP implementation, EBP influenced HWP. +That influence was removed in subsequent generations, +where it was moved to the +Energy_Performance_Preference (EPP) field in +a pair of dedicated MSRs -- MSR_IA32_HWP_REQUEST and MSR_IA32_HWP_REQUEST_PKG. -For example, this setting may tell the hardware how -aggressively or conservatively to control frequency -in the "turbo range" above the explicitly OS-controlled -P-state frequency range. It may also tell the hardware -how aggressively is should enter the OS requested C-states. +EPP is the most commonly managed knob in HWP mode, +but MSR_IA32_HWP_REQUEST also allows the user to specify +minimum-frequency for Quality-of-Service, +and maximum-frequency for power-capping. +MSR_IA32_HWP_REQUEST is defined per-CPU. -Support for this feature is indicated by CPUID.06H.ECX.bit3 -per the Intel Architectures Software Developer's Manual. +MSR_IA32_HWP_REQUEST_PKG has the same capability as MSR_IA32_HWP_REQUEST, +but it can simultaneously set the default policy for all CPUs within a package. +A bit in per-CPU MSR_IA32_HWP_REQUEST indicates whether it is +over-ruled-by or exempt-from MSR_IA32_HWP_REQUEST_PKG. -.SS Options -\fB-c\fP limits operation to a single CPU. -The default is to operate on all CPUs. -Note that MSR_IA32_ENERGY_PERF_BIAS is defined per -logical processor, but that the initial implementations -of the MSR were shared among all processors in each package. -.PP -\fB-v\fP increases verbosity. By default -x86_energy_perf_policy is silent. -.PP -\fB-r\fP is for "read-only" mode - the unchanged state -is read and displayed. +MSR_HWP_CAPABILITIES shows the default values for the fields +in MSR_IA32_HWP_REQUEST. It is displayed when no values +are being written. + +.SS SCOPE OPTIONS .PP -.I performance -Set a policy where performance is paramount. -The processor will be unwilling to sacrifice any performance -for the sake of energy saving. This is the hardware default. +\fB-c, --cpu\fP Operate on the MSR_IA32_HWP_REQUEST for each CPU in a CPU-list. +The CPU-list may be comma-separated CPU numbers, with dash for range +or the string "all". Eg. '--cpu 1,4,6-8' or '--cpu all'. +When --cpu is used, \fB--hwp-use-pkg\fP is available, which specifies whether the per-cpu +MSR_IA32_HWP_REQUEST should be over-ruled by MSR_IA32_HWP_REQUEST_PKG (1), +or exempt from MSR_IA32_HWP_REQUEST_PKG (0). + +\fB-p, --pkg\fP Operate on the MSR_IA32_HWP_REQUEST_PKG for each package in the package-list. +The list is a string of individual package numbers separated +by commas, and or ranges of package numbers separated by a dash, +or the string "all". +For example '--pkg 1,3' or '--pkg all' + +.SS VALUE OPTIONS .PP -.I normal +.I normal | default Set a policy with a normal balance between performance and energy efficiency. The processor will tolerate minor performance compromise for potentially significant energy savings. -This reasonable default for most desktops and servers. +This is a reasonable default for most desktops and servers. +"default" is a synonym for "normal". .PP -.I powersave +.I performance +Set a policy for maximum performance, +accepting no performance sacrifice for the benefit of energy efficiency. +.PP +.I balance-performance +Set a policy with a high priority on performance, +but allowing some performance loss to benefit energy efficiency. +.PP +.I balance-power +Set a policy where the performance and power are balanced. +This is the default. +.PP +.I power Set a policy where the processor can accept -a measurable performance hit to maximize energy efficiency. +a measurable performance impact to maximize energy efficiency. + .PP -.I n -Set MSR_IA32_ENERGY_PERF_BIAS to the specified number. -The range of valid numbers is 0-15, where 0 is maximum -performance and 15 is maximum energy efficiency. +The following table shows the mapping from the value strings above to actual MSR values. +This mapping is defined in the Linux-kernel header, msr-index.h. +.nf +VALUE STRING EPB EPP +performance 0 0 +balance-performance 4 128 +normal, default 6 128 +balance-power 8 192 +power 15 255 +.fi +.PP +For MSR_IA32_HWP_REQUEST performance fields +(--hwp-min, --hwp-max, --hwp-desired), the value option +is in units of 100 MHz, Eg. 12 signifies 1200 MHz. + +.SS FIELD OPTIONS +\fB-a, --all value-string\fP Sets all EPB and EPP and HWP limit fields to the value associated with +the value-string. In addition, enables turbo-mode and HWP-mode, if they were previous disabled. +Thus "--all normal" will set a system without cpufreq into a well known configuration. +.PP +\fB-B, --epb\fP set EPB per-core or per-package. +See value strings in the table above. +.PP +\fB-d, --debug\fP debug increases verbosity. By default +x86_energy_perf_policy is silent for updates, +and verbose for read-only mode. +.PP +\fB-P, --hwp-epp\fP set HWP.EPP per-core or per-package. +See value strings in the table above. +.PP +\fB-m, --hwp-min\fP request HWP to not go below the specified core/bus ratio. +The "default" is the value found in IA32_HWP_CAPABILITIES.min. +.PP +\fB-M, --hwp-max\fP request HWP not exceed a the specified core/bus ratio. +The "default" is the value found in IA32_HWP_CAPABILITIES.max. +.PP +\fB-D, --hwp-desired\fP request HWP 'desired' frequency. +The "normal" setting is 0, which +corresponds to 'full autonomous' HWP control. +Non-zero performance values request a specific performance +level on this processor, specified in multiples of 100 MHz. +.PP +\fB-w, --hwp-window\fP specify integer number of microsec +in the sliding window that HWP uses to maintain average frequency. +This parameter is meaningful only when the "desired" field above is non-zero. +Default is 0, allowing the HW to choose. +.SH OTHER OPTIONS +.PP +\fB-f, --force\fP writes the specified values without bounds checking. +.PP +\fB-U, --hwp-use-pkg\fP (0 | 1), when used in conjunction with --cpu, +indicates whether the per-CPU MSR_IA32_HWP_REQUEST should be overruled (1) +or exempt (0) from per-Package MSR_IA32_HWP_REQUEST_PKG settings. +The default is exempt. +.PP +\fB-H, --hwp-enable\fP enable HardWare-P-state (HWP) mode. Once enabled, system RESET is required to disable HWP mode. +.PP +\fB-t, --turbo-enable\fP enable (1) or disable (0) turbo mode. +.PP +\fB-v, --version\fP print version and exit. +.PP +If no request to change policy is made, +the default behavior is to read +and display the current system state, +including the default capabilities. +.SH WARNING +.PP +This utility writes directly to Model Specific Registers. +There is no locking or coordination should this utility +be used to modify HWP limit fields at the same time that +intel_pstate's sysfs attributes access the same MSRs. +.PP +Note that --hwp-desired and --hwp-window are considered experimental. +Future versions of Linux reserve the right to access these +fields internally -- potentially conflicting with user-space access. +.SH EXAMPLE +.nf +# sudo x86_energy_perf_policy +cpu0: EPB 6 +cpu0: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu0: HWP_CAP: low 1 eff 8 guar 27 high 35 +cpu1: EPB 6 +cpu1: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu1: HWP_CAP: low 1 eff 8 guar 27 high 35 +cpu2: EPB 6 +cpu2: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu2: HWP_CAP: low 1 eff 8 guar 27 high 35 +cpu3: EPB 6 +cpu3: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu3: HWP_CAP: low 1 eff 8 guar 27 high 35 +.fi .SH NOTES -.B "x86_energy_perf_policy " +.B "x86_energy_perf_policy" runs only as root. .SH FILES .ta .nf /dev/cpu/*/msr .fi - .SH "SEE ALSO" +.nf msr(4) +Intel(R) 64 and IA-32 Architectures Software Developer's Manual +.fi .PP .SH AUTHORS .nf -Written by Len Brown <len.brown@intel.com> +Len Brown diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 40b3e5482f8a..65bbe627a425 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -3,322 +3,1424 @@ * policy preference bias on recent X86 processors. */ /* - * Copyright (c) 2010, Intel Corporation. + * Copyright (c) 2010 - 2017 Intel Corporation. * Len Brown <len.brown@intel.com> * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * This program is released under GPL v2 */ +#define _GNU_SOURCE +#include MSRHEADER #include <stdio.h> #include <unistd.h> #include <sys/types.h> +#include <sched.h> #include <sys/stat.h> #include <sys/resource.h> +#include <getopt.h> +#include <err.h> #include <fcntl.h> #include <signal.h> #include <sys/time.h> +#include <limits.h> #include <stdlib.h> #include <string.h> +#include <cpuid.h> +#include <errno.h> + +#define OPTARG_NORMAL (INT_MAX - 1) +#define OPTARG_POWER (INT_MAX - 2) +#define OPTARG_BALANCE_POWER (INT_MAX - 3) +#define OPTARG_BALANCE_PERFORMANCE (INT_MAX - 4) +#define OPTARG_PERFORMANCE (INT_MAX - 5) + +struct msr_hwp_cap { + unsigned char highest; + unsigned char guaranteed; + unsigned char efficient; + unsigned char lowest; +}; -unsigned int verbose; /* set with -v */ -unsigned int read_only; /* set with -r */ +struct msr_hwp_request { + unsigned char hwp_min; + unsigned char hwp_max; + unsigned char hwp_desired; + unsigned char hwp_epp; + unsigned int hwp_window; + unsigned char hwp_use_pkg; +} req_update; + +unsigned int debug; +unsigned int verbose; +unsigned int force; char *progname; -unsigned long long new_bias; -int cpu = -1; +int base_cpu; +unsigned char update_epb; +unsigned long long new_epb; +unsigned char turbo_is_enabled; +unsigned char update_turbo; +unsigned char turbo_update_value; +unsigned char update_hwp_epp; +unsigned char update_hwp_min; +unsigned char update_hwp_max; +unsigned char update_hwp_desired; +unsigned char update_hwp_window; +unsigned char update_hwp_use_pkg; +unsigned char update_hwp_enable; +#define hwp_update_enabled() (update_hwp_enable | update_hwp_epp | update_hwp_max | update_hwp_min | update_hwp_desired | update_hwp_window | update_hwp_use_pkg) +int max_cpu_num; +int max_pkg_num; +#define MAX_PACKAGES 64 +unsigned int first_cpu_in_pkg[MAX_PACKAGES]; +unsigned long long pkg_present_set; +unsigned long long pkg_selected_set; +cpu_set_t *cpu_present_set; +cpu_set_t *cpu_selected_set; +int genuine_intel; + +size_t cpu_setsize; + +char *proc_stat = "/proc/stat"; + +unsigned int has_epb; /* MSR_IA32_ENERGY_PERF_BIAS */ +unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ + /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ +unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ +unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ +unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ +unsigned int has_hwp_request_pkg; /* IA32_HWP_REQUEST_PKG */ + +unsigned int bdx_highest_ratio; /* - * Usage: - * - * -c cpu: limit action to a single CPU (default is all CPUs) - * -v: verbose output (can invoke more than once) - * -r: read-only, don't change any settings - * - * performance - * Performance is paramount. - * Unwilling to sacrifice any performance - * for the sake of energy saving. (hardware default) - * - * normal - * Can tolerate minor performance compromise - * for potentially significant energy savings. - * (reasonable default for most desktops and servers) - * - * powersave - * Can tolerate significant performance hit - * to maximize energy savings. - * - * n - * a numerical value to write to the underlying MSR. + * maintain compatibility with original implementation, but don't document it: */ void usage(void) { - printf("%s: [-c cpu] [-v] " - "(-r | 'performance' | 'normal' | 'powersave' | n)\n", - progname); + fprintf(stderr, "%s [options] [scope][field value]\n", progname); + fprintf(stderr, "scope: --cpu cpu-list [--hwp-use-pkg #] | --pkg pkg-list\n"); + fprintf(stderr, "field: --all | --epb | --hwp-epp | --hwp-min | --hwp-max | --hwp-desired\n"); + fprintf(stderr, "other: --hwp-enable | --turbo-enable (0 | 1) | --help | --force\n"); + fprintf(stderr, + "value: ( # | \"normal\" | \"performance\" | \"balance-performance\" | \"balance-power\"| \"power\")\n"); + fprintf(stderr, "--hwp-window usec\n"); + + fprintf(stderr, "Specify only Energy Performance BIAS (legacy usage):\n"); + fprintf(stderr, "%s: [-c cpu] [-v] (-r | policy-value )\n", progname); + exit(1); } -#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 +/* + * If bdx_highest_ratio is set, + * then we must translate between MSR format and simple ratio + * used on the cmdline. + */ +int ratio_2_msr_perf(int ratio) +{ + int msr_perf; + + if (!bdx_highest_ratio) + return ratio; + + msr_perf = ratio * 255 / bdx_highest_ratio; + + if (debug) + fprintf(stderr, "%d = ratio_to_msr_perf(%d)\n", msr_perf, ratio); + + return msr_perf; +} +int msr_perf_2_ratio(int msr_perf) +{ + int ratio; + double d; + + if (!bdx_highest_ratio) + return msr_perf; + + d = (double)msr_perf * (double) bdx_highest_ratio / 255.0; + d = d + 0.5; /* round */ + ratio = (int)d; + + if (debug) + fprintf(stderr, "%d = msr_perf_ratio(%d) {%f}\n", ratio, msr_perf, d); + + return ratio; +} +int parse_cmdline_epb(int i) +{ + if (!has_epb) + errx(1, "EPB not enabled on this platform"); + + update_epb = 1; + + switch (i) { + case OPTARG_POWER: + return ENERGY_PERF_BIAS_POWERSAVE; + case OPTARG_BALANCE_POWER: + return ENERGY_PERF_BIAS_BALANCE_POWERSAVE; + case OPTARG_NORMAL: + return ENERGY_PERF_BIAS_NORMAL; + case OPTARG_BALANCE_PERFORMANCE: + return ENERGY_PERF_BIAS_BALANCE_PERFORMANCE; + case OPTARG_PERFORMANCE: + return ENERGY_PERF_BIAS_PERFORMANCE; + } + if (i < 0 || i > ENERGY_PERF_BIAS_POWERSAVE) + errx(1, "--epb must be from 0 to 15"); + return i; +} + +#define HWP_CAP_LOWEST 0 +#define HWP_CAP_HIGHEST 255 + +/* + * "performance" changes hwp_min to cap.highest + * All others leave it at cap.lowest + */ +int parse_cmdline_hwp_min(int i) +{ + update_hwp_min = 1; + + switch (i) { + case OPTARG_POWER: + case OPTARG_BALANCE_POWER: + case OPTARG_NORMAL: + case OPTARG_BALANCE_PERFORMANCE: + return HWP_CAP_LOWEST; + case OPTARG_PERFORMANCE: + return HWP_CAP_HIGHEST; + } + return i; +} +/* + * "power" changes hwp_max to cap.lowest + * All others leave it at cap.highest + */ +int parse_cmdline_hwp_max(int i) +{ + update_hwp_max = 1; + + switch (i) { + case OPTARG_POWER: + return HWP_CAP_LOWEST; + case OPTARG_NORMAL: + case OPTARG_BALANCE_POWER: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_PERFORMANCE: + return HWP_CAP_HIGHEST; + } + return i; +} +/* + * for --hwp-des, all strings leave it in autonomous mode + * If you want to change it, you need to explicitly pick a value + */ +int parse_cmdline_hwp_desired(int i) +{ + update_hwp_desired = 1; + + switch (i) { + case OPTARG_POWER: + case OPTARG_BALANCE_POWER: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_NORMAL: + case OPTARG_PERFORMANCE: + return 0; /* autonomous */ + } + return i; +} + +int parse_cmdline_hwp_window(int i) +{ + unsigned int exponent; + + update_hwp_window = 1; + + switch (i) { + case OPTARG_POWER: + case OPTARG_BALANCE_POWER: + case OPTARG_NORMAL: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_PERFORMANCE: + return 0; + } + if (i < 0 || i > 1270000000) { + fprintf(stderr, "--hwp-window: 0 for auto; 1 - 1270000000 usec for window duration\n"); + usage(); + } + for (exponent = 0; ; ++exponent) { + if (debug) + printf("%d 10^%d\n", i, exponent); + + if (i <= 127) + break; + + i = i / 10; + } + if (debug) + fprintf(stderr, "%d*10^%d: 0x%x\n", i, exponent, (exponent << 7) | i); + + return (exponent << 7) | i; +} +int parse_cmdline_hwp_epp(int i) +{ + update_hwp_epp = 1; + + switch (i) { + case OPTARG_POWER: + return HWP_EPP_POWERSAVE; + case OPTARG_BALANCE_POWER: + return HWP_EPP_BALANCE_POWERSAVE; + case OPTARG_NORMAL: + case OPTARG_BALANCE_PERFORMANCE: + return HWP_EPP_BALANCE_PERFORMANCE; + case OPTARG_PERFORMANCE: + return HWP_EPP_PERFORMANCE; + } + if (i < 0 || i > 0xff) { + fprintf(stderr, "--hwp-epp must be from 0 to 0xff\n"); + usage(); + } + return i; +} +int parse_cmdline_turbo(int i) +{ + update_turbo = 1; + + switch (i) { + case OPTARG_POWER: + return 0; + case OPTARG_NORMAL: + case OPTARG_BALANCE_POWER: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_PERFORMANCE: + return 1; + } + if (i < 0 || i > 1) { + fprintf(stderr, "--turbo-enable: 1 to enable, 0 to disable\n"); + usage(); + } + return i; +} + +int parse_optarg_string(char *s) +{ + int i; + char *endptr; + + if (!strncmp(s, "default", 7)) + return OPTARG_NORMAL; + + if (!strncmp(s, "normal", 6)) + return OPTARG_NORMAL; + + if (!strncmp(s, "power", 9)) + return OPTARG_POWER; + + if (!strncmp(s, "balance-power", 17)) + return OPTARG_BALANCE_POWER; + + if (!strncmp(s, "balance-performance", 19)) + return OPTARG_BALANCE_PERFORMANCE; + + if (!strncmp(s, "performance", 11)) + return OPTARG_PERFORMANCE; + + i = strtol(s, &endptr, 0); + if (s == endptr) { + fprintf(stderr, "no digits in \"%s\"\n", s); + usage(); + } + if (i == LONG_MIN || i == LONG_MAX) + errx(-1, "%s", s); + + if (i > 0xFF) + errx(-1, "%d (0x%x) must be < 256", i, i); + + if (i < 0) + errx(-1, "%d (0x%x) must be >= 0", i, i); + return i; +} + +void parse_cmdline_all(char *s) +{ + force++; + update_hwp_enable = 1; + req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(s)); + req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(s)); + req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(s)); + if (has_epb) + new_epb = parse_cmdline_epb(parse_optarg_string(s)); + turbo_update_value = parse_cmdline_turbo(parse_optarg_string(s)); + req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(s)); + req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(s)); +} + +void validate_cpu_selected_set(void) +{ + int cpu; + + if (CPU_COUNT_S(cpu_setsize, cpu_selected_set) == 0) + errx(0, "no CPUs requested"); + + for (cpu = 0; cpu <= max_cpu_num; ++cpu) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_selected_set)) + if (!CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + errx(1, "Requested cpu% is not present", cpu); + } +} + +void parse_cmdline_cpu(char *s) +{ + char *startp, *endp; + int cpu = 0; + + if (pkg_selected_set) { + usage(); + errx(1, "--cpu | --pkg"); + } + cpu_selected_set = CPU_ALLOC((max_cpu_num + 1)); + if (cpu_selected_set == NULL) + err(1, "cpu_selected_set"); + CPU_ZERO_S(cpu_setsize, cpu_selected_set); + + for (startp = s; startp && *startp;) { + + if (*startp == ',') { + startp++; + continue; + } + + if (*startp == '-') { + int end_cpu; -#define BIAS_PERFORMANCE 0 -#define BIAS_BALANCE 6 -#define BIAS_POWERSAVE 15 + startp++; + end_cpu = strtol(startp, &endp, 10); + if (startp == endp) + continue; + + while (cpu <= end_cpu) { + if (cpu > max_cpu_num) + errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num); + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + cpu++; + } + startp = endp; + continue; + } + + if (strncmp(startp, "all", 3) == 0) { + for (cpu = 0; cpu <= max_cpu_num; cpu += 1) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + } + startp += 3; + if (*startp == 0) + break; + } + /* "--cpu even" is not documented */ + if (strncmp(startp, "even", 4) == 0) { + for (cpu = 0; cpu <= max_cpu_num; cpu += 2) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + } + startp += 4; + if (*startp == 0) + break; + } + + /* "--cpu odd" is not documented */ + if (strncmp(startp, "odd", 3) == 0) { + for (cpu = 1; cpu <= max_cpu_num; cpu += 2) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + } + startp += 3; + if (*startp == 0) + break; + } + + cpu = strtol(startp, &endp, 10); + if (startp == endp) + errx(1, "--cpu cpu-set: confused by '%s'", startp); + if (cpu > max_cpu_num) + errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num); + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + startp = endp; + } + + validate_cpu_selected_set(); + +} + +void parse_cmdline_pkg(char *s) +{ + char *startp, *endp; + int pkg = 0; + + if (cpu_selected_set) { + usage(); + errx(1, "--pkg | --cpu"); + } + pkg_selected_set = 0; + + for (startp = s; startp && *startp;) { + + if (*startp == ',') { + startp++; + continue; + } + + if (*startp == '-') { + int end_pkg; + + startp++; + end_pkg = strtol(startp, &endp, 10); + if (startp == endp) + continue; + + while (pkg <= end_pkg) { + if (pkg > max_pkg_num) + errx(1, "Requested pkg%d exceeds max pkg%d", pkg, max_pkg_num); + pkg_selected_set |= 1 << pkg; + pkg++; + } + startp = endp; + continue; + } + + if (strncmp(startp, "all", 3) == 0) { + pkg_selected_set = pkg_present_set; + return; + } + + pkg = strtol(startp, &endp, 10); + if (pkg > max_pkg_num) + errx(1, "Requested pkg%d Exceeds max pkg%d", pkg, max_pkg_num); + pkg_selected_set |= 1 << pkg; + startp = endp; + } +} + +void for_packages(unsigned long long pkg_set, int (func)(int)) +{ + int pkg_num; + + for (pkg_num = 0; pkg_num <= max_pkg_num; ++pkg_num) { + if (pkg_set & (1UL << pkg_num)) + func(pkg_num); + } +} + +void print_version(void) +{ + printf("x86_energy_perf_policy 17.05.11 (C) Len Brown <len.brown@intel.com>\n"); +} void cmdline(int argc, char **argv) { int opt; + int option_index = 0; + + static struct option long_options[] = { + {"all", required_argument, 0, 'a'}, + {"cpu", required_argument, 0, 'c'}, + {"pkg", required_argument, 0, 'p'}, + {"debug", no_argument, 0, 'd'}, + {"hwp-desired", required_argument, 0, 'D'}, + {"epb", required_argument, 0, 'B'}, + {"force", no_argument, 0, 'f'}, + {"hwp-enable", no_argument, 0, 'e'}, + {"help", no_argument, 0, 'h'}, + {"hwp-epp", required_argument, 0, 'P'}, + {"hwp-min", required_argument, 0, 'm'}, + {"hwp-max", required_argument, 0, 'M'}, + {"read", no_argument, 0, 'r'}, + {"turbo-enable", required_argument, 0, 't'}, + {"hwp-use-pkg", required_argument, 0, 'u'}, + {"version", no_argument, 0, 'v'}, + {"hwp-window", required_argument, 0, 'w'}, + {0, 0, 0, 0 } + }; progname = argv[0]; - while ((opt = getopt(argc, argv, "+rvc:")) != -1) { + while ((opt = getopt_long_only(argc, argv, "+a:c:dD:E:e:f:m:M:rt:u:vw", + long_options, &option_index)) != -1) { switch (opt) { + case 'a': + parse_cmdline_all(optarg); + break; + case 'B': + new_epb = parse_cmdline_epb(parse_optarg_string(optarg)); + break; case 'c': - cpu = atoi(optarg); + parse_cmdline_cpu(optarg); + break; + case 'e': + update_hwp_enable = 1; + break; + case 'h': + usage(); + break; + case 'd': + debug++; + verbose++; + break; + case 'f': + force++; + break; + case 'D': + req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(optarg)); + break; + case 'm': + req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(optarg)); + break; + case 'M': + req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(optarg)); + break; + case 'p': + parse_cmdline_pkg(optarg); + break; + case 'P': + req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(optarg)); break; case 'r': - read_only = 1; + /* v1 used -r to specify read-only mode, now the default */ + break; + case 't': + turbo_update_value = parse_cmdline_turbo(parse_optarg_string(optarg)); + break; + case 'u': + update_hwp_use_pkg++; + if (atoi(optarg) == 0) + req_update.hwp_use_pkg = 0; + else + req_update.hwp_use_pkg = 1; break; case 'v': - verbose++; + print_version(); + exit(0); + break; + case 'w': + req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(optarg)); break; default: usage(); } } - /* if -r, then should be no additional optind */ - if (read_only && (argc > optind)) - usage(); - /* - * if no -r , then must be one additional optind + * v1 allowed "performance"|"normal"|"power" with no policy specifier + * to update BIAS. Continue to support that, even though no longer documented. */ - if (!read_only) { + if (argc == optind + 1) + new_epb = parse_cmdline_epb(parse_optarg_string(argv[optind])); - if (argc != optind + 1) { - printf("must supply -r or policy param\n"); - usage(); - } + if (argc > optind + 1) { + fprintf(stderr, "stray parameter '%s'\n", argv[optind + 1]); + usage(); + } +} - if (!strcmp("performance", argv[optind])) { - new_bias = BIAS_PERFORMANCE; - } else if (!strcmp("normal", argv[optind])) { - new_bias = BIAS_BALANCE; - } else if (!strcmp("powersave", argv[optind])) { - new_bias = BIAS_POWERSAVE; - } else { - char *endptr; - - new_bias = strtoull(argv[optind], &endptr, 0); - if (endptr == argv[optind] || - new_bias > BIAS_POWERSAVE) { - fprintf(stderr, "invalid value: %s\n", - argv[optind]); - usage(); - } - } + +int get_msr(int cpu, int offset, unsigned long long *msr) +{ + int retval; + char pathname[32]; + int fd; + + sprintf(pathname, "/dev/cpu/%d/msr", cpu); + fd = open(pathname, O_RDONLY); + if (fd < 0) + err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); + + retval = pread(fd, msr, sizeof(*msr), offset); + if (retval != sizeof(*msr)) + err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); + + if (debug > 1) + fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr); + + close(fd); + return 0; +} + +int put_msr(int cpu, int offset, unsigned long long new_msr) +{ + char pathname[32]; + int retval; + int fd; + + sprintf(pathname, "/dev/cpu/%d/msr", cpu); + fd = open(pathname, O_RDWR); + if (fd < 0) + err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); + + retval = pwrite(fd, &new_msr, sizeof(new_msr), offset); + if (retval != sizeof(new_msr)) + err(-2, "pwrite(cpu%d, offset 0x%x, 0x%llx) = %d", cpu, offset, new_msr, retval); + + close(fd); + + if (debug > 1) + fprintf(stderr, "put_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, new_msr); + + return 0; +} + +void print_hwp_cap(int cpu, struct msr_hwp_cap *cap, char *str) +{ + if (cpu != -1) + printf("cpu%d: ", cpu); + + printf("HWP_CAP: low %d eff %d guar %d high %d\n", + cap->lowest, cap->efficient, cap->guaranteed, cap->highest); +} +void read_hwp_cap(int cpu, struct msr_hwp_cap *cap, unsigned int msr_offset) +{ + unsigned long long msr; + + get_msr(cpu, msr_offset, &msr); + + cap->highest = msr_perf_2_ratio(HWP_HIGHEST_PERF(msr)); + cap->guaranteed = msr_perf_2_ratio(HWP_GUARANTEED_PERF(msr)); + cap->efficient = msr_perf_2_ratio(HWP_MOSTEFFICIENT_PERF(msr)); + cap->lowest = msr_perf_2_ratio(HWP_LOWEST_PERF(msr)); +} + +void print_hwp_request(int cpu, struct msr_hwp_request *h, char *str) +{ + if (cpu != -1) + printf("cpu%d: ", cpu); + + if (str) + printf("%s", str); + + printf("HWP_REQ: min %d max %d des %d epp %d window 0x%x (%d*10^%dus) use_pkg %d\n", + h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp, + h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7, h->hwp_use_pkg); +} +void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str) +{ + printf("pkg%d: ", pkg); + + if (str) + printf("%s", str); + + printf("HWP_REQ_PKG: min %d max %d des %d epp %d window 0x%x (%d*10^%dus)\n", + h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp, + h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7); +} +void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) +{ + unsigned long long msr; + + get_msr(cpu, msr_offset, &msr); + + hwp_req->hwp_min = msr_perf_2_ratio((((msr) >> 0) & 0xff)); + hwp_req->hwp_max = msr_perf_2_ratio((((msr) >> 8) & 0xff)); + hwp_req->hwp_desired = msr_perf_2_ratio((((msr) >> 16) & 0xff)); + hwp_req->hwp_epp = (((msr) >> 24) & 0xff); + hwp_req->hwp_window = (((msr) >> 32) & 0x3ff); + hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1); +} + +void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) +{ + unsigned long long msr = 0; + + if (debug > 1) + printf("cpu%d: requesting min %d max %d des %d epp %d window 0x%0x use_pkg %d\n", + cpu, hwp_req->hwp_min, hwp_req->hwp_max, + hwp_req->hwp_desired, hwp_req->hwp_epp, + hwp_req->hwp_window, hwp_req->hwp_use_pkg); + + msr |= HWP_MIN_PERF(ratio_2_msr_perf(hwp_req->hwp_min)); + msr |= HWP_MAX_PERF(ratio_2_msr_perf(hwp_req->hwp_max)); + msr |= HWP_DESIRED_PERF(ratio_2_msr_perf(hwp_req->hwp_desired)); + msr |= HWP_ENERGY_PERF_PREFERENCE(hwp_req->hwp_epp); + msr |= HWP_ACTIVITY_WINDOW(hwp_req->hwp_window); + msr |= HWP_PACKAGE_CONTROL(hwp_req->hwp_use_pkg); + + put_msr(cpu, msr_offset, msr); +} + +int print_cpu_msrs(int cpu) +{ + unsigned long long msr; + struct msr_hwp_request req; + struct msr_hwp_cap cap; + + if (has_epb) { + get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); + + printf("cpu%d: EPB %u\n", cpu, (unsigned int) msr); } + + if (!has_hwp) + return 0; + + read_hwp_request(cpu, &req, MSR_HWP_REQUEST); + print_hwp_request(cpu, &req, ""); + + read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES); + print_hwp_cap(cpu, &cap, ""); + + return 0; +} + +int print_pkg_msrs(int pkg) +{ + struct msr_hwp_request req; + unsigned long long msr; + + if (!has_hwp) + return 0; + + read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG); + print_hwp_request_pkg(pkg, &req, ""); + + if (has_hwp_notify) { + get_msr(first_cpu_in_pkg[pkg], MSR_HWP_INTERRUPT, &msr); + fprintf(stderr, + "pkg%d: MSR_HWP_INTERRUPT: 0x%08llx (Excursion_Min-%sabled, Guaranteed_Perf_Change-%sabled)\n", + pkg, msr, + ((msr) & 0x2) ? "EN" : "Dis", + ((msr) & 0x1) ? "EN" : "Dis"); + } + get_msr(first_cpu_in_pkg[pkg], MSR_HWP_STATUS, &msr); + fprintf(stderr, + "pkg%d: MSR_HWP_STATUS: 0x%08llx (%sExcursion_Min, %sGuaranteed_Perf_Change)\n", + pkg, msr, + ((msr) & 0x4) ? "" : "No-", + ((msr) & 0x1) ? "" : "No-"); + + return 0; } /* - * validate_cpuid() - * returns on success, quietly exits on failure (make verbose with -v) + * Assumption: All HWP systems have 100 MHz bus clock */ -void validate_cpuid(void) +int ratio_2_sysfs_khz(int ratio) { - unsigned int eax, ebx, ecx, edx, max_level; - unsigned int fms, family, model, stepping; + int bclk_khz = 100 * 1000; /* 100,000 KHz = 100 MHz */ - eax = ebx = ecx = edx = 0; + return ratio * bclk_khz; +} +/* + * If HWP is enabled and cpufreq sysfs attribtes are present, + * then update sysfs, so that it will not become + * stale when we write to MSRs. + * (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq, + * so we don't have to touch that.) + */ +void update_cpufreq_scaling_freq(int is_max, int cpu, unsigned int ratio) +{ + char pathname[64]; + FILE *fp; + int retval; + int khz; - asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), - "=d" (edx) : "a" (0)); + sprintf(pathname, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_%s_freq", + cpu, is_max ? "max" : "min"); - if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) { - if (verbose) - fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel", - (char *)&ebx, (char *)&edx, (char *)&ecx); - exit(1); + fp = fopen(pathname, "w"); + if (!fp) { + if (debug) + perror(pathname); + return; } - asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); - family = (fms >> 8) & 0xf; - model = (fms >> 4) & 0xf; - stepping = fms & 0xf; - if (family == 6 || family == 0xf) - model += ((fms >> 16) & 0xf) << 4; + khz = ratio_2_sysfs_khz(ratio); + retval = fprintf(fp, "%d", khz); + if (retval < 0) + if (debug) + perror("fprintf"); + if (debug) + printf("echo %d > %s\n", khz, pathname); - if (verbose > 1) - printf("CPUID %d levels family:model:stepping " - "0x%x:%x:%x (%d:%d:%d)\n", max_level, - family, model, stepping, family, model, stepping); + fclose(fp); +} - if (!(edx & (1 << 5))) { - if (verbose) - printf("CPUID: no MSR\n"); - exit(1); +/* + * We update all sysfs before updating any MSRs because of + * bugs in cpufreq/intel_pstate where the sysfs writes + * for a CPU may change the min/max values on other CPUS. + */ + +int update_sysfs(int cpu) +{ + if (!has_hwp) + return 0; + + if (!hwp_update_enabled()) + return 0; + + if (access("/sys/devices/system/cpu/cpu0/cpufreq", F_OK)) + return 0; + + if (update_hwp_min) + update_cpufreq_scaling_freq(0, cpu, req_update.hwp_min); + + if (update_hwp_max) + update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max); + + return 0; +} + +int verify_hwp_req_self_consistency(int cpu, struct msr_hwp_request *req) +{ + /* fail if min > max requested */ + if (req->hwp_min > req->hwp_max) { + errx(1, "cpu%d: requested hwp-min %d > hwp_max %d", + cpu, req->hwp_min, req->hwp_max); } - /* - * Support for MSR_IA32_ENERGY_PERF_BIAS - * is indicated by CPUID.06H.ECX.bit3 - */ - asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6)); - if (verbose) - printf("CPUID.06H.ECX: 0x%x\n", ecx); - if (!(ecx & (1 << 3))) { - if (verbose) - printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n"); - exit(1); + /* fail if desired > max requestd */ + if (req->hwp_desired && (req->hwp_desired > req->hwp_max)) { + errx(1, "cpu%d: requested hwp-desired %d > hwp_max %d", + cpu, req->hwp_desired, req->hwp_max); } - return; /* success */ + /* fail if desired < min requestd */ + if (req->hwp_desired && (req->hwp_desired < req->hwp_min)) { + errx(1, "cpu%d: requested hwp-desired %d < requested hwp_min %d", + cpu, req->hwp_desired, req->hwp_min); + } + + return 0; } -unsigned long long get_msr(int cpu, int offset) +int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, struct msr_hwp_cap *cap) { - unsigned long long msr; - char msr_path[32]; - int retval; - int fd; + if (update_hwp_max) { + if (req->hwp_max > cap->highest) + errx(1, "cpu%d: requested max %d > capabilities highest %d, use --force?", + cpu, req->hwp_max, cap->highest); + if (req->hwp_max < cap->lowest) + errx(1, "cpu%d: requested max %d < capabilities lowest %d, use --force?", + cpu, req->hwp_max, cap->lowest); + } - sprintf(msr_path, "/dev/cpu/%d/msr", cpu); - fd = open(msr_path, O_RDONLY); - if (fd < 0) { - printf("Try \"# modprobe msr\"\n"); - perror(msr_path); - exit(1); + if (update_hwp_min) { + if (req->hwp_min > cap->highest) + errx(1, "cpu%d: requested min %d > capabilities highest %d, use --force?", + cpu, req->hwp_min, cap->highest); + if (req->hwp_min < cap->lowest) + errx(1, "cpu%d: requested min %d < capabilities lowest %d, use --force?", + cpu, req->hwp_min, cap->lowest); } - retval = pread(fd, &msr, sizeof msr, offset); + if (update_hwp_min && update_hwp_max && (req->hwp_min > req->hwp_max)) + errx(1, "cpu%d: requested min %d > requested max %d", + cpu, req->hwp_min, req->hwp_max); - if (retval != sizeof msr) { - printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval); - exit(-2); + if (update_hwp_desired && req->hwp_desired) { + if (req->hwp_desired > req->hwp_max) + errx(1, "cpu%d: requested desired %d > requested max %d, use --force?", + cpu, req->hwp_desired, req->hwp_max); + if (req->hwp_desired < req->hwp_min) + errx(1, "cpu%d: requested desired %d < requested min %d, use --force?", + cpu, req->hwp_desired, req->hwp_min); + if (req->hwp_desired < cap->lowest) + errx(1, "cpu%d: requested desired %d < capabilities lowest %d, use --force?", + cpu, req->hwp_desired, cap->lowest); + if (req->hwp_desired > cap->highest) + errx(1, "cpu%d: requested desired %d > capabilities highest %d, use --force?", + cpu, req->hwp_desired, cap->highest); } - close(fd); - return msr; + + return 0; } -unsigned long long put_msr(int cpu, unsigned long long new_msr, int offset) +int update_hwp_request(int cpu) { - unsigned long long old_msr; - char msr_path[32]; - int retval; - int fd; + struct msr_hwp_request req; + struct msr_hwp_cap cap; + + int msr_offset = MSR_HWP_REQUEST; + + read_hwp_request(cpu, &req, msr_offset); + if (debug) + print_hwp_request(cpu, &req, "old: "); + + if (update_hwp_min) + req.hwp_min = req_update.hwp_min; + + if (update_hwp_max) + req.hwp_max = req_update.hwp_max; + + if (update_hwp_desired) + req.hwp_desired = req_update.hwp_desired; + + if (update_hwp_window) + req.hwp_window = req_update.hwp_window; + + if (update_hwp_epp) + req.hwp_epp = req_update.hwp_epp; + + req.hwp_use_pkg = req_update.hwp_use_pkg; + + read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES); + if (debug) + print_hwp_cap(cpu, &cap, ""); + + if (!force) + check_hwp_request_v_hwp_capabilities(cpu, &req, &cap); + + verify_hwp_req_self_consistency(cpu, &req); - sprintf(msr_path, "/dev/cpu/%d/msr", cpu); - fd = open(msr_path, O_RDWR); - if (fd < 0) { - perror(msr_path); - exit(1); + write_hwp_request(cpu, &req, msr_offset); + + if (debug) { + read_hwp_request(cpu, &req, msr_offset); + print_hwp_request(cpu, &req, "new: "); } + return 0; +} +int update_hwp_request_pkg(int pkg) +{ + struct msr_hwp_request req; + struct msr_hwp_cap cap; + int cpu = first_cpu_in_pkg[pkg]; + + int msr_offset = MSR_HWP_REQUEST_PKG; + + read_hwp_request(cpu, &req, msr_offset); + if (debug) + print_hwp_request_pkg(pkg, &req, "old: "); + + if (update_hwp_min) + req.hwp_min = req_update.hwp_min; + + if (update_hwp_max) + req.hwp_max = req_update.hwp_max; + + if (update_hwp_desired) + req.hwp_desired = req_update.hwp_desired; + + if (update_hwp_window) + req.hwp_window = req_update.hwp_window; + + if (update_hwp_epp) + req.hwp_epp = req_update.hwp_epp; + + read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES); + if (debug) + print_hwp_cap(cpu, &cap, ""); + + if (!force) + check_hwp_request_v_hwp_capabilities(cpu, &req, &cap); + + verify_hwp_req_self_consistency(cpu, &req); + + write_hwp_request(cpu, &req, msr_offset); - retval = pread(fd, &old_msr, sizeof old_msr, offset); - if (retval != sizeof old_msr) { - perror("pwrite"); - printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval); - exit(-2); + if (debug) { + read_hwp_request(cpu, &req, msr_offset); + print_hwp_request_pkg(pkg, &req, "new: "); } + return 0; +} + +int enable_hwp_on_cpu(int cpu) +{ + unsigned long long msr; + + get_msr(cpu, MSR_PM_ENABLE, &msr); + put_msr(cpu, MSR_PM_ENABLE, 1); + + if (verbose) + printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1); + + return 0; +} + +int update_cpu_msrs(int cpu) +{ + unsigned long long msr; + - retval = pwrite(fd, &new_msr, sizeof new_msr, offset); - if (retval != sizeof new_msr) { - perror("pwrite"); - printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval); - exit(-2); + if (update_epb) { + get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); + put_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, new_epb); + + if (verbose) + printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n", + cpu, (unsigned int) msr, (unsigned int) new_epb); } - close(fd); + if (update_turbo) { + int turbo_is_present_and_disabled; + + get_msr(cpu, MSR_IA32_MISC_ENABLE, &msr); + + turbo_is_present_and_disabled = ((msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE) != 0); + + if (turbo_update_value == 1) { + if (turbo_is_present_and_disabled) { + msr &= ~MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + put_msr(cpu, MSR_IA32_MISC_ENABLE, msr); + if (verbose) + printf("cpu%d: turbo ENABLE\n", cpu); + } + } else { + /* + * if "turbo_is_enabled" were known to be describe this cpu + * then we could use it here to skip redundant disable requests. + * but cpu may be in a different package, so we always write. + */ + msr |= MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + put_msr(cpu, MSR_IA32_MISC_ENABLE, msr); + if (verbose) + printf("cpu%d: turbo DISABLE\n", cpu); + } + } + + if (!has_hwp) + return 0; + + if (!hwp_update_enabled()) + return 0; + + update_hwp_request(cpu); + return 0; +} + +/* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ + FILE *filep = fopen(path, "r"); - return old_msr; + if (!filep) + err(1, "%s: open failed", path); + return filep; } -void print_msr(int cpu) +unsigned int get_pkg_num(int cpu) { - printf("cpu%d: 0x%016llx\n", - cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS)); + FILE *fp; + char pathname[128]; + unsigned int pkg; + int retval; + + sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); + + fp = fopen_or_die(pathname, "r"); + retval = fscanf(fp, "%d\n", &pkg); + if (retval != 1) + errx(1, "%s: failed to parse", pathname); + return pkg; } -void update_msr(int cpu) +int set_max_cpu_pkg_num(int cpu) { - unsigned long long previous_msr; + unsigned int pkg; - previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS); + if (max_cpu_num < cpu) + max_cpu_num = cpu; - if (verbose) - printf("cpu%d msr0x%x 0x%016llx -> 0x%016llx\n", - cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias); + pkg = get_pkg_num(cpu); + + if (pkg >= MAX_PACKAGES) + errx(1, "cpu%d: %d >= MAX_PACKAGES (%d)", cpu, pkg, MAX_PACKAGES); + + if (pkg > max_pkg_num) + max_pkg_num = pkg; - return; + if ((pkg_present_set & (1ULL << pkg)) == 0) { + pkg_present_set |= (1ULL << pkg); + first_cpu_in_pkg[pkg] = cpu; + } + + return 0; +} +int mark_cpu_present(int cpu) +{ + CPU_SET_S(cpu, cpu_setsize, cpu_present_set); + return 0; } -char *proc_stat = "/proc/stat"; /* - * run func() on every cpu in /dev/cpu + * run func(cpu) on every cpu in /proc/stat + * return max_cpu number */ -void for_every_cpu(void (func)(int)) +int for_all_proc_cpus(int (func)(int)) { FILE *fp; + int cpu_num; int retval; - fp = fopen(proc_stat, "r"); - if (fp == NULL) { - perror(proc_stat); - exit(1); - } + fp = fopen_or_die(proc_stat, "r"); retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); - if (retval != 0) { - perror("/proc/stat format"); - exit(1); - } + if (retval != 0) + err(1, "%s: failed to parse format", proc_stat); while (1) { - int cpu; - - retval = fscanf(fp, - "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", - &cpu); + retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); if (retval != 1) break; - func(cpu); + retval = func(cpu_num); + if (retval) { + fclose(fp); + return retval; + } } fclose(fp); + return 0; +} + +void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int)) +{ + int cpu_num; + + for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num) + if (CPU_ISSET_S(cpu_num, set_size, cpu_set)) + func(cpu_num); +} + +void init_data_structures(void) +{ + for_all_proc_cpus(set_max_cpu_pkg_num); + + cpu_setsize = CPU_ALLOC_SIZE((max_cpu_num + 1)); + + cpu_present_set = CPU_ALLOC((max_cpu_num + 1)); + if (cpu_present_set == NULL) + err(3, "CPU_ALLOC"); + CPU_ZERO_S(cpu_setsize, cpu_present_set); + for_all_proc_cpus(mark_cpu_present); +} + +/* clear has_hwp if it is not enable (or being enabled) */ + +void verify_hwp_is_enabled(void) +{ + unsigned long long msr; + + if (!has_hwp) /* set in early_cpuid() */ + return; + + /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ + get_msr(base_cpu, MSR_PM_ENABLE, &msr); + if ((msr & 1) == 0) { + fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n"); + has_hwp = 0; + return; + } +} + +int req_update_bounds_check(void) +{ + if (!hwp_update_enabled()) + return 0; + + /* fail if min > max requested */ + if ((update_hwp_max && update_hwp_min) && + (req_update.hwp_min > req_update.hwp_max)) { + printf("hwp-min %d > hwp_max %d\n", req_update.hwp_min, req_update.hwp_max); + return -EINVAL; + } + + /* fail if desired > max requestd */ + if (req_update.hwp_desired && update_hwp_max && + (req_update.hwp_desired > req_update.hwp_max)) { + printf("hwp-desired cannot be greater than hwp_max\n"); + return -EINVAL; + } + /* fail if desired < min requestd */ + if (req_update.hwp_desired && update_hwp_min && + (req_update.hwp_desired < req_update.hwp_min)) { + printf("hwp-desired cannot be less than hwp_min\n"); + return -EINVAL; + } + + return 0; +} + +void set_base_cpu(void) +{ + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(-ENODEV, "No valid cpus found"); +} + + +void probe_dev_msr(void) +{ + struct stat sb; + char pathname[32]; + + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (stat(pathname, &sb)) + if (system("/sbin/modprobe msr > /dev/null 2>&1")) + err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); +} +/* + * early_cpuid() + * initialize turbo_is_enabled, has_hwp, has_epb + * before cmdline is parsed + */ +void early_cpuid(void) +{ + unsigned int eax, ebx, ecx, edx, max_level; + unsigned int fms, family, model; + + __get_cpuid(0, &max_level, &ebx, &ecx, &edx); + + if (max_level < 6) + errx(1, "Processor not supported\n"); + + __get_cpuid(1, &fms, &ebx, &ecx, &edx); + family = (fms >> 8) & 0xf; + model = (fms >> 4) & 0xf; + if (family == 6 || family == 0xf) + model += ((fms >> 16) & 0xf) << 4; + + if (model == 0x4F) { + unsigned long long msr; + + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); + + bdx_highest_ratio = msr & 0xFF; + } + + __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); + turbo_is_enabled = (eax >> 1) & 1; + has_hwp = (eax >> 7) & 1; + has_epb = (ecx >> 3) & 1; +} + +/* + * parse_cpuid() + * set + * has_hwp, has_hwp_notify, has_hwp_activity_window, has_hwp_epp, has_hwp_request_pkg, has_epb + */ +void parse_cpuid(void) +{ + unsigned int eax, ebx, ecx, edx, max_level; + unsigned int fms, family, model, stepping; + + eax = ebx = ecx = edx = 0; + + __get_cpuid(0, &max_level, &ebx, &ecx, &edx); + + if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) + genuine_intel = 1; + + if (debug) + fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", + (char *)&ebx, (char *)&edx, (char *)&ecx); + + __get_cpuid(1, &fms, &ebx, &ecx, &edx); + family = (fms >> 8) & 0xf; + model = (fms >> 4) & 0xf; + stepping = fms & 0xf; + if (family == 6 || family == 0xf) + model += ((fms >> 16) & 0xf) << 4; + + if (debug) { + fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", + max_level, family, model, stepping, family, model, stepping); + fprintf(stderr, "CPUID(1): %s %s %s %s %s %s %s %s\n", + ecx & (1 << 0) ? "SSE3" : "-", + ecx & (1 << 3) ? "MONITOR" : "-", + ecx & (1 << 7) ? "EIST" : "-", + ecx & (1 << 8) ? "TM2" : "-", + edx & (1 << 4) ? "TSC" : "-", + edx & (1 << 5) ? "MSR" : "-", + edx & (1 << 22) ? "ACPI-TM" : "-", + edx & (1 << 29) ? "TM" : "-"); + } + + if (!(edx & (1 << 5))) + errx(1, "CPUID: no MSR"); + + + __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); + /* turbo_is_enabled already set */ + /* has_hwp already set */ + has_hwp_notify = eax & (1 << 8); + has_hwp_activity_window = eax & (1 << 9); + has_hwp_epp = eax & (1 << 10); + has_hwp_request_pkg = eax & (1 << 11); + + if (!has_hwp_request_pkg && update_hwp_use_pkg) + errx(1, "--hwp-use-pkg is not available on this hardware"); + + /* has_epb already set */ + + if (debug) + fprintf(stderr, + "CPUID(6): %sTURBO, %sHWP, %sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", + turbo_is_enabled ? "" : "No-", + has_hwp ? "" : "No-", + has_hwp_notify ? "" : "No-", + has_hwp_activity_window ? "" : "No-", + has_hwp_epp ? "" : "No-", + has_hwp_request_pkg ? "" : "No-", + has_epb ? "" : "No-"); + + return; /* success */ } int main(int argc, char **argv) { + set_base_cpu(); + probe_dev_msr(); + init_data_structures(); + + early_cpuid(); /* initial cpuid parse before cmdline */ + cmdline(argc, argv); - if (verbose > 1) - printf("x86_energy_perf_policy Nov 24, 2010" - " - Len Brown <lenb@kernel.org>\n"); - if (verbose > 1 && !read_only) - printf("new_bias %lld\n", new_bias); - - validate_cpuid(); - - if (cpu != -1) { - if (read_only) - print_msr(cpu); - else - update_msr(cpu); - } else { - if (read_only) - for_every_cpu(print_msr); - else - for_every_cpu(update_msr); + if (debug) + print_version(); + + parse_cpuid(); + + /* If CPU-set and PKG-set are not initialized, default to all CPUs */ + if ((cpu_selected_set == 0) && (pkg_selected_set == 0)) + cpu_selected_set = cpu_present_set; + + /* + * If HWP is being enabled, do it now, so that subsequent operations + * that access HWP registers can work. + */ + if (update_hwp_enable) + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, enable_hwp_on_cpu); + + /* If HWP present, but disabled, warn and ignore from here forward */ + verify_hwp_is_enabled(); + + if (req_update_bounds_check()) + return -EINVAL; + + /* display information only, no updates to settings */ + if (!update_epb && !update_turbo && !hwp_update_enabled()) { + if (cpu_selected_set) + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, print_cpu_msrs); + + if (has_hwp_request_pkg) { + if (pkg_selected_set == 0) + pkg_selected_set = pkg_present_set; + + for_packages(pkg_selected_set, print_pkg_msrs); + } + + return 0; } + /* update CPU set */ + if (cpu_selected_set) { + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs); + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs); + } else if (pkg_selected_set) + for_packages(pkg_selected_set, update_hwp_request_pkg); + return 0; } diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 64cae1a5deff..e1f75a1914a1 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -370,7 +370,7 @@ acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path, } EXPORT_SYMBOL(__wrap_acpi_evaluate_object); -union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, +union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4) { union acpi_object *obj = ERR_PTR(-ENXIO); @@ -379,11 +379,11 @@ union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, rcu_read_lock(); ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list); if (ops) - obj = ops->evaluate_dsm(handle, uuid, rev, func, argv4); + obj = ops->evaluate_dsm(handle, guid, rev, func, argv4); rcu_read_unlock(); if (IS_ERR(obj)) - return acpi_evaluate_dsm(handle, uuid, rev, func, argv4); + return acpi_evaluate_dsm(handle, guid, rev, func, argv4); return obj; } EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index c2187178fb13..28859da78edf 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1559,7 +1559,7 @@ static unsigned long nfit_ctl_handle; union acpi_object *result; static union acpi_object *nfit_test_evaluate_dsm(acpi_handle handle, - const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4) + const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4) { if (handle != &nfit_ctl_handle) return ERR_PTR(-ENXIO); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index f54c0032c6ff..d3d63dd5ed38 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -13,6 +13,7 @@ #ifndef __NFIT_TEST_H__ #define __NFIT_TEST_H__ #include <linux/list.h> +#include <linux/uuid.h> #include <linux/ioport.h> #include <linux/spinlock_types.h> @@ -36,7 +37,8 @@ typedef void *acpi_handle; typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t); typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle, - const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4); + const guid_t *guid, u64 rev, u64 func, + union acpi_object *argv4); void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size); void __wrap_iounmap(volatile void __iomem *addr); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 91edd0566237..2ca51a8a588c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -11,9 +11,11 @@ endif CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include LDLIBS += -lcap -lelf -TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs +TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ + test_align -TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o +TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ + test_pkt_md_access.o TEST_PROGS := test_kmod.sh @@ -34,6 +36,7 @@ $(BPFOBJ): force CLANG ?= clang %.o: %.c - $(CLANG) -I. -I../../../include/uapi -I../../../../samples/bpf/ \ + $(CLANG) -I. -I./include/uapi -I../../../include/uapi \ + -I../../../../samples/bpf/ \ -Wno-compare-distinct-pointer-types \ -O2 -target bpf -c $< -o $@ diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h index 19d0604f8694..487cbfb89beb 100644 --- a/tools/testing/selftests/bpf/bpf_endian.h +++ b/tools/testing/selftests/bpf/bpf_endian.h @@ -1,23 +1,42 @@ #ifndef __BPF_ENDIAN__ #define __BPF_ENDIAN__ -#include <asm/byteorder.h> +#include <linux/swab.h> -#if __BYTE_ORDER == __LITTLE_ENDIAN -# define __bpf_ntohs(x) __builtin_bswap16(x) -# define __bpf_htons(x) __builtin_bswap16(x) -#elif __BYTE_ORDER == __BIG_ENDIAN -# define __bpf_ntohs(x) (x) -# define __bpf_htons(x) (x) +/* LLVM's BPF target selects the endianness of the CPU + * it compiles on, or the user specifies (bpfel/bpfeb), + * respectively. The used __BYTE_ORDER__ is defined by + * the compiler, we cannot rely on __BYTE_ORDER from + * libc headers, since it doesn't reflect the actual + * requested byte order. + * + * Note, LLVM's BPF target has different __builtin_bswapX() + * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE + * in bpfel and bpfeb case, which means below, that we map + * to cpu_to_be16(). We could use it unconditionally in BPF + * case, but better not rely on it, so that this header here + * can be used from application and BPF program side, which + * use different targets. + */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define __bpf_ntohs(x) __builtin_bswap16(x) +# define __bpf_htons(x) __builtin_bswap16(x) +# define __bpf_constant_ntohs(x) ___constant_swab16(x) +# define __bpf_constant_htons(x) ___constant_swab16(x) +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define __bpf_ntohs(x) (x) +# define __bpf_htons(x) (x) +# define __bpf_constant_ntohs(x) (x) +# define __bpf_constant_htons(x) (x) #else -# error "Fix your __BYTE_ORDER?!" +# error "Fix your compiler's __BYTE_ORDER__?!" #endif #define bpf_htons(x) \ (__builtin_constant_p(x) ? \ - __constant_htons(x) : __bpf_htons(x)) + __bpf_constant_htons(x) : __bpf_htons(x)) #define bpf_ntohs(x) \ (__builtin_constant_p(x) ? \ - __constant_ntohs(x) : __bpf_ntohs(x)) + __bpf_constant_ntohs(x) : __bpf_ntohs(x)) -#endif +#endif /* __BPF_ENDIAN__ */ diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/testing/selftests/bpf/include/uapi/linux/types.h new file mode 100644 index 000000000000..51841848fbfe --- /dev/null +++ b/tools/testing/selftests/bpf/include/uapi/linux/types.h @@ -0,0 +1,22 @@ +#ifndef _UAPI_LINUX_TYPES_H +#define _UAPI_LINUX_TYPES_H + +#include <asm-generic/int-ll64.h> + +/* copied from linux:include/uapi/linux/types.h */ +#define __bitwise +typedef __u16 __bitwise __le16; +typedef __u16 __bitwise __be16; +typedef __u32 __bitwise __le32; +typedef __u32 __bitwise __be32; +typedef __u64 __bitwise __le64; +typedef __u64 __bitwise __be64; + +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; + +#define __aligned_u64 __u64 __attribute__((aligned(8))) +#define __aligned_be64 __be64 __attribute__((aligned(8))) +#define __aligned_le64 __le64 __attribute__((aligned(8))) + +#endif /* _UAPI_LINUX_TYPES_H */ diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c new file mode 100644 index 000000000000..bccebd935907 --- /dev/null +++ b/tools/testing/selftests/bpf/test_align.c @@ -0,0 +1,458 @@ +#include <asm/types.h> +#include <linux/types.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <stddef.h> +#include <stdbool.h> + +#include <sys/resource.h> + +#include <linux/unistd.h> +#include <linux/filter.h> +#include <linux/bpf_perf_event.h> +#include <linux/bpf.h> + +#include <bpf/bpf.h> + +#include "../../../include/linux/filter.h" + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#define MAX_INSNS 512 +#define MAX_MATCHES 16 + +struct bpf_align_test { + const char *descr; + struct bpf_insn insns[MAX_INSNS]; + enum { + UNDEF, + ACCEPT, + REJECT + } result; + enum bpf_prog_type prog_type; + const char *matches[MAX_MATCHES]; +}; + +static struct bpf_align_test tests[] = { + { + .descr = "mov", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_3, 16), + BPF_MOV64_IMM(BPF_REG_3, 32), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp", + "2: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "3: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "4: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp", + "5: R1=ctx R3=imm32,min_value=32,max_value=32,min_align=32 R10=fp", + }, + }, + { + .descr = "shift", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_4, 32), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp", + "2: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp", + "3: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "4: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "5: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp", + "6: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp", + "7: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm32,min_value=32,max_value=32,min_align=32 R10=fp", + "8: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm16,min_value=16,max_value=16,min_align=16 R10=fp", + "9: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "10: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "11: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm2,min_value=2,max_value=2,min_align=2 R10=fp", + }, + }, + { + .descr = "addsub", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "2: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=4 R10=fp", + "3: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R10=fp", + "4: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "5: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm12,min_value=12,max_value=12,min_align=4 R10=fp", + "6: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm14,min_value=14,max_value=14,min_align=2 R10=fp", + }, + }, + { + .descr = "mul", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 7), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp", + "2: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp", + "3: R1=ctx R3=imm14,min_value=14,max_value=14,min_align=2 R10=fp", + "4: R1=ctx R3=imm56,min_value=56,max_value=56,min_align=4 R10=fp", + }, + }, + +#define PREP_PKT_POINTERS \ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \ + offsetof(struct __sk_buff, data)), \ + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \ + offsetof(struct __sk_buff, data_end)) + +#define LOAD_UNKNOWN(DST_REG) \ + PREP_PKT_POINTERS, \ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \ + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \ + BPF_EXIT_INSN(), \ + BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0) + + { + .descr = "unknown shift", + .insns = { + LOAD_UNKNOWN(BPF_REG_3), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + LOAD_UNKNOWN(BPF_REG_4), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp", + "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv55,min_align=2 R10=fp", + "9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv54,min_align=4 R10=fp", + "10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv53,min_align=8 R10=fp", + "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv52,min_align=16 R10=fp", + "18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv56 R10=fp", + "19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv51,min_align=32 R10=fp", + "20: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv52,min_align=16 R10=fp", + "21: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv53,min_align=8 R10=fp", + "22: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv54,min_align=4 R10=fp", + "23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv55,min_align=2 R10=fp", + }, + }, + { + .descr = "unknown mul", + .insns = { + LOAD_UNKNOWN(BPF_REG_3), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp", + "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv55,min_align=1 R10=fp", + "10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv54,min_align=2 R10=fp", + "12: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "13: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv53,min_align=4 R10=fp", + "14: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv52,min_align=8 R10=fp", + "16: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv50,min_align=8 R10=fp" + }, + }, + { + .descr = "packet const offset", + .insns = { + PREP_PKT_POINTERS, + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + + BPF_MOV64_IMM(BPF_REG_0, 0), + + /* Skip over ethernet header. */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3), + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0), + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "4: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=0,r=0) R10=fp", + "5: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=14,r=0) R10=fp", + "6: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R4=pkt(id=0,off=14,r=0) R5=pkt(id=0,off=14,r=0) R10=fp", + "10: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv56 R5=pkt(id=0,off=14,r=18) R10=fp", + "14: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp", + "15: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp", + }, + }, + { + .descr = "packet variable offset", + .insns = { + LOAD_UNKNOWN(BPF_REG_6), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), + + /* First, add a constant to the R5 packet pointer, + * then a variable with a known alignment. + */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + + /* Now, test in the other direction. Adding first + * the variable offset to R5, then the constant. + */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + + /* Test multiple accumulations of unknown values + * into a packet pointer. + */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + /* Calculated offset in R6 has unknown value, but known + * alignment of 4. + */ + "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R6=inv54,min_align=4 R10=fp", + + /* Offset is added to packet pointer R5, resulting in known + * auxiliary alignment and offset. + */ + "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R5=pkt(id=1,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* At the time the word size load is performed from R5, + * it's total offset is NET_IP_ALIGN + reg->off (0) + + * reg->aux_off (14) which is 16. Then the variable + * offset is considered using reg->aux_off_align which + * is 4 and meets the load's requirements. + */ + "15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=1,off=4,r=4),aux_off=14,aux_off_align=4 R5=pkt(id=1,off=0,r=4),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + + /* Variable offset is added to R5 packet pointer, + * resulting in auxiliary alignment of 4. + */ + "18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=0,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* Constant offset is added to R5, resulting in + * reg->off of 14. + */ + "19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=14,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* At the time the word size load is performed from R5, + * it's total offset is NET_IP_ALIGN + reg->off (14) which + * is 16. Then the variable offset is considered using + * reg->aux_off_align which is 4 and meets the load's + * requirements. + */ + "23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=2,off=18,r=18),aux_off_align=4 R5=pkt(id=2,off=14,r=18),aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* Constant offset is added to R5 packet pointer, + * resulting in reg->off value of 14. + */ + "26: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=0,off=14,r=8) R6=inv54,min_align=4 R10=fp", + /* Variable offset is added to R5, resulting in an + * auxiliary offset of 14, and an auxiliary alignment of 4. + */ + "27: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + /* Constant is added to R5 again, setting reg->off to 4. */ + "28: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=4,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + /* And once more we add a variable, which causes an accumulation + * of reg->off into reg->aux_off_align, with resulting value of + * 18. The auxiliary alignment stays at 4. + */ + "29: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=4,off=0,r=0),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + /* At the time the word size load is performed from R5, + * it's total offset is NET_IP_ALIGN + reg->off (0) + + * reg->aux_off (18) which is 20. Then the variable offset + * is considered using reg->aux_off_align which is 4 and meets + * the load's requirements. + */ + "33: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=4,off=4,r=4),aux_off=18,aux_off_align=4 R5=pkt(id=4,off=0,r=4),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + }, + }, +}; + +static int probe_filter_length(const struct bpf_insn *fp) +{ + int len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static char bpf_vlog[32768]; + +static int do_test_single(struct bpf_align_test *test) +{ + struct bpf_insn *prog = test->insns; + int prog_type = test->prog_type; + int prog_len, i; + int fd_prog; + int ret; + + prog_len = probe_filter_length(prog); + fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len, 1, "GPL", 0, + bpf_vlog, sizeof(bpf_vlog)); + if (fd_prog < 0) { + printf("Failed to load program.\n"); + printf("%s", bpf_vlog); + ret = 1; + } else { + ret = 0; + for (i = 0; i < MAX_MATCHES; i++) { + const char *t, *m = test->matches[i]; + + if (!m) + break; + t = strstr(bpf_vlog, m); + if (!t) { + printf("Failed to find match: %s\n", m); + ret = 1; + printf("%s", bpf_vlog); + break; + } + } + close(fd_prog); + } + return ret; +} + +static int do_test(unsigned int from, unsigned int to) +{ + int all_pass = 0; + int all_fail = 0; + unsigned int i; + + for (i = from; i < to; i++) { + struct bpf_align_test *test = &tests[i]; + int fail; + + printf("Test %3d: %s ... ", + i, test->descr); + fail = do_test_single(test); + if (fail) { + all_fail++; + printf("FAIL\n"); + } else { + all_pass++; + printf("PASS\n"); + } + } + printf("Results: %d pass %d fail\n", + all_pass, all_fail); + return all_fail ? EXIT_FAILURE : EXIT_SUCCESS; +} + +int main(int argc, char **argv) +{ + unsigned int from = 0, to = ARRAY_SIZE(tests); + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); + + if (argc == 3) { + unsigned int l = atoi(argv[argc - 2]); + unsigned int u = atoi(argv[argc - 1]); + + if (l < to && u < to) { + from = l; + to = u + 1; + } + } else if (argc == 2) { + unsigned int t = atoi(argv[argc - 1]); + + if (t < to) { + from = t; + to = t + 1; + } + } + return do_test(from, to); +} diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 93314524de0d..79601c81e169 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -239,6 +239,54 @@ static void test_hashmap_percpu(int task, void *data) close(fd); } +static void test_hashmap_walk(int task, void *data) +{ + int fd, i, max_entries = 100000; + long long key, value, next_key; + bool next_key_valid = true; + + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + max_entries, map_flags); + if (fd < 0) { + printf("Failed to create hashmap '%s'!\n", strerror(errno)); + exit(1); + } + + for (i = 0; i < max_entries; i++) { + key = i; value = key; + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0); + } + + for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key, + &next_key) == 0; i++) { + key = next_key; + assert(bpf_map_lookup_elem(fd, &key, &value) == 0); + } + + assert(i == max_entries); + + assert(bpf_map_get_next_key(fd, NULL, &key) == 0); + for (i = 0; next_key_valid; i++) { + next_key_valid = bpf_map_get_next_key(fd, &key, &next_key) == 0; + assert(bpf_map_lookup_elem(fd, &key, &value) == 0); + value++; + assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0); + key = next_key; + } + + assert(i == max_entries); + + for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key, + &next_key) == 0; i++) { + key = next_key; + assert(bpf_map_lookup_elem(fd, &key, &value) == 0); + assert(value - 1 == key); + } + + assert(i == max_entries); + close(fd); +} + static void test_arraymap(int task, void *data) { int key, next_key, fd; @@ -464,6 +512,7 @@ static void test_map_stress(void) run_parallel(100, test_hashmap, NULL); run_parallel(100, test_hashmap_percpu, NULL); run_parallel(100, test_hashmap_sizes, NULL); + run_parallel(100, test_hashmap_walk, NULL); run_parallel(100, test_arraymap, NULL); run_parallel(100, test_arraymap_percpu, NULL); @@ -549,6 +598,7 @@ static void run_all_tests(void) { test_hashmap(0, NULL); test_hashmap_percpu(0, NULL); + test_hashmap_walk(0, NULL); test_arraymap(0, NULL); test_arraymap_percpu(0, NULL); diff --git a/tools/testing/selftests/bpf/test_obj_id.c b/tools/testing/selftests/bpf/test_obj_id.c new file mode 100644 index 000000000000..880d2963b472 --- /dev/null +++ b/tools/testing/selftests/bpf/test_obj_id.c @@ -0,0 +1,35 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include "bpf_helpers.h" + +/* It is a dumb bpf program such that it must have no + * issue to be loaded since testing the verifier is + * not the focus here. + */ + +int _version SEC("version") = 1; + +struct bpf_map_def SEC("maps") test_map_id = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 1, +}; + +SEC("test_obj_id_dummy") +int test_obj_id(struct __sk_buff *skb) +{ + __u32 key = 0; + __u64 *value; + + value = bpf_map_lookup_elem(&test_map_id, &key); + + return TC_ACT_OK; +} diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c index 39387bb7e08c..6e11ba11709e 100644 --- a/tools/testing/selftests/bpf/test_pkt_access.c +++ b/tools/testing/selftests/bpf/test_pkt_access.c @@ -5,6 +5,7 @@ * License as published by the Free Software Foundation. */ #include <stddef.h> +#include <string.h> #include <linux/bpf.h> #include <linux/if_ether.h> #include <linux/if_packet.h> diff --git a/tools/testing/selftests/bpf/test_pkt_md_access.c b/tools/testing/selftests/bpf/test_pkt_md_access.c new file mode 100644 index 000000000000..71729d47eb85 --- /dev/null +++ b/tools/testing/selftests/bpf/test_pkt_md_access.c @@ -0,0 +1,35 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +#define TEST_FIELD(TYPE, FIELD, MASK) \ + { \ + TYPE tmp = *(volatile TYPE *)&skb->FIELD; \ + if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \ + return TC_ACT_SHOT; \ + } + +SEC("test1") +int process(struct __sk_buff *skb) +{ + TEST_FIELD(__u8, len, 0xFF); + TEST_FIELD(__u16, len, 0xFFFF); + TEST_FIELD(__u32, len, 0xFFFFFFFF); + TEST_FIELD(__u16, protocol, 0xFFFF); + TEST_FIELD(__u32, protocol, 0xFFFFFFFF); + TEST_FIELD(__u8, hash, 0xFF); + TEST_FIELD(__u16, hash, 0xFFFF); + TEST_FIELD(__u32, hash, 0xFFFFFFFF); + + return TC_ACT_OK; +} diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index b59f5ed4ae40..5855cd3d3d45 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -22,6 +22,8 @@ typedef __u16 __sum16; #include <sys/wait.h> #include <sys/resource.h> +#include <sys/types.h> +#include <fcntl.h> #include <linux/bpf.h> #include <linux/err.h> @@ -70,6 +72,7 @@ static struct { pass_cnt++; \ printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\ } \ + __ret; \ }) static int bpf_prog_load(const char *file, enum bpf_prog_type type, @@ -283,6 +286,224 @@ static void test_tcp_estats(void) bpf_object__close(obj); } +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +static void test_bpf_obj_id(void) +{ + const __u64 array_magic_value = 0xfaceb00c; + const __u32 array_key = 0; + const int nr_iters = 2; + const char *file = "./test_obj_id.o"; + const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; + + struct bpf_object *objs[nr_iters]; + int prog_fds[nr_iters], map_fds[nr_iters]; + /* +1 to test for the info_len returned by kernel */ + struct bpf_prog_info prog_infos[nr_iters + 1]; + struct bpf_map_info map_infos[nr_iters + 1]; + char jited_insns[128], xlated_insns[128]; + __u32 i, next_id, info_len, nr_id_found, duration = 0; + int sysctl_fd, jit_enabled = 0, err = 0; + __u64 array_value; + + sysctl_fd = open(jit_sysctl, 0, O_RDONLY); + if (sysctl_fd != -1) { + char tmpc; + + if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) + jit_enabled = (tmpc != '0'); + close(sysctl_fd); + } + + err = bpf_prog_get_fd_by_id(0); + CHECK(err >= 0 || errno != ENOENT, + "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); + + err = bpf_map_get_fd_by_id(0); + CHECK(err >= 0 || errno != ENOENT, + "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno); + + for (i = 0; i < nr_iters; i++) + objs[i] = NULL; + + /* Check bpf_obj_get_info_by_fd() */ + for (i = 0; i < nr_iters; i++) { + err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER, + &objs[i], &prog_fds[i]); + /* test_obj_id.o is a dumb prog. It should never fail + * to load. + */ + assert(!err); + + /* Check getting prog info */ + info_len = sizeof(struct bpf_prog_info) * 2; + prog_infos[i].jited_prog_insns = ptr_to_u64(jited_insns); + prog_infos[i].jited_prog_len = sizeof(jited_insns); + prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns); + prog_infos[i].xlated_prog_len = sizeof(xlated_insns); + err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i], + &info_len); + if (CHECK(err || + prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER || + info_len != sizeof(struct bpf_prog_info) || + (jit_enabled && !prog_infos[i].jited_prog_len) || + !prog_infos[i].xlated_prog_len, + "get-prog-info(fd)", + "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u\n", + err, errno, i, + prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, + info_len, sizeof(struct bpf_prog_info), + jit_enabled, + prog_infos[i].jited_prog_len, + prog_infos[i].xlated_prog_len)) + goto done; + + map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); + assert(map_fds[i] >= 0); + err = bpf_map_update_elem(map_fds[i], &array_key, + &array_magic_value, 0); + assert(!err); + + /* Check getting map info */ + info_len = sizeof(struct bpf_map_info) * 2; + err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i], + &info_len); + if (CHECK(err || + map_infos[i].type != BPF_MAP_TYPE_ARRAY || + map_infos[i].key_size != sizeof(__u32) || + map_infos[i].value_size != sizeof(__u64) || + map_infos[i].max_entries != 1 || + map_infos[i].map_flags != 0 || + info_len != sizeof(struct bpf_map_info), + "get-map-info(fd)", + "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X\n", + err, errno, + map_infos[i].type, BPF_MAP_TYPE_ARRAY, + info_len, sizeof(struct bpf_map_info), + map_infos[i].key_size, + map_infos[i].value_size, + map_infos[i].max_entries, + map_infos[i].map_flags)) + goto done; + } + + /* Check bpf_prog_get_next_id() */ + nr_id_found = 0; + next_id = 0; + while (!bpf_prog_get_next_id(next_id, &next_id)) { + struct bpf_prog_info prog_info; + int prog_fd; + + info_len = sizeof(prog_info); + + prog_fd = bpf_prog_get_fd_by_id(next_id); + if (prog_fd < 0 && errno == ENOENT) + /* The bpf_prog is in the dead row */ + continue; + if (CHECK(prog_fd < 0, "get-prog-fd(next_id)", + "prog_fd %d next_id %d errno %d\n", + prog_fd, next_id, errno)) + break; + + for (i = 0; i < nr_iters; i++) + if (prog_infos[i].id == next_id) + break; + + if (i == nr_iters) + continue; + + nr_id_found++; + + err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + CHECK(err || info_len != sizeof(struct bpf_prog_info) || + memcmp(&prog_info, &prog_infos[i], info_len), + "get-prog-info(next_id->fd)", + "err %d errno %d info_len %u(%lu) memcmp %d\n", + err, errno, info_len, sizeof(struct bpf_prog_info), + memcmp(&prog_info, &prog_infos[i], info_len)); + + close(prog_fd); + } + CHECK(nr_id_found != nr_iters, + "check total prog id found by get_next_id", + "nr_id_found %u(%u)\n", + nr_id_found, nr_iters); + + /* Check bpf_map_get_next_id() */ + nr_id_found = 0; + next_id = 0; + while (!bpf_map_get_next_id(next_id, &next_id)) { + struct bpf_map_info map_info; + int map_fd; + + info_len = sizeof(map_info); + + map_fd = bpf_map_get_fd_by_id(next_id); + if (map_fd < 0 && errno == ENOENT) + /* The bpf_map is in the dead row */ + continue; + if (CHECK(map_fd < 0, "get-map-fd(next_id)", + "map_fd %d next_id %u errno %d\n", + map_fd, next_id, errno)) + break; + + for (i = 0; i < nr_iters; i++) + if (map_infos[i].id == next_id) + break; + + if (i == nr_iters) + continue; + + nr_id_found++; + + err = bpf_map_lookup_elem(map_fd, &array_key, &array_value); + assert(!err); + + err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + CHECK(err || info_len != sizeof(struct bpf_map_info) || + memcmp(&map_info, &map_infos[i], info_len) || + array_value != array_magic_value, + "check get-map-info(next_id->fd)", + "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n", + err, errno, info_len, sizeof(struct bpf_map_info), + memcmp(&map_info, &map_infos[i], info_len), + array_value, array_magic_value); + + close(map_fd); + } + CHECK(nr_id_found != nr_iters, + "check total map id found by get_next_id", + "nr_id_found %u(%u)\n", + nr_id_found, nr_iters); + +done: + for (i = 0; i < nr_iters; i++) + bpf_object__close(objs[i]); +} + +static void test_pkt_md_access(void) +{ + const char *file = "./test_pkt_md_access.o"; + struct bpf_object *obj; + __u32 duration, retval; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (err) + return; + + err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + bpf_object__close(obj); +} + int main(void) { struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; @@ -293,7 +514,9 @@ int main(void) test_xdp(); test_l4lb(); test_tcp_estats(); + test_bpf_obj_id(); + test_pkt_md_access(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); - return 0; + return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3773562056da..404aec520812 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -49,6 +49,7 @@ #define MAX_NR_MAPS 4 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) +#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) struct bpf_test { const char *descr; @@ -1072,44 +1073,75 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "check cb access: byte, oob 1", + "__sk_buff->hash, offset 0, byte store not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 4), + offsetof(struct __sk_buff, hash)), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", .result = REJECT, }, { - "check cb access: byte, oob 2", + "__sk_buff->tc_index, offset 3, byte store not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) - 1), + offsetof(struct __sk_buff, tc_index) + 3), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", .result = REJECT, }, { - "check cb access: byte, oob 3", + "check skb->hash byte load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), +#ifdef __LITTLE_ENDIAN BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4]) + 4), + offsetof(struct __sk_buff, hash)), +#else + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check skb->hash byte load not permitted 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", .result = REJECT, }, { - "check cb access: byte, oob 4", + "check skb->hash byte load not permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0]) - 1), + offsetof(struct __sk_buff, hash) + 2), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check skb->hash byte load not permitted 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#ifdef __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#else + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash)), +#endif BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", @@ -1187,44 +1219,53 @@ static struct bpf_test tests[] = { .result = REJECT, }, { - "check cb access: half, oob 1", + "check __sk_buff->hash, offset 0, half store not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 4), + offsetof(struct __sk_buff, hash)), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", .result = REJECT, }, { - "check cb access: half, oob 2", + "check __sk_buff->tc_index, offset 2, half store not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) - 2), + offsetof(struct __sk_buff, tc_index) + 2), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", .result = REJECT, }, { - "check cb access: half, oob 3", + "check skb->hash half load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), +#ifdef __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash)), +#else BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4]) + 4), + offsetof(struct __sk_buff, hash) + 2), +#endif BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { - "check cb access: half, oob 4", + "check skb->hash half load not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), +#ifdef __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0]) - 2), + offsetof(struct __sk_buff, hash) + 2), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash)), +#endif BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", @@ -1367,28 +1408,6 @@ static struct bpf_test tests[] = { "check cb access: double, oob 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 8), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check cb access: double, oob 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) - 8), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check cb access: double, oob 4", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, cb[4])), BPF_EXIT_INSN(), @@ -1397,22 +1416,22 @@ static struct bpf_test tests[] = { .result = REJECT, }, { - "check cb access: double, oob 5", + "check __sk_buff->ifindex dw store not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4]) + 8), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, ifindex)), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", .result = REJECT, }, { - "check cb access: double, oob 6", + "check __sk_buff->ifindex dw load not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0]) - 8), + offsetof(struct __sk_buff, ifindex)), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", @@ -2615,6 +2634,195 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "direct packet access: test17 (pruning, alignment)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14), + BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_JMP_A(-6), + }, + .errstr = "misaligned packet access off 2+15+-4 size 4", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + }, + { + "direct packet access: test18 (imm += pkt_ptr, 1)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 8), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test19 (imm += pkt_ptr, 2)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + BPF_MOV64_IMM(BPF_REG_4, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test20 (x += pkt_ptr, 1)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "direct packet access: test21 (x += pkt_ptr, 2)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9), + BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0xffff), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "direct packet access: test22 (x += pkt_ptr, 3)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), + BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 48), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "direct packet access: test23 (x += pkt_ptr, 4)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "cannot add integer value with 47 upper zero bits to ptr_to_packet", + }, + { + "direct packet access: test24 (x += pkt_ptr, 5)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 64), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { "helper access to packet: test1, valid packet_ptr range", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -3341,6 +3549,70 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS }, { + "alu ops on ptr_to_map_value_or_null, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "alu ops on ptr_to_map_value_or_null, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "alu ops on ptr_to_map_value_or_null, 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { "invalid memory access with multiple map_lookup_elem calls", .insns = { BPF_MOV64_IMM(BPF_REG_1, 10), @@ -3660,6 +3932,72 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", }, { + "leak pointer into ctx 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 2 }, + .errstr_unpriv = "R2 leaks addr into mem", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "leak pointer into ctx 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R10 leaks addr into mem", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "leak pointer into ctx 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 1 }, + .errstr_unpriv = "R2 leaks addr into ctx", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "leak pointer into map val", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr_unpriv = "R6 leaks addr into mem", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { "helper access to map: full range", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -4937,7 +5275,241 @@ static struct bpf_test tests[] = { .fixup_map_in_map = { 3 }, .errstr = "R1 type=map_value_or_null expected=map_ptr", .result = REJECT, - } + }, + { + "ld_abs: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r7", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "ld_ind: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 1), + BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r7", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check bpf_perf_event_data->sample_period byte load permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#ifdef __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period)), +#else + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period) + 7), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, + }, + { + "check bpf_perf_event_data->sample_period half load permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#ifdef __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period)), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period) + 6), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, + }, + { + "check bpf_perf_event_data->sample_period word load permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#ifdef __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period)), +#else + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period) + 4), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, + }, + { + "check bpf_perf_event_data->sample_period dword load permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, + }, + { + "check skb->data half load not permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#ifdef __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data) + 2), +#endif + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + }, + { + "check skb->tc_classid half load not permitted for lwt prog", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#ifdef __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid)), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid) + 2), +#endif + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_LWT_IN, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -5059,9 +5631,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, do_test_fixup(test, prog, map_fds); - fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, "GPL", 0, bpf_vlog, - sizeof(bpf_vlog)); + fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + "GPL", 0, bpf_vlog, sizeof(bpf_vlog)); expected_ret = unpriv && test->result_unpriv != UNDEF ? test->result_unpriv : test->result; @@ -5187,7 +5759,7 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) } printf("Summary: %d PASSED, %d FAILED\n", passes, errors); - return errors ? -errors : 0; + return errors ? EXIT_FAILURE : EXIT_SUCCESS; } int main(int argc, char **argv) diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 32e6211e1c6e..717581145cfc 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -58,7 +58,7 @@ parse_opts() { # opts ;; --verbose|-v|-vv) VERBOSE=$((VERBOSE + 1)) - [ $1 == '-vv' ] && VERBOSE=$((VERBOSE + 1)) + [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1)) shift 1 ;; --debug|-d) diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc index 07bb3e5930b4..aa31368851c9 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc @@ -48,7 +48,7 @@ test_event_enabled() { e=`cat $EVENT_ENABLE` if [ "$e" != $val ]; then echo "Expected $val but found $e" - exit -1 + exit 1 fi } diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 9aec6fcb7729..f2019b37370d 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -34,10 +34,10 @@ reset_ftrace_filter() { # reset all triggers in set_ftrace_filter echo > set_ftrace_filter grep -v '^#' set_ftrace_filter | while read t; do tr=`echo $t | cut -d: -f2` - if [ "$tr" == "" ]; then + if [ "$tr" = "" ]; then continue fi - if [ $tr == "enable_event" -o $tr == "disable_event" ]; then + if [ $tr = "enable_event" -o $tr = "disable_event" ]; then tr=`echo $t | cut -d: -f1-4` limit=`echo $t | cut -d: -f5` else diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc index 4c5a061a5b4e..c73db7863adb 100644 --- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc +++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc @@ -75,9 +75,13 @@ rmdir foo if [ -d foo ]; then fail "foo still exists" fi -exit 0 - +mkdir foo +echo "schedule:enable_event:sched:sched_switch" > foo/set_ftrace_filter +rmdir foo +if [ -d foo ]; then + fail "foo still exists" +fi instance_slam() { diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc new file mode 100644 index 000000000000..f4d1ff785d67 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -0,0 +1,21 @@ +#!/bin/sh +# description: Register/unregister many kprobe events + +# ftrace fentry skip size depends on the machine architecture. +# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc +case `uname -m` in + x86_64|i[3456]86) OFFS=5;; + ppc*) OFFS=4;; + *) OFFS=0;; +esac + +echo "Setup up to 256 kprobes" +grep t /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \ +head -n 256 | while read i; do echo p ${i}+${OFFS} ; done > kprobe_events ||: + +echo 1 > events/kprobes/enable +echo 0 > events/kprobes/enable +echo > kprobe_events +echo "Waiting for unoptimizing & freeing" +sleep 5 +echo "Done" diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index a676d3eefefb..13f5198ba0ee 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -305,7 +305,7 @@ function perf_test() echo "Running remote perf test $WITH DMA" write_file "" $REMOTE_PERF/run echo -n " " - read_file $LOCAL_PERF/run + read_file $REMOTE_PERF/run echo " Passed" _modprobe -r ntb_perf diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 427621792229..2f1f7b013293 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -11,3 +11,4 @@ tm-signal-context-chk-fpu tm-signal-context-chk-gpr tm-signal-context-chk-vmx tm-signal-context-chk-vsx +tm-vmx-unavail diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 5576ee6a51f2..958c11c14acd 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -2,7 +2,8 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu tm-signal-context-chk-vmx tm-signal-context-chk-vsx TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ - tm-vmxcopy tm-fork tm-tar tm-tmspr $(SIGNAL_CONTEXT_CHK_TESTS) + tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail \ + $(SIGNAL_CONTEXT_CHK_TESTS) include ../../lib.mk @@ -13,6 +14,7 @@ CFLAGS += -mhtm $(OUTPUT)/tm-syscall: tm-syscall-asm.S $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include $(OUTPUT)/tm-tmspr: CFLAGS += -pthread +$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c index d9c49f41515e..e79ccd6aada1 100644 --- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c +++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c @@ -42,12 +42,12 @@ int test_body(void) printf("Check DSCR TM context switch: "); fflush(stdout); for (;;) { - rv = 1; asm __volatile__ ( /* set a known value into the DSCR */ "ld 3, %[dscr1];" "mtspr %[sprn_dscr], 3;" + "li %[rv], 1;" /* start and suspend a transaction */ "tbegin.;" "beq 1f;" diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c new file mode 100644 index 000000000000..137185ba4937 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c @@ -0,0 +1,118 @@ +/* + * Copyright 2017, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * Original: Breno Leitao <brenohl@br.ibm.com> & + * Gustavo Bueno Romero <gromero@br.ibm.com> + * Edited: Michael Neuling + * + * Force VMX unavailable during a transaction and see if it corrupts + * the checkpointed VMX register state after the abort. + */ + +#include <inttypes.h> +#include <htmintrin.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <pthread.h> +#include <sys/mman.h> +#include <unistd.h> +#include <pthread.h> + +#include "tm.h" +#include "utils.h" + +int passed; + +void *worker(void *unused) +{ + __int128 vmx0; + uint64_t texasr; + + asm goto ( + "li 3, 1;" /* Stick non-zero value in VMX0 */ + "std 3, 0(%[vmx0_ptr]);" + "lvx 0, 0, %[vmx0_ptr];" + + /* Wait here a bit so we get scheduled out 255 times */ + "lis 3, 0x3fff;" + "1: ;" + "addi 3, 3, -1;" + "cmpdi 3, 0;" + "bne 1b;" + + /* Kernel will hopefully turn VMX off now */ + + "tbegin. ;" + "beq failure;" + + /* Cause VMX unavail. Any VMX instruction */ + "vaddcuw 0,0,0;" + + "tend. ;" + "b %l[success];" + + /* Check VMX0 sanity after abort */ + "failure: ;" + "lvx 1, 0, %[vmx0_ptr];" + "vcmpequb. 2, 0, 1;" + "bc 4, 24, %l[value_mismatch];" + "b %l[value_match];" + : + : [vmx0_ptr] "r"(&vmx0) + : "r3" + : success, value_match, value_mismatch + ); + + /* HTM aborted and VMX0 is corrupted */ +value_mismatch: + texasr = __builtin_get_texasr(); + + printf("\n\n==============\n\n"); + printf("Failure with error: %lx\n", _TEXASR_FAILURE_CODE(texasr)); + printf("Summary error : %lx\n", _TEXASR_FAILURE_SUMMARY(texasr)); + printf("TFIAR exact : %lx\n\n", _TEXASR_TFIAR_EXACT(texasr)); + + passed = 0; + return NULL; + + /* HTM aborted but VMX0 is correct */ +value_match: +// printf("!"); + return NULL; + +success: +// printf("."); + return NULL; +} + +int tm_vmx_unavail_test() +{ + int threads; + pthread_t *thread; + + SKIP_IF(!have_htm()); + + passed = 1; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * 4; + thread = malloc(sizeof(pthread_t)*threads); + if (!thread) + return EXIT_FAILURE; + + for (uint64_t i = 0; i < threads; i++) + pthread_create(&thread[i], NULL, &worker, NULL); + + for (uint64_t i = 0; i < threads; i++) + pthread_join(thread[i], NULL); + + free(thread); + + return passed ? EXIT_SUCCESS : EXIT_FAILURE; +} + + +int main(int argc, char **argv) +{ + return test_harness(tm_vmx_unavail_test, "tm_vmx_unavail_test"); +} diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh index eee31e261bf7..70fca318a82b 100755 --- a/tools/testing/selftests/rcutorture/bin/configcheck.sh +++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh @@ -27,7 +27,7 @@ cat $1 > $T/.config cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' | awk ' -BEGIN { +{ print "if grep -q \"" $0 "\" < '"$T/.config"'"; print "then"; print "\t:"; diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index 00cb0db2643d..c29f2ec0bf9f 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -45,7 +45,7 @@ T=/tmp/test-linux.sh.$$ trap 'rm -rf $T' 0 mkdir $T -grep -v 'CONFIG_[A-Z]*_TORTURE_TEST' < ${config_template} > $T/config +grep -v 'CONFIG_[A-Z]*_TORTURE_TEST=' < ${config_template} > $T/config cat << ___EOF___ >> $T/config CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD" CONFIG_VIRTIO_PCI=y diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 3b3c1b693ee1..50091de3a911 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -296,10 +296,7 @@ if test -d .git then git status >> $resdir/$ds/testid.txt git rev-parse HEAD >> $resdir/$ds/testid.txt - if ! git diff HEAD > $T/git-diff 2>&1 - then - cp $T/git-diff $resdir/$ds - fi + git diff HEAD >> $resdir/$ds/testid.txt fi ___EOF___ awk < $T/cfgcpu.pack \ diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index a3a1a05a2b5c..6a0b9f69faad 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -9,6 +9,8 @@ TREE08 TREE09 SRCU-N SRCU-P +SRCU-t +SRCU-u TINY01 TINY02 TASKS01 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot new file mode 100644 index 000000000000..84a7d51b7481 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot @@ -0,0 +1 @@ +rcutorture.torture_type=srcud diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N index 1a087c3c8bb8..2da8b49589a0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N @@ -5,4 +5,4 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n -CONFIG_RCU_EXPERT=y +#CHECK#CONFIG_RCU_EXPERT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P index 4837430a71c0..ab7ccd38232b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P @@ -2,7 +2,11 @@ CONFIG_RCU_TRACE=n CONFIG_SMP=y CONFIG_NR_CPUS=8 CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_EXPERT=y +CONFIG_RCU_FANOUT=2 +CONFIG_RCU_FANOUT_LEAF=2 CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y -#CHECK#CONFIG_RCU_EXPERT=n +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t new file mode 100644 index 000000000000..6c78022c8cd8 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t @@ -0,0 +1,10 @@ +CONFIG_SMP=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TINY_SRCU=y +CONFIG_RCU_TRACE=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_DEBUG_ATOMIC_SLEEP=y +#CHECK#CONFIG_PREEMPT_COUNT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot new file mode 100644 index 000000000000..238bfe3bd0cc --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot @@ -0,0 +1 @@ +rcutorture.torture_type=srcu diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u new file mode 100644 index 000000000000..6bc24e99862f --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u @@ -0,0 +1,9 @@ +CONFIG_SMP=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TINY_SRCU=y +CONFIG_RCU_TRACE=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_PREEMPT_COUNT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot new file mode 100644 index 000000000000..84a7d51b7481 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot @@ -0,0 +1 @@ +rcutorture.torture_type=srcud diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 index a59f7686e219..d8674264318d 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 @@ -6,10 +6,9 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=y CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=n -CONFIG_RCU_TRACE=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU_REPEATEDLY=y #CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -CONFIG_PREEMPT_COUNT=y +CONFIG_DEBUG_ATOMIC_SLEEP=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index 359cb258f639..b5b53973c01e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -10,12 +10,9 @@ CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_MAXSMP=y +CONFIG_CPUMASK_OFFSTACK=y CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_ZERO=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot index adc3abc82fb8..1d14e1383016 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot @@ -1 +1,5 @@ rcutorture.torture_type=rcu_bh maxcpus=8 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 +rcu_nocbs=0 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 index c1ab5926568b..35e639e39366 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -18,9 +18,6 @@ CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y +CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 index 3b93ee544e70..2dc31b16e506 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 @@ -14,9 +14,5 @@ CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=y -CONFIG_RCU_KTHREAD_PRIO=2 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 120c0c88d100..5d2cc0bd50a0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -1 +1,5 @@ rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 +rcutree.kthread_prio=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 5af758e783c7..27d22695d64c 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -15,11 +15,7 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=4 CONFIG_RCU_FANOUT_LEAF=3 -CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y CONFIG_RCU_EQS_DEBUG=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index d4cdc0d74e16..2dde0d9964e3 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -13,12 +13,8 @@ CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_NONE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot index 15b3e1a86f74..c7fd050dfcd9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot @@ -1,2 +1,5 @@ rcutorture.torture_type=sched rcupdate.rcu_self_test_sched=1 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index 4cb02bd28f08..05a4eec3f27b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -18,8 +18,6 @@ CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y +CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot index dd90f28ed700..ad18b52a2cad 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot @@ -2,3 +2,6 @@ rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_bh=1 rcupdate.rcu_self_test_sched=1 rcutree.rcu_fanout_exact=1 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index b12a3ea1867e..0f4759f4232e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -1,6 +1,5 @@ CONFIG_SMP=y CONFIG_NR_CPUS=16 -CONFIG_CPUMASK_OFFSTACK=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n @@ -9,16 +8,11 @@ CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y CONFIG_NO_HZ_FULL_ALL=n -CONFIG_NO_HZ_FULL_SYSIDLE=y CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=2 CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 index 099cc63c6a3b..fb1c763c10c5 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 @@ -15,7 +15,6 @@ CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_ALL=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=n CONFIG_RCU_BOOST=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T deleted file mode 100644 index 2ad13f0d29cc..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T +++ /dev/null @@ -1,21 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=16 -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_PREEMPT_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_ALL=y -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot index fb066dc82769..1bd8efc4141e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot @@ -2,3 +2,4 @@ rcutorture.torture_type=sched rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_sched=1 rcutree.rcu_fanout_exact=1 +rcu_nocbs=0-7 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY index 917d2517b5b5..fb05ef5279b4 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY @@ -1,21 +1,16 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=n +CONFIG_SMP=n +CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_PREEMPT_RCU=y +CONFIG_PREEMPT=n +#CHECK#CONFIG_TINY_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_LEAF=3 CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_TRACE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE index a312f671a29a..721cfda76ab2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE @@ -7,7 +7,6 @@ CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=n CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 index 985fb170d13c..7629f5dd73b2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 @@ -8,7 +8,6 @@ CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=n CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt index 24396ae8355b..a75b16991a92 100644 --- a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt +++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt @@ -18,7 +18,6 @@ CONFIG_PROVE_RCU In common code tested by TREE_RCU test cases. -CONFIG_NO_HZ_FULL_SYSIDLE CONFIG_RCU_NOCB_CPU Meaningless for TINY_RCU. diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index 364801b1a230..9ad3f89c8dc7 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -9,28 +9,20 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one. CONFIG_HOTPLUG_CPU -- Do half. (Every second.) CONFIG_HZ_PERIODIC -- Do one. CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.) -CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE. -CONFIG_NO_HZ_FULL_SYSIDLE -- Do one. +CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement. CONFIG_PREEMPT -- Do half. (First three and #8.) CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not. CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. -CONFIG_PROVE_RCU_REPEATEDLY -- Do one. CONFIG_RCU_BOOST -- one of PREEMPT_RCU. -CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing. CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT_LEAF -- Do one non-default. -CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL. -CONFIG_RCU_NOCB_CPU -- Do three, see below. -CONFIG_RCU_NOCB_CPU_ALL -- Do one. -CONFIG_RCU_NOCB_CPU_NONE -- Do one. -CONFIG_RCU_NOCB_CPU_ZERO -- Do one. +CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs. +CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with + rcu_nocbs=0, and one with all rcu_nocbs CPUs. CONFIG_RCU_TRACE -- Do half. CONFIG_SMP -- Need one !SMP for PREEMPT_RCU. CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations. CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not. -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP -- Do for all but a couple TREE scenarios. -CONFIG_RCU_TORTURE_TEST_SLOW_INIT -- Do for all but a couple TREE scenarios. -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT -- Do for all but a couple TREE scenarios. RCU-bh: Do one with PREEMPT and one with !PREEMPT. RCU-sched: Do one with PREEMPT but not BOOST. @@ -52,10 +44,6 @@ CONFIG_64BIT Used only to check CONFIG_RCU_FANOUT value, inspection suffices. -CONFIG_NO_HZ_FULL_SYSIDLE_SMALL - - Defer until Frederic uses this. - CONFIG_PREEMPT_COUNT CONFIG_PREEMPT_RCU @@ -78,30 +66,16 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE Always used in KVM testing. -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY -CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY - - Inspection suffices, ignore. - CONFIG_PREEMPT_RCU CONFIG_TREE_RCU CONFIG_TINY_RCU These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP. -CONFIG_SPARSE_RCU_POINTER - - Makes sense only for sparse runs, not for kernel builds. - CONFIG_SRCU CONFIG_TASKS_RCU Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable. -CONFIG_RCU_TRACE - - Implied by CONFIG_RCU_TRACE for Tree RCU. - boot parameters ignored: TBD diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk index 8ff89043d0a9..c9e8bc5082a7 100755 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk +++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk @@ -1,4 +1,4 @@ -#!/bin/awk -f +#!/usr/bin/awk -f # Modify SRCU for formal verification. The first argument should be srcu.h and # the second should be srcu.c. Outputs modified srcu.h and srcu.c into the diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 03f1fa495d74..00a928b833d0 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1822,6 +1822,23 @@ struct tsync_sibling { struct __test_metadata *metadata; }; +/* + * To avoid joining joined threads (which is not allowed by Bionic), + * make sure we both successfully join and clear the tid to skip a + * later join attempt during fixture teardown. Any remaining threads + * will be directly killed during teardown. + */ +#define PTHREAD_JOIN(tid, status) \ + do { \ + int _rc = pthread_join(tid, status); \ + if (_rc) { \ + TH_LOG("pthread_join of tid %u failed: %d\n", \ + (unsigned int)tid, _rc); \ + } else { \ + tid = 0; \ + } \ + } while (0) + FIXTURE_DATA(TSYNC) { struct sock_fprog root_prog, apply_prog; struct tsync_sibling sibling[TSYNC_SIBLINGS]; @@ -1890,14 +1907,14 @@ FIXTURE_TEARDOWN(TSYNC) for ( ; sib < self->sibling_count; ++sib) { struct tsync_sibling *s = &self->sibling[sib]; - void *status; if (!s->tid) continue; - if (pthread_kill(s->tid, 0)) { - pthread_cancel(s->tid); - pthread_join(s->tid, &status); - } + /* + * If a thread is still running, it may be stuck, so hit + * it over the head really hard. + */ + pthread_kill(s->tid, 9); } pthread_mutex_destroy(&self->mutex); pthread_cond_destroy(&self->cond); @@ -1987,9 +2004,9 @@ TEST_F(TSYNC, siblings_fail_prctl) pthread_mutex_unlock(&self->mutex); /* Ensure diverging sibling failed to call prctl. */ - pthread_join(self->sibling[0].tid, &status); + PTHREAD_JOIN(self->sibling[0].tid, &status); EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); - pthread_join(self->sibling[1].tid, &status); + PTHREAD_JOIN(self->sibling[1].tid, &status); EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); } @@ -2029,9 +2046,9 @@ TEST_F(TSYNC, two_siblings_with_ancestor) } pthread_mutex_unlock(&self->mutex); /* Ensure they are both killed and don't exit cleanly. */ - pthread_join(self->sibling[0].tid, &status); + PTHREAD_JOIN(self->sibling[0].tid, &status); EXPECT_EQ(0x0, (long)status); - pthread_join(self->sibling[1].tid, &status); + PTHREAD_JOIN(self->sibling[1].tid, &status); EXPECT_EQ(0x0, (long)status); } @@ -2055,9 +2072,9 @@ TEST_F(TSYNC, two_sibling_want_nnp) pthread_mutex_unlock(&self->mutex); /* Ensure they are both upset about lacking nnp. */ - pthread_join(self->sibling[0].tid, &status); + PTHREAD_JOIN(self->sibling[0].tid, &status); EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); - pthread_join(self->sibling[1].tid, &status); + PTHREAD_JOIN(self->sibling[1].tid, &status); EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); } @@ -2095,9 +2112,9 @@ TEST_F(TSYNC, two_siblings_with_no_filter) pthread_mutex_unlock(&self->mutex); /* Ensure they are both killed and don't exit cleanly. */ - pthread_join(self->sibling[0].tid, &status); + PTHREAD_JOIN(self->sibling[0].tid, &status); EXPECT_EQ(0x0, (long)status); - pthread_join(self->sibling[1].tid, &status); + PTHREAD_JOIN(self->sibling[1].tid, &status); EXPECT_EQ(0x0, (long)status); } @@ -2140,9 +2157,9 @@ TEST_F(TSYNC, two_siblings_with_one_divergence) pthread_mutex_unlock(&self->mutex); /* Ensure they are both unkilled. */ - pthread_join(self->sibling[0].tid, &status); + PTHREAD_JOIN(self->sibling[0].tid, &status); EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); - pthread_join(self->sibling[1].tid, &status); + PTHREAD_JOIN(self->sibling[1].tid, &status); EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); } @@ -2199,7 +2216,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) TH_LOG("cond broadcast non-zero"); } pthread_mutex_unlock(&self->mutex); - pthread_join(self->sibling[sib].tid, &status); + PTHREAD_JOIN(self->sibling[sib].tid, &status); EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); /* Poll for actual task death. pthread_join doesn't guarantee it. */ while (!kill(self->sibling[sib].system_tid, 0)) @@ -2224,7 +2241,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter) TH_LOG("cond broadcast non-zero"); } pthread_mutex_unlock(&self->mutex); - pthread_join(self->sibling[sib].tid, &status); + PTHREAD_JOIN(self->sibling[sib].tid, &status); EXPECT_EQ(0, (long)status); /* Poll for actual task death. pthread_join doesn't guarantee it. */ while (!kill(self->sibling[sib].system_tid, 0)) diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore new file mode 100644 index 000000000000..c18dd8d83cee --- /dev/null +++ b/tools/testing/selftests/tc-testing/.gitignore @@ -0,0 +1 @@ +__pycache__/ diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README new file mode 100644 index 000000000000..970ff294fec8 --- /dev/null +++ b/tools/testing/selftests/tc-testing/README @@ -0,0 +1,102 @@ +tdc - Linux Traffic Control (tc) unit testing suite + +Author: Lucas Bates - lucasb@mojatatu.com + +tdc is a Python script to load tc unit tests from a separate JSON file and +execute them inside a network namespace dedicated to the task. + + +REQUIREMENTS +------------ + +* Minimum Python version of 3.4. Earlier 3.X versions may work but are not + guaranteed. + +* The kernel must have network namespace support + +* The kernel must have veth support available, as a veth pair is created + prior to running the tests. + +* All tc-related features must be built in or available as modules. + To check what is required in current setup run: + ./tdc.py -c + + Note: + In the current release, tdc run will abort due to a failure in setup or + teardown commands - which includes not being able to run a test simply + because the kernel did not support a specific feature. (This will be + handled in a future version - the current workaround is to run the tests + on specific test categories that your kernel supports) + + +BEFORE YOU RUN +-------------- + +The path to the tc executable that will be most commonly tested can be defined +in the tdc_config.py file. Find the 'TC' entry in the NAMES dictionary and +define the path. + +If you need to test a different tc executable on the fly, you can do so by +using the -p option when running tdc: + ./tdc.py -p /path/to/tc + + +RUNNING TDC +----------- + +To use tdc, root privileges are required. tdc will not run otherwise. + +All tests are executed inside a network namespace to prevent conflicts +within the host. + +Running tdc without any arguments will run all tests. Refer to the section +on command line arguments for more information, or run: + ./tdc.py -h + +tdc will list the test names as they are being run, and print a summary in +TAP (Test Anything Protocol) format when they are done. If tests fail, +output captured from the failing test will be printed immediately following +the failed test in the TAP output. + + +USER-DEFINED CONSTANTS +---------------------- + +The tdc_config.py file contains multiple values that can be altered to suit +your needs. Any value in the NAMES dictionary can be altered without affecting +the tests to be run. These values are used in the tc commands that will be +executed as part of the test. More will be added as test cases require. + +Example: + $TC qdisc add dev $DEV1 ingress + + +COMMAND LINE ARGUMENTS +---------------------- + +Run tdc.py -h to see the full list of available arguments. + +-p PATH Specify the tc executable located at PATH to be used on this + test run +-c Show the available test case categories in this test file +-c CATEGORY Run only tests that belong to CATEGORY +-f FILE Read test cases from the JSON file named FILE +-l [CATEGORY] List all test cases in the JSON file. If CATEGORY is + specified, list test cases matching that category. +-s ID Show the test case matching ID +-e ID Execute the test case identified by ID +-i Generate unique ID numbers for test cases with no existing + ID number + + +ACKNOWLEDGEMENTS +---------------- + +Thanks to: + +Jamal Hadi Salim, for providing valuable test cases +Keara Leibovitz, who wrote the CLI test driver that I used as a base for the + first version of the tc testing suite. This work was presented at + Netdev 1.2 Tokyo in October 2016. +Samir Hussain, for providing help while I dove into Python for the first time + and being a second eye for this code. diff --git a/tools/testing/selftests/tc-testing/TODO.txt b/tools/testing/selftests/tc-testing/TODO.txt new file mode 100644 index 000000000000..6a266d811a78 --- /dev/null +++ b/tools/testing/selftests/tc-testing/TODO.txt @@ -0,0 +1,10 @@ +tc Testing Suite To-Do list: + +- Determine what tc features are supported in the kernel. If features are not + present, prevent the related categories from running. + +- Add support for multiple versions of tc to run successively + +- Improve error messages when tdc aborts its run + +- Allow tdc to write its results to file diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt new file mode 100644 index 000000000000..4e09257bc443 --- /dev/null +++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt @@ -0,0 +1,69 @@ +tdc - Adding test cases for tdc + +Author: Lucas Bates - lucasb@mojatatu.com + +ADDING TEST CASES +----------------- + +User-defined tests should be added by defining a separate JSON file. This +will help prevent conflicts when updating the repository. Refer to +template.json for the required JSON format for test cases. + +Include the 'id' field, but do not assign a value. Running tdc with the -i +option will generate a unique ID for that test case. + +tdc will recursively search the 'tc' subdirectory for .json files. Any +test case files you create in these directories will automatically be included. +If you wish to store your custom test cases elsewhere, be sure to run tdc +with the -f argument and the path to your file. + +Be aware of required escape characters in the JSON data - particularly when +defining the match pattern. Refer to the tctests.json file for examples when +in doubt. + + +TEST CASE STRUCTURE +------------------- + +Each test case has required data: + +id: A unique alphanumeric value to identify a particular test case +name: Descriptive name that explains the command under test +category: A list of single-word descriptions covering what the command + under test is testing. Example: filter, actions, u32, gact, etc. +setup: The list of commands required to ensure the command under test + succeeds. For example: if testing a filter, the command to create + the qdisc would appear here. +cmdUnderTest: The tc command being tested itself. +expExitCode: The code returned by the command under test upon its termination. + tdc will compare this value against the actual returned value. +verifyCmd: The tc command to be run to verify successful execution. + For example: if the command under test creates a gact action, + verifyCmd should be "$TC actions show action gact" +matchPattern: A regular expression to be applied against the output of the + verifyCmd to prove the command under test succeeded. This pattern + should be as specific as possible so that a false positive is not + matched. +matchCount: How many times the regex in matchPattern should match. A value + of 0 is acceptable. +teardown: The list of commands to clean up after the test is completed. + The environment should be returned to the same state as when + this test was started: qdiscs deleted, actions flushed, etc. + + +SETUP/TEARDOWN ERRORS +--------------------- + +If an error is detected during the setup/teardown process, execution of the +tests will immediately stop with an error message and the namespace in which +the tests are run will be destroyed. This is to prevent inaccurate results +in the test cases. + +Repeated failures of the setup/teardown may indicate a problem with the test +case, or possibly even a bug in one of the commands that are not being tested. + +It's possible to include acceptable exit codes with the setup/teardown command +so that it doesn't halt the script for an error that doesn't matter. Turn the +individual command into a list, with the command being first, followed by all +acceptable exit codes for the command. + diff --git a/tools/testing/selftests/tc-testing/creating-testcases/template.json b/tools/testing/selftests/tc-testing/creating-testcases/template.json new file mode 100644 index 000000000000..87971744bdd4 --- /dev/null +++ b/tools/testing/selftests/tc-testing/creating-testcases/template.json @@ -0,0 +1,40 @@ +[ + { + "id": "", + "name": "", + "category": [ + "", + "" + ], + "setup": [ + "" + ], + "cmdUnderTest": "", + "expExitCode": "", + "verifyCmd": "", + "matchPattern": "", + "matchCount": "", + "teardown": [ + "" + ] + }, + { + "id": "", + "name": "", + "category": [ + "", + "" + ], + "setup": [ + "" + ], + "cmdUnderTest": "", + "expExitCode": "", + "verifyCmd": "", + "matchPattern": "", + "matchCount": "", + "teardown": [ + "" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json new file mode 100644 index 000000000000..af519bc97a8e --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json @@ -0,0 +1,1115 @@ +[ + { + "id": "e89a", + "name": "Add valid pass action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pass index 8", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pass.*index 8 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "a02c", + "name": "Add valid pipe action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pipe index 6", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pipe.*index 6 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "feef", + "name": "Add valid reclassify action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action reclassify index 5", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action reclassify.*index 5 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "8a7a", + "name": "Add valid drop action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action drop index 30", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action drop.*index 30 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "9a52", + "name": "Add valid continue action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action continue index 432", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action continue.*index 432 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "d700", + "name": "Add invalid action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pump index 386", + "expExitCode": "255", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action.*index 386 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "9215", + "name": "Add action with duplicate index", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action pipe index 15" + ], + "cmdUnderTest": "$TC actions add action drop index 15", + "expExitCode": "255", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action drop.*index 15 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "798e", + "name": "Add action with index exceeding 32-bit maximum", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action drop index 4294967296", + "expExitCode": "255", + "verifyCmd": "actions list action gact", + "matchPattern": "action order [0-9]*: gact action drop.*index 4294967296 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "22be", + "name": "Add action with index at 32-bit maximum", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action drop index 4294967295", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action drop.*index 4294967295 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "ac2a", + "name": "List actions", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action reclassify index 101", + "$TC actions add action reclassify index 102", + "$TC actions add action reclassify index 103", + "$TC actions add action reclassify index 104", + "$TC actions add action reclassify index 105" + ], + "cmdUnderTest": "$TC actions list action gact", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action reclassify", + "matchCount": "5", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "63ec", + "name": "Delete pass action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action pass index 1" + ], + "cmdUnderTest": "$TC actions del action gact index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pass.*index 1 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "46be", + "name": "Delete pipe action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action pipe index 9" + ], + "cmdUnderTest": "$TC actions del action gact index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pipe.*index 9 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "2e08", + "name": "Delete reclassify action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action reclassify index 65536" + ], + "cmdUnderTest": "$TC actions del action gact index 65536", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action reclassify.*index 65536 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "99c4", + "name": "Delete drop action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action drop index 16" + ], + "cmdUnderTest": "$TC actions del action gact index 16", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action drop.*index 16 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "fb6b", + "name": "Delete continue action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action continue index 32" + ], + "cmdUnderTest": "$TC actions del action gact index 32", + "expExitCode": "0", + "verifyCmd": "actions list action gact", + "matchPattern": "action order [0-9]*: gact action continue.*index 32 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "0eb3", + "name": "Delete non-existent action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions del action gact index 2", + "expExitCode": "255", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "5124", + "name": "Add mirred mirror to egress action", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 dev lo", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "6fb4", + "name": "Add mirred redirect to egress action", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress redirect index 2 dev lo action pipe", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 2 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "ba38", + "name": "Get mirred actions", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred egress mirror index 1 dev lo", + "$TC actions add action mirred egress redirect index 2 dev lo" + ], + "cmdUnderTest": "$TC actions show action mirred", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "[Mirror|Redirect] to device lo", + "matchCount": "2", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "d7c0", + "name": "Add invalid mirred direction", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred inbound mirror index 20 dev lo", + "expExitCode": "255", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 20 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "e213", + "name": "Add invalid mirred action", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress remirror index 20 dev lo", + "expExitCode": "255", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Egress.*to device lo\\).*index 20 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "2d89", + "name": "Add mirred action with invalid device", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress mirror index 20 dev eltoh", + "expExitCode": "255", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(.*to device eltoh\\).*index 20 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "300b", + "name": "Add mirred action with duplicate index", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred egress redirect index 15 dev lo" + ], + "cmdUnderTest": "$TC actions add action mirred egress mirror index 15 dev lo", + "expExitCode": "255", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 15 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "a70e", + "name": "Delete mirred mirror action", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred egress mirror index 5 dev lo" + ], + "cmdUnderTest": "$TC actions del action mirred index 5", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 5 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "3fb3", + "name": "Delete mirred redirect action", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred egress redirect index 5 dev lo" + ], + "cmdUnderTest": "$TC actions del action mirred index 5", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 5 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "b078", + "name": "Add simple action", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action simple sdata \"A triumph\" index 60", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <A triumph>.*index 60 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action simple" + ] + }, + { + "id": "6d4c", + "name": "Add simple action with duplicate index", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"Aruba\" index 4" + ], + "cmdUnderTest": "$TC actions add action simple sdata \"Jamaica\" index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <Jamaica>.*ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action simple" + ] + }, + { + "id": "2542", + "name": "List simple actions", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"Rock\"", + "$TC actions add action simple sdata \"Paper\"", + "$TC actions add action simple sdata \"Scissors\" index 98" + ], + "cmdUnderTest": "$TC actions list action simple", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>", + "matchCount": "3", + "teardown": [ + "$TC actions flush action simple" + ] + }, + { + "id": "ea67", + "name": "Delete simple action", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"Blinkenlights\" index 1" + ], + "cmdUnderTest": "$TC actions delete action simple index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <Blinkenlights>.*index 1 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action simple" + ] + }, + { + "id": "8ff1", + "name": "Flush simple actions", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"Kirk\"", + "$TC actions add action simple sdata \"Spock\" index 50", + "$TC actions add action simple sdata \"McCoy\" index 9" + ], + "cmdUnderTest": "$TC actions flush action simple", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>", + "matchCount": "0", + "teardown": [ + "" + ] + }, + { + "id": "6236", + "name": "Add skbedit action with valid mark", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit mark 1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit mark 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "407b", + "name": "Add skbedit action with invalid mark", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit mark 666777888999", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit mark", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "081d", + "name": "Add skbedit action with priority", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit prio 99", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit priority :99", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "cc37", + "name": "Add skbedit action with invalid priority", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit prio foo", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit priority", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "3c95", + "name": "Add skbedit action with queue_mapping", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit queue_mapping 909", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit queue_mapping 909", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "985c", + "name": "Add skbedit action with invalid queue_mapping", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit queue_mapping 67000", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit queue_mapping", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "224f", + "name": "Add skbedit action with ptype host", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit ptype host", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit ptype host", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "d1a3", + "name": "Add skbedit action with ptype otherhost", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit ptype otherhost", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit ptype otherhost", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "b9c6", + "name": "Add skbedit action with invalid ptype", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit ptype openair", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit ptype openair", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "5172", + "name": "List skbedit actions", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ], + "$TC actions add action skbedit ptype otherhost", + "$TC actions add action skbedit ptype broadcast", + "$TC actions add action skbedit mark 59", + "$TC actions add action skbedit mark 409" + ], + "cmdUnderTest": "$TC actions list action skbedit", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit", + "matchCount": "4", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "a6d6", + "name": "Add skbedit action with index", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4040404040", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "index 4040404040", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "38f3", + "name": "Delete skbedit action", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ], + "$TC actions add action skbedit mark 42 index 9009" + ], + "cmdUnderTest": "$TC actions del action skbedit index 9009", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit mark 42", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "ce97", + "name": "Flush skbedit actions", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + "$TC actions add action skbedit mark 500", + "$TC actions add action skbedit mark 501", + "$TC actions add action skbedit mark 502", + "$TC actions add action skbedit mark 503", + "$TC actions add action skbedit mark 504", + "$TC actions add action skbedit mark 505", + "$TC actions add action skbedit mark 506" + ], + "cmdUnderTest": "$TC actions flush action skbedit", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit", + "matchCount": "0", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "f02c", + "name": "Replace gact action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action drop index 10", + "$TC actions add action drop index 12" + ], + "cmdUnderTest": "$TC actions replace action ok index 12", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action gact", + "matchPattern": "action order [0-9]*: gact action pass", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "525f", + "name": "Get gact action by index", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action drop index 3900800700" + ], + "cmdUnderTest": "$TC actions get action gact index 3900800700", + "expExitCode": "0", + "verifyCmd": "$TC actions get action gact index 3900800700", + "matchPattern": "index 3900800700", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] + } +]
\ No newline at end of file diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json new file mode 100644 index 000000000000..c727b96a59b0 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -0,0 +1,21 @@ +[ + { + "id": "e9a3", + "name": "Add u32 with source match", + "category": [ + "filter", + "u32" + ], + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 u32 match ip src 127.0.0.1/32 flowid 1:1 action ok", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", + "matchPattern": "match 7f000002/ffffffff at 12", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + } +]
\ No newline at end of file diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py new file mode 100755 index 000000000000..cd61b7844c0d --- /dev/null +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -0,0 +1,413 @@ +#!/usr/bin/env python3 + +""" +tdc.py - Linux tc (Traffic Control) unit test driver + +Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com> +""" + +import re +import os +import sys +import argparse +import json +import subprocess +from collections import OrderedDict +from string import Template + +from tdc_config import * +from tdc_helper import * + + +USE_NS = True + + +def replace_keywords(cmd): + """ + For a given executable command, substitute any known + variables contained within NAMES with the correct values + """ + tcmd = Template(cmd) + subcmd = tcmd.safe_substitute(NAMES) + return subcmd + + +def exec_cmd(command, nsonly=True): + """ + Perform any required modifications on an executable command, then run + it in a subprocess and return the results. + """ + if (USE_NS and nsonly): + command = 'ip netns exec $NS ' + command + + if '$' in command: + command = replace_keywords(command) + + proc = subprocess.Popen(command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (rawout, serr) = proc.communicate() + + if proc.returncode != 0: + foutput = serr.decode("utf-8") + else: + foutput = rawout.decode("utf-8") + + proc.stdout.close() + proc.stderr.close() + return proc, foutput + + +def prepare_env(cmdlist): + """ + Execute the setup/teardown commands for a test case. Optionally + terminate test execution if the command fails. + """ + for cmdinfo in cmdlist: + if (type(cmdinfo) == list): + exit_codes = cmdinfo[1:] + cmd = cmdinfo[0] + else: + exit_codes = [0] + cmd = cmdinfo + + if (len(cmd) == 0): + continue + + (proc, foutput) = exec_cmd(cmd) + + if proc.returncode not in exit_codes: + print + print("Could not execute:") + print(cmd) + print("\nError message:") + print(foutput) + print("\nAborting test run.") + ns_destroy() + exit(1) + + +def test_runner(filtered_tests): + """ + Driver function for the unit tests. + + Prints information about the tests being run, executes the setup and + teardown commands and the command under test itself. Also determines + success/failure based on the information in the test case and generates + TAP output accordingly. + """ + testlist = filtered_tests + tcount = len(testlist) + index = 1 + tap = str(index) + ".." + str(tcount) + "\n" + + for tidx in testlist: + result = True + tresult = "" + print("Test " + tidx["id"] + ": " + tidx["name"]) + prepare_env(tidx["setup"]) + (p, procout) = exec_cmd(tidx["cmdUnderTest"]) + exit_code = p.returncode + + if (exit_code != int(tidx["expExitCode"])): + result = False + print("exit:", exit_code, int(tidx["expExitCode"])) + print(procout) + else: + match_pattern = re.compile(str(tidx["matchPattern"]), re.DOTALL) + (p, procout) = exec_cmd(tidx["verifyCmd"]) + match_index = re.findall(match_pattern, procout) + if len(match_index) != int(tidx["matchCount"]): + result = False + + if result == True: + tresult += "ok " + else: + tresult += "not ok " + tap += tresult + str(index) + " " + tidx["id"] + " " + tidx["name"] + "\n" + + if result == False: + tap += procout + + prepare_env(tidx["teardown"]) + index += 1 + + return tap + + +def ns_create(): + """ + Create the network namespace in which the tests will be run and set up + the required network devices for it. + """ + if (USE_NS): + cmd = 'ip netns add $NS' + exec_cmd(cmd, False) + cmd = 'ip link add $DEV0 type veth peer name $DEV1' + exec_cmd(cmd, False) + cmd = 'ip link set $DEV1 netns $NS' + exec_cmd(cmd, False) + cmd = 'ip link set $DEV0 up' + exec_cmd(cmd, False) + cmd = 'ip -s $NS link set $DEV1 up' + exec_cmd(cmd, False) + + +def ns_destroy(): + """ + Destroy the network namespace for testing (and any associated network + devices as well) + """ + if (USE_NS): + cmd = 'ip netns delete $NS' + exec_cmd(cmd, False) + + +def has_blank_ids(idlist): + """ + Search the list for empty ID fields and return true/false accordingly. + """ + return not(all(k for k in idlist)) + + +def load_from_file(filename): + """ + Open the JSON file containing the test cases and return them as an + ordered dictionary object. + """ + with open(filename) as test_data: + testlist = json.load(test_data, object_pairs_hook=OrderedDict) + idlist = get_id_list(testlist) + if (has_blank_ids(idlist)): + for k in testlist: + k['filename'] = filename + return testlist + + +def args_parse(): + """ + Create the argument parser. + """ + parser = argparse.ArgumentParser(description='Linux TC unit tests') + return parser + + +def set_args(parser): + """ + Set the command line arguments for tdc. + """ + parser.add_argument('-p', '--path', type=str, + help='The full path to the tc executable to use') + parser.add_argument('-c', '--category', type=str, nargs='?', const='+c', + help='Run tests only from the specified category, or if no category is specified, list known categories.') + parser.add_argument('-f', '--file', type=str, + help='Run tests from the specified file') + parser.add_argument('-l', '--list', type=str, nargs='?', const="", metavar='CATEGORY', + help='List all test cases, or those only within the specified category') + parser.add_argument('-s', '--show', type=str, nargs=1, metavar='ID', dest='showID', + help='Display the test case with specified id') + parser.add_argument('-e', '--execute', type=str, nargs=1, metavar='ID', + help='Execute the single test case with specified ID') + parser.add_argument('-i', '--id', action='store_true', dest='gen_id', + help='Generate ID numbers for new test cases') + return parser + return parser + + +def check_default_settings(args): + """ + Process any arguments overriding the default settings, and ensure the + settings are correct. + """ + # Allow for overriding specific settings + global NAMES + + if args.path != None: + NAMES['TC'] = args.path + if not os.path.isfile(NAMES['TC']): + print("The specified tc path " + NAMES['TC'] + " does not exist.") + exit(1) + + +def get_id_list(alltests): + """ + Generate a list of all IDs in the test cases. + """ + return [x["id"] for x in alltests] + + +def check_case_id(alltests): + """ + Check for duplicate test case IDs. + """ + idl = get_id_list(alltests) + return [x for x in idl if idl.count(x) > 1] + + +def does_id_exist(alltests, newid): + """ + Check if a given ID already exists in the list of test cases. + """ + idl = get_id_list(alltests) + return (any(newid == x for x in idl)) + + +def generate_case_ids(alltests): + """ + If a test case has a blank ID field, generate a random hex ID for it + and then write the test cases back to disk. + """ + import random + for c in alltests: + if (c["id"] == ""): + while True: + newid = str('%04x' % random.randrange(16**4)) + if (does_id_exist(alltests, newid)): + continue + else: + c['id'] = newid + break + + ufilename = [] + for c in alltests: + if ('filename' in c): + ufilename.append(c['filename']) + ufilename = get_unique_item(ufilename) + for f in ufilename: + testlist = [] + for t in alltests: + if 'filename' in t: + if t['filename'] == f: + del t['filename'] + testlist.append(t) + outfile = open(f, "w") + json.dump(testlist, outfile, indent=4) + outfile.close() + + +def get_test_cases(args): + """ + If a test case file is specified, retrieve tests from that file. + Otherwise, glob for all json files in subdirectories and load from + each one. + """ + import fnmatch + if args.file != None: + if not os.path.isfile(args.file): + print("The specified test case file " + args.file + " does not exist.") + exit(1) + flist = [args.file] + else: + flist = [] + for root, dirnames, filenames in os.walk('tc-tests'): + for filename in fnmatch.filter(filenames, '*.json'): + flist.append(os.path.join(root, filename)) + alltests = list() + for casefile in flist: + alltests = alltests + (load_from_file(casefile)) + return alltests + + +def set_operation_mode(args): + """ + Load the test case data and process remaining arguments to determine + what the script should do for this run, and call the appropriate + function. + """ + alltests = get_test_cases(args) + + if args.gen_id: + idlist = get_id_list(alltests) + if (has_blank_ids(idlist)): + alltests = generate_case_ids(alltests) + else: + print("No empty ID fields found in test files.") + exit(0) + + duplicate_ids = check_case_id(alltests) + if (len(duplicate_ids) > 0): + print("The following test case IDs are not unique:") + print(str(set(duplicate_ids))) + print("Please correct them before continuing.") + exit(1) + + ucat = get_test_categories(alltests) + + if args.showID: + show_test_case_by_id(alltests, args.showID[0]) + exit(0) + + if args.execute: + target_id = args.execute[0] + else: + target_id = "" + + if args.category: + if (args.category == '+c'): + print("Available categories:") + print_sll(ucat) + exit(0) + else: + target_category = args.category + else: + target_category = "" + + + testcases = get_categorized_testlist(alltests, ucat) + + if args.list: + if (len(args.list) == 0): + list_test_cases(alltests) + exit(0) + elif(len(args.list > 0)): + if (args.list not in ucat): + print("Unknown category " + args.list) + print("Available categories:") + print_sll(ucat) + exit(1) + list_test_cases(testcases[args.list]) + exit(0) + + if (os.geteuid() != 0): + print("This script must be run with root privileges.\n") + exit(1) + + ns_create() + + if (len(target_category) == 0): + if (len(target_id) > 0): + alltests = list(filter(lambda x: target_id in x['id'], alltests)) + if (len(alltests) == 0): + print("Cannot find a test case with ID matching " + target_id) + exit(1) + catresults = test_runner(alltests) + print("All test results: " + "\n\n" + catresults) + elif (len(target_category) > 0): + if (target_category not in ucat): + print("Specified category is not present in this file.") + exit(1) + else: + catresults = test_runner(testcases[target_category]) + print("Category " + target_category + "\n\n" + catresults) + + ns_destroy() + + +def main(): + """ + Start of execution; set up argument parser and get the arguments, + and start operations. + """ + parser = args_parse() + parser = set_args(parser) + (args, remaining) = parser.parse_known_args() + check_default_settings(args) + + set_operation_mode(args) + + exit(0) + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py new file mode 100644 index 000000000000..01087375a7c3 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -0,0 +1,17 @@ +""" +tdc_config.py - tdc user-specified values + +Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com> +""" + +# Dictionary containing all values that can be substituted in executable +# commands. +NAMES = { + # Substitute your own tc path here + 'TC': '/sbin/tc', + # Name of veth devices to be created for the namespace + 'DEV0': 'v0p0', + 'DEV1': 'v0p1', + # Name of the namespace to use + 'NS': 'tcut' + } diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py new file mode 100644 index 000000000000..c3254f861fb2 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tdc_helper.py @@ -0,0 +1,75 @@ +""" +tdc_helper.py - tdc helper functions + +Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com> +""" + +def get_categorized_testlist(alltests, ucat): + """ Sort the master test list into categories. """ + testcases = dict() + + for category in ucat: + testcases[category] = list(filter(lambda x: category in x['category'], alltests)) + + return(testcases) + + +def get_unique_item(lst): + """ For a list, return a set of the unique items in the list. """ + return list(set(lst)) + + +def get_test_categories(alltests): + """ Discover all unique test categories present in the test case file. """ + ucat = [] + for t in alltests: + ucat.extend(get_unique_item(t['category'])) + ucat = get_unique_item(ucat) + return ucat + +def list_test_cases(testlist): + """ Print IDs and names of all test cases. """ + for curcase in testlist: + print(curcase['id'] + ': (' + ', '.join(curcase['category']) + ") " + curcase['name']) + + +def list_categories(testlist): + """ Show all categories that are present in a test case file. """ + categories = set(map(lambda x: x['category'], testlist)) + print("Available categories:") + print(", ".join(str(s) for s in categories)) + print("") + + +def print_list(cmdlist): + """ Print a list of strings prepended with a tab. """ + for l in cmdlist: + if (type(l) == list): + print("\t" + str(l[0])) + else: + print("\t" + str(l)) + + +def print_sll(items): + print("\n".join(str(s) for s in items)) + + +def print_test_case(tcase): + """ Pretty-printing of a given test case. """ + for k in tcase.keys(): + if (type(tcase[k]) == list): + print(k + ":") + print_list(tcase[k]) + else: + print(k + ": " + tcase[k]) + + +def show_test_case_by_id(testlist, caseID): + """ Find the specified test case to pretty-print. """ + if not any(d.get('id', None) == caseID for d in testlist): + print("That ID does not exist.") + exit(1) + else: + print_test_case(next((d for d in testlist if d['id'] == caseID))) + + diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index 5fa1d7e9a915..5801bbefbe89 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,6 +1,6 @@ BUILD_FLAGS = -DKTEST CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS) -LDFLAGS += -lrt -lpthread +LDFLAGS += -lrt -lpthread -lm # these are all "safe" tests that don't modify # system time or require escalated privileges @@ -8,7 +8,7 @@ TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ inconsistency-check raw_skew threadtest rtctest TEST_GEN_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \ - skew_consistency clocksource-switch leap-a-day \ + skew_consistency clocksource-switch freq-step leap-a-day \ leapcrash set-tai set-2038 set-tz @@ -24,6 +24,7 @@ run_destructive_tests: run_tests ./change_skew ./skew_consistency ./clocksource-switch + ./freq-step ./leap-a-day -s -i 10 ./leapcrash ./set-tz diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c new file mode 100644 index 000000000000..e8c61830825a --- /dev/null +++ b/tools/testing/selftests/timers/freq-step.c @@ -0,0 +1,268 @@ +/* + * This test checks the response of the system clock to frequency + * steps made with adjtimex(). The frequency error and stability of + * the CLOCK_MONOTONIC clock relative to the CLOCK_MONOTONIC_RAW clock + * is measured in two intervals following the step. The test fails if + * values from the second interval exceed specified limits. + * + * Copyright (C) Miroslav Lichvar <mlichvar@redhat.com> 2017 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <math.h> +#include <stdio.h> +#include <sys/timex.h> +#include <time.h> +#include <unistd.h> + +#include "../kselftest.h" + +#define SAMPLES 100 +#define SAMPLE_READINGS 10 +#define MEAN_SAMPLE_INTERVAL 0.1 +#define STEP_INTERVAL 1.0 +#define MAX_PRECISION 100e-9 +#define MAX_FREQ_ERROR 10e-6 +#define MAX_STDDEV 1000e-9 + +struct sample { + double offset; + double time; +}; + +static time_t mono_raw_base; +static time_t mono_base; +static long user_hz; +static double precision; +static double mono_freq_offset; + +static double diff_timespec(struct timespec *ts1, struct timespec *ts2) +{ + return ts1->tv_sec - ts2->tv_sec + (ts1->tv_nsec - ts2->tv_nsec) / 1e9; +} + +static double get_sample(struct sample *sample) +{ + double delay, mindelay = 0.0; + struct timespec ts1, ts2, ts3; + int i; + + for (i = 0; i < SAMPLE_READINGS; i++) { + clock_gettime(CLOCK_MONOTONIC_RAW, &ts1); + clock_gettime(CLOCK_MONOTONIC, &ts2); + clock_gettime(CLOCK_MONOTONIC_RAW, &ts3); + + ts1.tv_sec -= mono_raw_base; + ts2.tv_sec -= mono_base; + ts3.tv_sec -= mono_raw_base; + + delay = diff_timespec(&ts3, &ts1); + if (delay <= 1e-9) { + i--; + continue; + } + + if (!i || delay < mindelay) { + sample->offset = diff_timespec(&ts2, &ts1); + sample->offset -= delay / 2.0; + sample->time = ts1.tv_sec + ts1.tv_nsec / 1e9; + mindelay = delay; + } + } + + return mindelay; +} + +static void reset_ntp_error(void) +{ + struct timex txc; + + txc.modes = ADJ_SETOFFSET; + txc.time.tv_sec = 0; + txc.time.tv_usec = 0; + + if (adjtimex(&txc) < 0) { + perror("[FAIL] adjtimex"); + ksft_exit_fail(); + } +} + +static void set_frequency(double freq) +{ + struct timex txc; + int tick_offset; + + tick_offset = 1e6 * freq / user_hz; + + txc.modes = ADJ_TICK | ADJ_FREQUENCY; + txc.tick = 1000000 / user_hz + tick_offset; + txc.freq = (1e6 * freq - user_hz * tick_offset) * (1 << 16); + + if (adjtimex(&txc) < 0) { + perror("[FAIL] adjtimex"); + ksft_exit_fail(); + } +} + +static void regress(struct sample *samples, int n, double *intercept, + double *slope, double *r_stddev, double *r_max) +{ + double x, y, r, x_sum, y_sum, xy_sum, x2_sum, r2_sum; + int i; + + x_sum = 0.0, y_sum = 0.0, xy_sum = 0.0, x2_sum = 0.0; + + for (i = 0; i < n; i++) { + x = samples[i].time; + y = samples[i].offset; + + x_sum += x; + y_sum += y; + xy_sum += x * y; + x2_sum += x * x; + } + + *slope = (xy_sum - x_sum * y_sum / n) / (x2_sum - x_sum * x_sum / n); + *intercept = (y_sum - *slope * x_sum) / n; + + *r_max = 0.0, r2_sum = 0.0; + + for (i = 0; i < n; i++) { + x = samples[i].time; + y = samples[i].offset; + r = fabs(x * *slope + *intercept - y); + if (*r_max < r) + *r_max = r; + r2_sum += r * r; + } + + *r_stddev = sqrt(r2_sum / n); +} + +static int run_test(int calibration, double freq_base, double freq_step) +{ + struct sample samples[SAMPLES]; + double intercept, slope, stddev1, max1, stddev2, max2; + double freq_error1, freq_error2; + int i; + + set_frequency(freq_base); + + for (i = 0; i < 10; i++) + usleep(1e6 * MEAN_SAMPLE_INTERVAL / 10); + + reset_ntp_error(); + + set_frequency(freq_base + freq_step); + + for (i = 0; i < 10; i++) + usleep(rand() % 2000000 * STEP_INTERVAL / 10); + + set_frequency(freq_base); + + for (i = 0; i < SAMPLES; i++) { + usleep(rand() % 2000000 * MEAN_SAMPLE_INTERVAL); + get_sample(&samples[i]); + } + + if (calibration) { + regress(samples, SAMPLES, &intercept, &slope, &stddev1, &max1); + mono_freq_offset = slope; + printf("CLOCK_MONOTONIC_RAW frequency offset: %11.3f ppm\n", + 1e6 * mono_freq_offset); + return 0; + } + + regress(samples, SAMPLES / 2, &intercept, &slope, &stddev1, &max1); + freq_error1 = slope * (1.0 - mono_freq_offset) - mono_freq_offset - + freq_base; + + regress(samples + SAMPLES / 2, SAMPLES / 2, &intercept, &slope, + &stddev2, &max2); + freq_error2 = slope * (1.0 - mono_freq_offset) - mono_freq_offset - + freq_base; + + printf("%6.0f %+10.3f %6.0f %7.0f %+10.3f %6.0f %7.0f\t", + 1e6 * freq_step, + 1e6 * freq_error1, 1e9 * stddev1, 1e9 * max1, + 1e6 * freq_error2, 1e9 * stddev2, 1e9 * max2); + + if (fabs(freq_error2) > MAX_FREQ_ERROR || stddev2 > MAX_STDDEV) { + printf("[FAIL]\n"); + return 1; + } + + printf("[OK]\n"); + return 0; +} + +static void init_test(void) +{ + struct timespec ts; + struct sample sample; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts)) { + perror("[FAIL] clock_gettime(CLOCK_MONOTONIC_RAW)"); + ksft_exit_fail(); + } + + mono_raw_base = ts.tv_sec; + + if (clock_gettime(CLOCK_MONOTONIC, &ts)) { + perror("[FAIL] clock_gettime(CLOCK_MONOTONIC)"); + ksft_exit_fail(); + } + + mono_base = ts.tv_sec; + + user_hz = sysconf(_SC_CLK_TCK); + + precision = get_sample(&sample) / 2.0; + printf("CLOCK_MONOTONIC_RAW+CLOCK_MONOTONIC precision: %.0f ns\t\t", + 1e9 * precision); + + if (precision > MAX_PRECISION) { + printf("[SKIP]\n"); + ksft_exit_skip(); + } + + printf("[OK]\n"); + srand(ts.tv_sec ^ ts.tv_nsec); + + run_test(1, 0.0, 0.0); +} + +int main(int argc, char **argv) +{ + double freq_base, freq_step; + int i, j, fails = 0; + + init_test(); + + printf("Checking response to frequency step:\n"); + printf(" Step 1st interval 2nd interval\n"); + printf(" Freq Dev Max Freq Dev Max\n"); + + for (i = 2; i >= 0; i--) { + for (j = 0; j < 5; j++) { + freq_base = (rand() % (1 << 24) - (1 << 23)) / 65536e6; + freq_step = 10e-6 * (1 << (6 * i)); + fails += run_test(0, freq_base, freq_step); + } + } + + set_frequency(0.0); + + if (fails) + ksft_exit_fail(); + + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c index caf1bc9257c4..74c60e8759a0 100644 --- a/tools/testing/selftests/timers/inconsistency-check.c +++ b/tools/testing/selftests/timers/inconsistency-check.c @@ -118,7 +118,7 @@ int consistency_test(int clock_type, unsigned long seconds) start_str = ctime(&t); while (seconds == -1 || now - then < seconds) { - inconsistent = 0; + inconsistent = -1; /* Fill list */ for (i = 0; i < CALLS_PER_LOOP; i++) @@ -130,7 +130,7 @@ int consistency_test(int clock_type, unsigned long seconds) inconsistent = i; /* display inconsistency */ - if (inconsistent) { + if (inconsistent >= 0) { unsigned long long delta; printf("\%s\n", start_str); diff --git a/tools/usb/testusb.c b/tools/usb/testusb.c index 0692d99b6d8f..2d89b5f686b1 100644 --- a/tools/usb/testusb.c +++ b/tools/usb/testusb.c @@ -387,15 +387,17 @@ int main (int argc, char **argv) /* pick defaults that works with all speeds, without short packets. * * Best per-frame data rates: - * high speed, bulk 512 * 13 * 8 = 53248 - * interrupt 1024 * 3 * 8 = 24576 - * full speed, bulk/intr 64 * 19 = 1216 - * interrupt 64 * 1 = 64 - * low speed, interrupt 8 * 1 = 8 + * super speed,bulk 1024 * 16 * 8 = 131072 + * interrupt 1024 * 3 * 8 = 24576 + * high speed, bulk 512 * 13 * 8 = 53248 + * interrupt 1024 * 3 * 8 = 24576 + * full speed, bulk/intr 64 * 19 = 1216 + * interrupt 64 * 1 = 64 + * low speed, interrupt 8 * 1 = 8 */ param.iterations = 1000; param.length = 1024; - param.vary = 512; + param.vary = 1024; param.sglen = 32; /* for easy use when hotplugging */ @@ -457,7 +459,7 @@ usage: "\t-c iterations default 1000\n" "\t-s transfer length default 1024\n" "\t-g sglen default 32\n" - "\t-v vary default 512\n", + "\t-v vary default 1024\n", argv[0]); return 1; } diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c index f659c146cdc8..9bd2cd71645d 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.c +++ b/tools/usb/usbip/libsrc/vhci_driver.c @@ -7,6 +7,7 @@ #include <limits.h> #include <netdb.h> #include <libudev.h> +#include <dirent.h> #include "sysfs_utils.h" #undef PROGNAME @@ -35,18 +36,11 @@ err: return NULL; } - - static int parse_status(const char *value) { int ret = 0; char *c; - - for (int i = 0; i < vhci_driver->nports; i++) - memset(&vhci_driver->idev[i], 0, sizeof(vhci_driver->idev[i])); - - /* skip a header line */ c = strchr(value, '\n'); if (!c) @@ -57,9 +51,11 @@ static int parse_status(const char *value) int port, status, speed, devid; unsigned long socket; char lbusid[SYSFS_BUS_ID_SIZE]; + struct usbip_imported_device *idev; + char hub[3]; - ret = sscanf(c, "%d %d %d %x %lx %31s\n", - &port, &status, &speed, + ret = sscanf(c, "%2s %d %d %d %x %lx %31s\n", + hub, &port, &status, &speed, &devid, &socket, lbusid); if (ret < 5) { @@ -67,34 +63,36 @@ static int parse_status(const char *value) BUG(); } - dbg("port %d status %d speed %d devid %x", - port, status, speed, devid); + dbg("hub %s port %d status %d speed %d devid %x", + hub, port, status, speed, devid); dbg("socket %lx lbusid %s", socket, lbusid); - /* if a device is connected, look at it */ - { - struct usbip_imported_device *idev = &vhci_driver->idev[port]; + idev = &vhci_driver->idev[port]; + memset(idev, 0, sizeof(*idev)); + + if (strncmp("hs", hub, 2) == 0) + idev->hub = HUB_SPEED_HIGH; + else /* strncmp("ss", hub, 2) == 0 */ + idev->hub = HUB_SPEED_SUPER; - idev->port = port; - idev->status = status; + idev->port = port; + idev->status = status; - idev->devid = devid; + idev->devid = devid; - idev->busnum = (devid >> 16); - idev->devnum = (devid & 0x0000ffff); + idev->busnum = (devid >> 16); + idev->devnum = (devid & 0x0000ffff); - if (idev->status != VDEV_ST_NULL - && idev->status != VDEV_ST_NOTASSIGNED) { - idev = imported_device_init(idev, lbusid); - if (!idev) { - dbg("imported_device_init failed"); - return -1; - } + if (idev->status != VDEV_ST_NULL + && idev->status != VDEV_ST_NOTASSIGNED) { + idev = imported_device_init(idev, lbusid); + if (!idev) { + dbg("imported_device_init failed"); + return -1; } } - /* go to the next line */ c = strchr(c, '\n'); if (!c) @@ -107,18 +105,33 @@ static int parse_status(const char *value) return 0; } +#define MAX_STATUS_NAME 16 + static int refresh_imported_device_list(void) { const char *attr_status; + char status[MAX_STATUS_NAME+1] = "status"; + int i, ret; - attr_status = udev_device_get_sysattr_value(vhci_driver->hc_device, - "status"); - if (!attr_status) { - err("udev_device_get_sysattr_value failed"); - return -1; + for (i = 0; i < vhci_driver->ncontrollers; i++) { + if (i > 0) + snprintf(status, sizeof(status), "status.%d", i); + + attr_status = udev_device_get_sysattr_value(vhci_driver->hc_device, + status); + if (!attr_status) { + err("udev_device_get_sysattr_value failed"); + return -1; + } + + dbg("controller %d", i); + + ret = parse_status(attr_status); + if (ret != 0) + return ret; } - return parse_status(attr_status); + return 0; } static int get_nports(void) @@ -134,6 +147,33 @@ static int get_nports(void) return (int)strtoul(attr_nports, NULL, 10); } +static int vhci_hcd_filter(const struct dirent *dirent) +{ + return strcmp(dirent->d_name, "vhci_hcd") >= 0; +} + +static int get_ncontrollers(void) +{ + struct dirent **namelist; + struct udev_device *platform; + int n; + + platform = udev_device_get_parent(vhci_driver->hc_device); + if (platform == NULL) + return -1; + + n = scandir(udev_device_get_syspath(platform), &namelist, vhci_hcd_filter, NULL); + if (n < 0) + err("scandir failed"); + else { + for (int i = 0; i < n; i++) + free(namelist[i]); + free(namelist); + } + + return n; +} + /* * Read the given port's record. * @@ -213,16 +253,31 @@ int usbip_vhci_driver_open(void) vhci_driver->hc_device = udev_device_new_from_subsystem_sysname(udev_context, USBIP_VHCI_BUS_TYPE, - USBIP_VHCI_DRV_NAME); + USBIP_VHCI_DEVICE_NAME); if (!vhci_driver->hc_device) { err("udev_device_new_from_subsystem_sysname failed"); goto err; } vhci_driver->nports = get_nports(); - dbg("available ports: %d", vhci_driver->nports); + if (vhci_driver->nports <= 0) { + err("no available ports"); + goto err; + } else if (vhci_driver->nports > MAXNPORT) { + err("port number exceeds %d", MAXNPORT); + goto err; + } + + vhci_driver->ncontrollers = get_ncontrollers(); + dbg("available controllers: %d", vhci_driver->ncontrollers); + + if (vhci_driver->ncontrollers <=0) { + err("no available usb controllers"); + goto err; + } + if (refresh_imported_device_list()) goto err; @@ -270,11 +325,15 @@ err: } -int usbip_vhci_get_free_port(void) +int usbip_vhci_get_free_port(uint32_t speed) { for (int i = 0; i < vhci_driver->nports; i++) { + if (speed == USB_SPEED_SUPER && + vhci_driver->idev[i].hub != HUB_SPEED_SUPER) + continue; + if (vhci_driver->idev[i].status == VDEV_ST_NULL) - return i; + return vhci_driver->idev[i].port; } return -1; diff --git a/tools/usb/usbip/libsrc/vhci_driver.h b/tools/usb/usbip/libsrc/vhci_driver.h index fa2316cf2cac..4898d3bafb10 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.h +++ b/tools/usb/usbip/libsrc/vhci_driver.h @@ -11,9 +11,16 @@ #include "usbip_common.h" #define USBIP_VHCI_BUS_TYPE "platform" +#define USBIP_VHCI_DEVICE_NAME "vhci_hcd.0" #define MAXNPORT 128 +enum hub_speed { + HUB_SPEED_HIGH = 0, + HUB_SPEED_SUPER, +}; + struct usbip_imported_device { + enum hub_speed hub; uint8_t port; uint32_t status; @@ -31,6 +38,7 @@ struct usbip_vhci_driver { /* /sys/devices/platform/vhci_hcd */ struct udev_device *hc_device; + int ncontrollers; int nports; struct usbip_imported_device idev[MAXNPORT]; }; @@ -44,7 +52,7 @@ void usbip_vhci_driver_close(void); int usbip_vhci_refresh_device_list(void); -int usbip_vhci_get_free_port(void); +int usbip_vhci_get_free_port(uint32_t speed); int usbip_vhci_attach_device2(uint8_t port, int sockfd, uint32_t devid, uint32_t speed); diff --git a/tools/usb/usbip/src/usbip_attach.c b/tools/usb/usbip/src/usbip_attach.c index 70a6b507fb62..6e89768ffe30 100644 --- a/tools/usb/usbip/src/usbip_attach.c +++ b/tools/usb/usbip/src/usbip_attach.c @@ -94,6 +94,7 @@ static int import_device(int sockfd, struct usbip_usb_device *udev) { int rc; int port; + uint32_t speed = udev->speed; rc = usbip_vhci_driver_open(); if (rc < 0) { @@ -101,13 +102,15 @@ static int import_device(int sockfd, struct usbip_usb_device *udev) return -1; } - port = usbip_vhci_get_free_port(); + port = usbip_vhci_get_free_port(speed); if (port < 0) { err("no free port"); usbip_vhci_driver_close(); return -1; } + dbg("got free port %d", port); + rc = usbip_vhci_attach_device(port, sockfd, udev->busnum, udev->devnum, udev->speed); if (rc < 0) { |