path: root/kernel
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/bpf/btf.c                        6
-rw-r--r--   kernel/debug/kdb/kdb_io.c              16
-rw-r--r--   kernel/debug/kdb/kdb_keyboard.c         2
-rw-r--r--   kernel/debug/kdb/kdb_private.h          1
-rw-r--r--   kernel/events/core.c                    1
-rwxr-xr-x   kernel/gen_kheaders.sh                  9
-rw-r--r--   kernel/kprobes.c                        1
-rw-r--r--   kernel/livepatch/transition.c           2
-rw-r--r--   kernel/module/internal.h               12
-rw-r--r--   kernel/module/main.c                   39
-rw-r--r--   kernel/pid.c                            2
-rw-r--r--   kernel/pid_namespace.c                  2
-rw-r--r--   kernel/sys.c                           12
-rw-r--r--   kernel/sys_ni.c                       110
-rw-r--r--   kernel/trace/Kconfig                   41
-rw-r--r--   kernel/trace/Makefile                   1
-rw-r--r--   kernel/trace/bpf_trace.c                6
-rw-r--r--   kernel/trace/fgraph.c                  26
-rw-r--r--   kernel/trace/fprobe.c                  17
-rw-r--r--   kernel/trace/ftrace.c                  37
-rw-r--r--   kernel/trace/rethook.c                  3
-rw-r--r--   kernel/trace/trace.c                   15
-rw-r--r--   kernel/trace/trace.h                   13
-rw-r--r--   kernel/trace/trace_boot.c               8
-rw-r--r--   kernel/trace/trace_entries.h           26
-rw-r--r--   kernel/trace/trace_eprobe.c            44
-rw-r--r--   kernel/trace/trace_fprobe.c          1199
-rw-r--r--   kernel/trace/trace_functions_graph.c   93
-rw-r--r--   kernel/trace/trace_kprobe.c            35
-rw-r--r--   kernel/trace/trace_osnoise.c          477
-rw-r--r--   kernel/trace/trace_output.c             4
-rw-r--r--   kernel/trace/trace_probe.c            659
-rw-r--r--   kernel/trace/trace_probe.h             49
-rw-r--r--   kernel/trace/trace_uprobe.c             8
34 files changed, 2657 insertions, 319 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 29fe21099298..817204d53372 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -7891,10 +7891,8 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
pr_err("missing vmlinux BTF, cannot register kfuncs\n");
return -ENOENT;
}
- if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) {
- pr_err("missing module BTF, cannot register kfuncs\n");
- return -ENOENT;
- }
+ if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+ pr_warn("missing module BTF, cannot register kfuncs\n");
return 0;
}
if (IS_ERR(btf))
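Context for the change above: module kfunc registration now keeps working when the module was built without BTF; it only loses BTF-based validation. A hedged sketch of the affected call path from a module's side — bpf_my_kfunc, my_kfunc_ids, and my_kfunc_set are illustrative names, not part of this patch:

__bpf_kfunc int bpf_my_kfunc(int x)
{
	return x + 1;
}

BTF_SET8_START(my_kfunc_ids)
BTF_ID_FLAGS(func, bpf_my_kfunc)
BTF_SET8_END(my_kfunc_ids)

static const struct btf_kfunc_id_set my_kfunc_set = {
	.owner	= THIS_MODULE,
	.set	= &my_kfunc_ids,
};

static int __init my_mod_init(void)
{
	/* Previously failed with -ENOENT when the module had no BTF;
	 * after this patch it only emits a warning and succeeds. */
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &my_kfunc_set);
}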
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 5c7e9ba7cd6b..813cb6cf72d6 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -131,6 +131,7 @@ char kdb_getchar(void)
int escape_delay = 0;
get_char_func *f, *f_prev = NULL;
int key;
+ static bool last_char_was_cr;
for (f = &kdb_poll_funcs[0]; ; ++f) {
if (*f == NULL) {
@@ -150,6 +151,18 @@ char kdb_getchar(void)
}
/*
+ * The caller expects that newlines are either CR or LF. However
+ * some terminals send _both_ CR and LF. Avoid having to handle
+ * this in the caller by stripping the LF if we saw a CR right
+ * before.
+ */
+ if (last_char_was_cr && key == '\n') {
+ last_char_was_cr = false;
+ continue;
+ }
+ last_char_was_cr = (key == '\r');
+
+ /*
* When the first character is received (or we get a change
* input source) we set ourselves up to handle an escape
* sequences (just in case).
@@ -244,7 +257,8 @@ poll_again:
*cp = tmp;
}
break;
- case 13: /* enter */
+ case 10: /* linefeed */
+ case 13: /* carriage return */
*lastchar++ = '\n';
*lastchar++ = '\0';
if (!KDB_STATE(KGDB_TRANS)) {
diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c
index f87c750d3eb3..3c2987f46f6e 100644
--- a/kernel/debug/kdb/kdb_keyboard.c
+++ b/kernel/debug/kdb/kdb_keyboard.c
@@ -13,6 +13,8 @@
#include <linux/ctype.h>
#include <linux/io.h>
+#include "kdb_private.h"
+
/* Keyboard Controller Registers on normal PCs. */
#define KBD_STATUS_REG 0x64 /* Status register (R) */
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 1f8c519a5f81..548fd4059bf9 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -194,7 +194,6 @@ extern char kdb_task_state_char (const struct task_struct *);
extern bool kdb_task_state(const struct task_struct *p, const char *mask);
extern void kdb_ps_suppressed(void);
extern void kdb_ps1(const struct task_struct *p);
-extern void kdb_send_sig(struct task_struct *p, int sig);
extern char kdb_getchar(void);
extern char *kdb_getstr(char *, size_t, const char *);
extern void kdb_gdb_state_pass(char *buf);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3060427f6c9e..78ae7b6f90fd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11391,6 +11391,7 @@ static int pmu_dev_alloc(struct pmu *pmu)
dev_set_drvdata(pmu->dev, pmu);
pmu->dev->bus = &pmu_bus;
+ pmu->dev->parent = pmu->parent;
pmu->dev->release = pmu_dev_release;
ret = dev_set_name(pmu->dev, "%s", pmu->name);
diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 1ef9a87511f5..6d443ea22bb7 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -83,12 +83,9 @@ find $cpio_dir -type f -print0 |
xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;'
# Create archive and try to normalize metadata for reproducibility.
-# For compatibility with older versions of tar, files are fed to tar
-# pre-sorted, as --sort=name might not be available.
-find $cpio_dir -printf "./%P\n" | LC_ALL=C sort | \
- tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
- --owner=0 --group=0 --numeric-owner --no-recursion \
- -I $XZ -cf $tarfile -C $cpio_dir/ -T - > /dev/null
+tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
+ --owner=0 --group=0 --sort=name --numeric-owner \
+ -I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null
echo $headers_md5 > kernel/kheaders.md5
echo "$this_file_md5" >> kernel/kheaders.md5
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 00e177de91cc..ce13f1a35251 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2127,6 +2127,7 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
NOKPROBE_SYMBOL(pre_handler_kretprobe);
static void kretprobe_rethook_handler(struct rethook_node *rh, void *data,
+ unsigned long ret_addr,
struct pt_regs *regs)
{
struct kretprobe *rp = (struct kretprobe *)data;
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index e9fd83a02228..e54c3d60a904 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -15,7 +15,7 @@
#include "transition.h"
#define MAX_STACK_ENTRIES 100
-DEFINE_PER_CPU(unsigned long[MAX_STACK_ENTRIES], klp_stack_entries);
+static DEFINE_PER_CPU(unsigned long[MAX_STACK_ENTRIES], klp_stack_entries);
#define STACK_ERR_BUF_SIZE 128
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index dc7b0160c480..c8b7b4dcf782 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -32,6 +32,18 @@
/* Maximum number of characters written by module_flags() */
#define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4)
+struct kernel_symbol {
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ int value_offset;
+ int name_offset;
+ int namespace_offset;
+#else
+ unsigned long value;
+ const char *name;
+ const char *namespace;
+#endif
+};
+
extern struct mutex module_mutex;
extern struct list_head modules;
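The two layouts above trade absolute pointers for 32-bit self-relative offsets, shrinking the export table on 64-bit and keeping it position-independent. A hedged sketch of how a symbol's address is recovered under each configuration, mirroring the accessor pattern the module code uses:

static unsigned long kernel_symbol_value(const struct kernel_symbol *sym)
{
#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
	/* each *_offset field is relative to the field's own address */
	return (unsigned long)&sym->value_offset + sym->value_offset;
#else
	return sym->value;
#endif
}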
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 834de86ebe35..59b1d067e528 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -3092,7 +3092,7 @@ static bool idempotent(struct idempotent *u, const void *cookie)
* remove everybody - which includes ourselves - fill in the return
* value, and then complete the operation.
*/
-static void idempotent_complete(struct idempotent *u, int ret)
+static int idempotent_complete(struct idempotent *u, int ret)
{
const void *cookie = u->cookie;
int hash = hash_ptr(cookie, IDEM_HASH_BITS);
@@ -3109,27 +3109,18 @@ static void idempotent_complete(struct idempotent *u, int ret)
complete(&pos->complete);
}
spin_unlock(&idem_lock);
+ return ret;
}
static int init_module_from_file(struct file *f, const char __user * uargs, int flags)
{
- struct idempotent idem;
struct load_info info = { };
void *buf = NULL;
- int len, ret;
-
- if (!f || !(f->f_mode & FMODE_READ))
- return -EBADF;
-
- if (idempotent(&idem, file_inode(f))) {
- wait_for_completion(&idem.complete);
- return idem.ret;
- }
+ int len;
len = kernel_read_file(f, 0, &buf, INT_MAX, NULL, READING_MODULE);
if (len < 0) {
mod_stat_inc(&failed_kreads);
- mod_stat_add_long(len, &invalid_kread_bytes);
return len;
}
@@ -3146,9 +3137,25 @@ static int init_module_from_file(struct file *f, const char __user * uargs, int
info.len = len;
}
- ret = load_module(&info, uargs, flags);
- idempotent_complete(&idem, ret);
- return ret;
+ return load_module(&info, uargs, flags);
+}
+
+static int idempotent_init_module(struct file *f, const char __user * uargs, int flags)
+{
+ struct idempotent idem;
+
+ if (!f || !(f->f_mode & FMODE_READ))
+ return -EBADF;
+
+	/* Is somebody else already doing this operation? */
+ if (idempotent(&idem, file_inode(f))) {
+ wait_for_completion(&idem.complete);
+ return idem.ret;
+ }
+
+ /* Otherwise, we'll do it and complete others */
+ return idempotent_complete(&idem,
+ init_module_from_file(f, uargs, flags));
}
SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
@@ -3168,7 +3175,7 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
return -EINVAL;
f = fdget(fd);
- err = init_module_from_file(f.file, uargs, flags);
+ err = idempotent_init_module(f.file, uargs, flags);
fdput(f);
return err;
}
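The refactor above splits the idempotence bookkeeping out of the file-reading path, so concurrent finit_module() calls on the same inode collapse into a single real load. A sketch of the resulting winner/waiter protocol (illustrative timeline, matching the helpers in this patch):

/*
 *   CPU0: idempotent(&idem, inode) -> false   (first in: the winner)
 *   CPU1: idempotent(&idem, inode) -> true    (joins CPU0's bucket)
 *   CPU1: wait_for_completion(&idem.complete);
 *   CPU0: ret = init_module_from_file(f, uargs, flags);
 *   CPU0: idempotent_complete(&idem, ret);    (copies ret to every
 *                                              waiter, wakes them,
 *                                              and returns ret)
 *   CPU1: return idem.ret;                    (same value as CPU0)
 */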
diff --git a/kernel/pid.c b/kernel/pid.c
index 8bce3aebc949..6a1d23a11026 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -657,7 +657,7 @@ void __init pid_idr_init(void)
idr_init(&init_pid_ns.idr);
init_pid_ns.pid_cachep = kmem_cache_create("pid",
- struct_size((struct pid *)NULL, numbers, 1),
+ struct_size_t(struct pid, numbers, 1),
__alignof__(struct pid),
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT,
NULL);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 70a929784a5d..0bf44afe04dd 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -48,7 +48,7 @@ static struct kmem_cache *create_pid_cachep(unsigned int level)
return kc;
snprintf(name, sizeof(name), "pid_%u", level + 1);
- len = struct_size((struct pid *)NULL, numbers, level + 1);
+ len = struct_size_t(struct pid, numbers, level + 1);
mutex_lock(&pid_caches_mutex);
/* Name collision forces to do allocation under mutex. */
if (!*pkc)
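struct_size_t() is the type-based counterpart of struct_size() from <linux/overflow.h>: it computes the same flexible-array allocation size without needing an object pointer, which is what lets the two hunks above drop the (struct pid *)NULL casts. A minimal sketch:

struct example {
	int level;
	u64 numbers[];	/* flexible array member */
};

/* given a valid struct example *p, both expressions evaluate to
 * sizeof(struct example) + 4 * sizeof(u64), with overflow checking: */
size_t a = struct_size(p, numbers, 4);			/* needs an object */
size_t b = struct_size_t(struct example, numbers, 4);	/* needs only the type */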
diff --git a/kernel/sys.c b/kernel/sys.c
index 339fee3eff6a..05f838929e72 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -140,6 +140,12 @@
#ifndef GET_TAGGED_ADDR_CTRL
# define GET_TAGGED_ADDR_CTRL() (-EINVAL)
#endif
+#ifndef RISCV_V_SET_CONTROL
+# define RISCV_V_SET_CONTROL(a) (-EINVAL)
+#endif
+#ifndef RISCV_V_GET_CONTROL
+# define RISCV_V_GET_CONTROL() (-EINVAL)
+#endif
/*
* this is where the system-wide overflow UID and GID are defined, for
@@ -2708,6 +2714,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
break;
#endif
+ case PR_RISCV_V_SET_CONTROL:
+ error = RISCV_V_SET_CONTROL(arg2);
+ break;
+ case PR_RISCV_V_GET_CONTROL:
+ error = RISCV_V_GET_CONTROL();
+ break;
default:
error = -EINVAL;
break;
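From userspace the new options are reached via prctl(2); thanks to the stub macros above, kernels without RISC-V vector support reject them cleanly. A hedged usage sketch — the PR_RISCV_V_VSTATE_CTRL_ON constant is assumed from the uapi headers of this series:

#include <sys/prctl.h>

void enable_vector_state(void)
{
	/* On non-RISC-V kernels both calls fail with errno == EINVAL. */
	long cur = prctl(PR_RISCV_V_GET_CONTROL, 0, 0, 0, 0);

	if (cur >= 0)
		prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON, 0, 0, 0);
}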
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 04bfb1e4d377..781de7cc6a4e 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -51,99 +51,35 @@ COND_SYSCALL_COMPAT(io_pgetevents);
COND_SYSCALL(io_uring_setup);
COND_SYSCALL(io_uring_enter);
COND_SYSCALL(io_uring_register);
-
-/* fs/xattr.c */
-
-/* fs/dcache.c */
-
-/* fs/cookies.c */
COND_SYSCALL(lookup_dcookie);
COND_SYSCALL_COMPAT(lookup_dcookie);
-
-/* fs/eventfd.c */
COND_SYSCALL(eventfd2);
-
-/* fs/eventfd.c */
COND_SYSCALL(epoll_create1);
COND_SYSCALL(epoll_ctl);
COND_SYSCALL(epoll_pwait);
COND_SYSCALL_COMPAT(epoll_pwait);
COND_SYSCALL(epoll_pwait2);
COND_SYSCALL_COMPAT(epoll_pwait2);
-
-/* fs/fcntl.c */
-
-/* fs/inotify_user.c */
COND_SYSCALL(inotify_init1);
COND_SYSCALL(inotify_add_watch);
COND_SYSCALL(inotify_rm_watch);
-
-/* fs/ioctl.c */
-
-/* fs/ioprio.c */
COND_SYSCALL(ioprio_set);
COND_SYSCALL(ioprio_get);
-
-/* fs/locks.c */
COND_SYSCALL(flock);
-
-/* fs/namei.c */
-
-/* fs/namespace.c */
-
-/* fs/nfsctl.c */
-
-/* fs/open.c */
-
-/* fs/pipe.c */
-
-/* fs/quota.c */
COND_SYSCALL(quotactl);
COND_SYSCALL(quotactl_fd);
-
-/* fs/readdir.c */
-
-/* fs/read_write.c */
-
-/* fs/sendfile.c */
-
-/* fs/select.c */
-
-/* fs/signalfd.c */
COND_SYSCALL(signalfd4);
COND_SYSCALL_COMPAT(signalfd4);
-
-/* fs/splice.c */
-
-/* fs/stat.c */
-
-/* fs/sync.c */
-
-/* fs/timerfd.c */
COND_SYSCALL(timerfd_create);
COND_SYSCALL(timerfd_settime);
COND_SYSCALL(timerfd_settime32);
COND_SYSCALL(timerfd_gettime);
COND_SYSCALL(timerfd_gettime32);
-
-/* fs/utimes.c */
-
-/* kernel/acct.c */
COND_SYSCALL(acct);
-
-/* kernel/capability.c */
COND_SYSCALL(capget);
COND_SYSCALL(capset);
-
-/* kernel/exec_domain.c */
-
-/* kernel/exit.c */
-
-/* kernel/fork.c */
/* __ARCH_WANT_SYS_CLONE3 */
COND_SYSCALL(clone3);
-
-/* kernel/futex/syscalls.c */
COND_SYSCALL(futex);
COND_SYSCALL(futex_time32);
COND_SYSCALL(set_robust_list);
@@ -151,29 +87,11 @@ COND_SYSCALL_COMPAT(set_robust_list);
COND_SYSCALL(get_robust_list);
COND_SYSCALL_COMPAT(get_robust_list);
COND_SYSCALL(futex_waitv);
-
-/* kernel/hrtimer.c */
-
-/* kernel/itimer.c */
-
-/* kernel/kexec.c */
COND_SYSCALL(kexec_load);
COND_SYSCALL_COMPAT(kexec_load);
-
-/* kernel/module.c */
COND_SYSCALL(init_module);
COND_SYSCALL(delete_module);
-
-/* kernel/posix-timers.c */
-
-/* kernel/printk.c */
COND_SYSCALL(syslog);
-
-/* kernel/ptrace.c */
-
-/* kernel/sched/core.c */
-
-/* kernel/sys.c */
COND_SYSCALL(setregid);
COND_SYSCALL(setgid);
COND_SYSCALL(setreuid);
@@ -186,12 +104,6 @@ COND_SYSCALL(setfsuid);
COND_SYSCALL(setfsgid);
COND_SYSCALL(setgroups);
COND_SYSCALL(getgroups);
-
-/* kernel/time.c */
-
-/* kernel/timer.c */
-
-/* ipc/mqueue.c */
COND_SYSCALL(mq_open);
COND_SYSCALL_COMPAT(mq_open);
COND_SYSCALL(mq_unlink);
@@ -203,8 +115,6 @@ COND_SYSCALL(mq_notify);
COND_SYSCALL_COMPAT(mq_notify);
COND_SYSCALL(mq_getsetattr);
COND_SYSCALL_COMPAT(mq_getsetattr);
-
-/* ipc/msg.c */
COND_SYSCALL(msgget);
COND_SYSCALL(old_msgctl);
COND_SYSCALL(msgctl);
@@ -214,8 +124,6 @@ COND_SYSCALL(msgrcv);
COND_SYSCALL_COMPAT(msgrcv);
COND_SYSCALL(msgsnd);
COND_SYSCALL_COMPAT(msgsnd);
-
-/* ipc/sem.c */
COND_SYSCALL(semget);
COND_SYSCALL(old_semctl);
COND_SYSCALL(semctl);
@@ -224,8 +132,6 @@ COND_SYSCALL_COMPAT(old_semctl);
COND_SYSCALL(semtimedop);
COND_SYSCALL(semtimedop_time32);
COND_SYSCALL(semop);
-
-/* ipc/shm.c */
COND_SYSCALL(shmget);
COND_SYSCALL(old_shmctl);
COND_SYSCALL(shmctl);
@@ -234,8 +140,6 @@ COND_SYSCALL_COMPAT(old_shmctl);
COND_SYSCALL(shmat);
COND_SYSCALL_COMPAT(shmat);
COND_SYSCALL(shmdt);
-
-/* net/socket.c */
COND_SYSCALL(socket);
COND_SYSCALL(socketpair);
COND_SYSCALL(bind);
@@ -256,30 +160,18 @@ COND_SYSCALL(sendmsg);
COND_SYSCALL_COMPAT(sendmsg);
COND_SYSCALL(recvmsg);
COND_SYSCALL_COMPAT(recvmsg);
-
-/* mm/filemap.c */
-
-/* mm/nommu.c, also with MMU */
COND_SYSCALL(mremap);
-
-/* security/keys/keyctl.c */
COND_SYSCALL(add_key);
COND_SYSCALL(request_key);
COND_SYSCALL(keyctl);
COND_SYSCALL_COMPAT(keyctl);
-
-/* security/landlock/syscalls.c */
COND_SYSCALL(landlock_create_ruleset);
COND_SYSCALL(landlock_add_rule);
COND_SYSCALL(landlock_restrict_self);
-
-/* arch/example/kernel/sys_example.c */
-
-/* mm/fadvise.c */
COND_SYSCALL(fadvise64_64);
COND_SYSCALL_COMPAT(fadvise64_64);
-/* mm/, CONFIG_MMU only */
+/* CONFIG_MMU only */
COND_SYSCALL(swapon);
COND_SYSCALL(swapoff);
COND_SYSCALL(mprotect);
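The deleted lines are only file-path comments; every COND_SYSCALL() above still expands to a weak stub that the real implementation overrides at link time. Roughly — the generic fallback looks like this, though architectures may provide a pt_regs-based variant:

#define COND_SYSCALL(name)						\
	asmlinkage long __weak sys_##name(void)				\
	{								\
		return sys_ni_syscall();	/* -ENOSYS */		\
	}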
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8cf97fa4a4b3..61c541c36596 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -31,6 +31,9 @@ config HAVE_FUNCTION_GRAPH_TRACER
help
See Documentation/trace/ftrace-design.rst
+config HAVE_FUNCTION_GRAPH_RETVAL
+ bool
+
config HAVE_DYNAMIC_FTRACE
bool
help
@@ -227,6 +230,18 @@ config FUNCTION_GRAPH_TRACER
the return value. This is done by setting the current return
address on the current task structure into a stack of calls.
+config FUNCTION_GRAPH_RETVAL
+ bool "Kernel Function Graph Return Value"
+ depends on HAVE_FUNCTION_GRAPH_RETVAL
+ depends on FUNCTION_GRAPH_TRACER
+ default n
+ help
+ Support recording and printing the function return value when
+ using the function graph tracer. This can be helpful for locating
+ functions that return errors. This feature is off by default; you
+ can enable it via the trace option funcgraph-retval.
+ See Documentation/trace/ftrace.rst
+
config DYNAMIC_FTRACE
bool "enable/disable function tracing dynamically"
depends on FUNCTION_TRACER
@@ -650,6 +665,32 @@ config BLK_DEV_IO_TRACE
If unsure, say N.
+config FPROBE_EVENTS
+ depends on FPROBE
+ depends on HAVE_REGS_AND_STACK_ACCESS_API
+ bool "Enable fprobe-based dynamic events"
+ select TRACING
+ select PROBE_EVENTS
+ select DYNAMIC_EVENTS
+ default y
+ help
+ This allows the user to add tracing events on function entry and
+ exit via the ftrace interface. The syntax is the same as for kprobe
+ events, and kprobe events on function entry and exit will be
+ transparently converted to fprobe events.
+
+config PROBE_EVENTS_BTF_ARGS
+ depends on HAVE_FUNCTION_ARG_ACCESS_API
+ depends on FPROBE_EVENTS || KPROBE_EVENTS
+ depends on DEBUG_INFO_BTF && BPF_SYSCALL
+ bool "Support BTF function arguments for probe events"
+ default y
+ help
+ The user can specify the arguments of the probe event using the names
+ of the arguments of the probed function, when the probe location is a
+ kernel function entry or a tracepoint.
+ This is available only if BTF (BPF Type Format) support is enabled.
+
config KPROBE_EVENTS
depends on KPROBES
depends on HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c6651e16b557..64b61f67a403 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -104,6 +104,7 @@ obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
obj-$(CONFIG_FPROBE) += fprobe.o
obj-$(CONFIG_RETHOOK) += rethook.o
+obj-$(CONFIG_FPROBE_EVENTS) += trace_fprobe.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
obj-$(CONFIG_RV) += rv/
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 03b7f6b8e4f0..5f2dcabad202 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2652,7 +2652,8 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
static int
kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
- struct pt_regs *regs, void *data)
+ unsigned long ret_ip, struct pt_regs *regs,
+ void *data)
{
struct bpf_kprobe_multi_link *link;
@@ -2663,7 +2664,8 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
static void
kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
- struct pt_regs *regs, void *data)
+ unsigned long ret_ip, struct pt_regs *regs,
+ void *data)
{
struct bpf_kprobe_multi_link *link;
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 218cd95bf8e4..cd2c35b1dd8f 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -236,16 +236,23 @@ static struct notifier_block ftrace_suspend_notifier = {
.notifier_call = ftrace_suspend_notifier_call,
};
+/* fgraph_ret_regs is not defined without CONFIG_HAVE_FUNCTION_GRAPH_RETVAL */
+struct fgraph_ret_regs;
+
/*
* Send the trace to the ring-buffer.
* @return the original return address.
*/
-unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
+static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs,
+ unsigned long frame_pointer)
{
struct ftrace_graph_ret trace;
unsigned long ret;
ftrace_pop_return_trace(&trace, &ret, frame_pointer);
+#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
+ trace.retval = fgraph_ret_regs_return_value(ret_regs);
+#endif
trace.rettime = trace_clock_local();
ftrace_graph_return(&trace);
/*
@@ -266,6 +273,23 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
return ret;
}
+/*
+ * After all architectures have selected HAVE_FUNCTION_GRAPH_RETVAL, we can
+ * leave only ftrace_return_to_handler(ret_regs).
+ */
+#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL
+unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs)
+{
+ return __ftrace_return_to_handler(ret_regs,
+ fgraph_ret_regs_frame_pointer(ret_regs));
+}
+#else
+unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
+{
+ return __ftrace_return_to_handler(NULL, frame_pointer);
+}
+#endif
+
/**
* ftrace_graph_get_ret_stack - return the entry of the shadow stack
* @task: The task to read the shadow stack from
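The retval path above only compiles when the architecture supplies struct fgraph_ret_regs and the two accessors used in __ftrace_return_to_handler(). A hedged sketch of what an arch header provides, modeled loosely on x86-64:

/* Saved by the return trampoline before it calls
 * ftrace_return_to_handler(). */
struct fgraph_ret_regs {
	unsigned long ax;	/* first return-value register */
	unsigned long dx;	/* second return-value register */
	unsigned long bp;	/* frame pointer */
};

static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *r)
{
	return r->ax;
}

static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *r)
{
	return r->bp;
}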
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 18d36842faf5..e4704ec26df7 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -46,7 +46,7 @@ static inline void __fprobe_handler(unsigned long ip, unsigned long parent_ip,
}
if (fp->entry_handler)
- ret = fp->entry_handler(fp, ip, ftrace_get_regs(fregs), entry_data);
+ ret = fp->entry_handler(fp, ip, parent_ip, ftrace_get_regs(fregs), entry_data);
/* If entry_handler returns !0, nmissed is not counted. */
if (rh) {
@@ -112,7 +112,7 @@ static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
}
static void fprobe_exit_handler(struct rethook_node *rh, void *data,
- struct pt_regs *regs)
+ unsigned long ret_ip, struct pt_regs *regs)
{
struct fprobe *fp = (struct fprobe *)data;
struct fprobe_rethook_node *fpr;
@@ -133,7 +133,7 @@ static void fprobe_exit_handler(struct rethook_node *rh, void *data,
return;
}
- fp->exit_handler(fp, fpr->entry_ip, regs,
+ fp->exit_handler(fp, fpr->entry_ip, ret_ip, regs,
fp->entry_data_size ? (void *)fpr->data : NULL);
ftrace_test_recursion_unlock(bit);
}
@@ -348,6 +348,14 @@ int register_fprobe_syms(struct fprobe *fp, const char **syms, int num)
}
EXPORT_SYMBOL_GPL(register_fprobe_syms);
+bool fprobe_is_registered(struct fprobe *fp)
+{
+ if (!fp || (fp->ops.saved_func != fprobe_handler &&
+ fp->ops.saved_func != fprobe_kprobe_handler))
+ return false;
+ return true;
+}
+
/**
* unregister_fprobe() - Unregister fprobe from ftrace
* @fp: A fprobe data structure to be unregistered.
@@ -360,8 +368,7 @@ int unregister_fprobe(struct fprobe *fp)
{
int ret;
- if (!fp || (fp->ops.saved_func != fprobe_handler &&
- fp->ops.saved_func != fprobe_kprobe_handler))
+ if (!fprobe_is_registered(fp))
return -EINVAL;
/*
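With ret_ip threaded through, both fprobe callbacks now see where the probed function will return to (on entry) or returned to (on exit). A hedged sketch of a probe written against the new signatures — my_entry, my_exit, and the vfs_read filter are illustrative:

static int my_entry(struct fprobe *fp, unsigned long entry_ip,
		    unsigned long ret_ip, struct pt_regs *regs, void *data)
{
	pr_info("enter %pS (returns to %pS)\n",
		(void *)entry_ip, (void *)ret_ip);
	return 0;	/* nonzero would skip the exit handler */
}

static void my_exit(struct fprobe *fp, unsigned long entry_ip,
		    unsigned long ret_ip, struct pt_regs *regs, void *data)
{
	pr_info("exit  %pS (returned to %pS)\n",
		(void *)entry_ip, (void *)ret_ip);
}

static struct fprobe my_fprobe = {
	.entry_handler	= my_entry,
	.exit_handler	= my_exit,
};

/* register_fprobe(&my_fprobe, "vfs_read", NULL); */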
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6a77edb51f18..3740aca79fe7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3861,6 +3861,9 @@ static int t_show(struct seq_file *m, void *v)
if (!rec)
return 0;
+ if (iter->flags & FTRACE_ITER_ADDRS)
+ seq_printf(m, "%lx ", rec->ip);
+
if (print_rec(m, rec->ip)) {
/* This should only happen when a rec is disabled */
WARN_ON_ONCE(!(rec->flags & FTRACE_FL_DISABLED));
@@ -3996,6 +3999,30 @@ ftrace_touched_open(struct inode *inode, struct file *file)
return 0;
}
+static int
+ftrace_avail_addrs_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_iterator *iter;
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
+ if (!iter)
+ return -ENOMEM;
+
+ iter->pg = ftrace_pages_start;
+ iter->flags = FTRACE_ITER_ADDRS;
+ iter->ops = &global_ops;
+
+ return 0;
+}
+
/**
* ftrace_regex_open - initialize function tracer filter files
* @ops: The ftrace_ops that hold the hash filters
@@ -5916,6 +5943,13 @@ static const struct file_operations ftrace_touched_fops = {
.release = seq_release_private,
};
+static const struct file_operations ftrace_avail_addrs_fops = {
+ .open = ftrace_avail_addrs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
static const struct file_operations ftrace_filter_fops = {
.open = ftrace_filter_open,
.read = seq_read,
@@ -6377,6 +6411,9 @@ static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
trace_create_file("available_filter_functions", TRACE_MODE_READ,
d_tracer, NULL, &ftrace_avail_fops);
+ trace_create_file("available_filter_functions_addrs", TRACE_MODE_READ,
+ d_tracer, NULL, &ftrace_avail_addrs_fops);
+
trace_create_file("enabled_functions", TRACE_MODE_READ,
d_tracer, NULL, &ftrace_enabled_fops);
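The new file prefixes each symbol with its record address, so tools can feed addresses straight into register_fprobe_ips() or the kprobe-multi BPF link without a separate kallsyms pass. Expected output shape (addresses illustrative):

/*
 * # cat available_filter_functions_addrs
 * ffffffff81060f70 __traceiter_initcall_level
 * ffffffff81060fc0 __traceiter_initcall_start
 * ...
 */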
diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c
index 60f6cb2b486b..f32ee484391a 100644
--- a/kernel/trace/rethook.c
+++ b/kernel/trace/rethook.c
@@ -301,7 +301,8 @@ unsigned long rethook_trampoline_handler(struct pt_regs *regs,
break;
handler = READ_ONCE(rhn->rethook->handler);
if (handler)
- handler(rhn, rhn->rethook->data, regs);
+ handler(rhn, rhn->rethook->data,
+ correct_ret_addr, regs);
if (first == node)
break;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 074d0b2e19ed..4529e264cb86 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5672,10 +5672,17 @@ static const char readme_msg[] =
" uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
-#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
+#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
+ defined(CONFIG_FPROBE_EVENTS)
"\t accepts: event-definitions (one definition per line)\n"
+#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
"\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
"\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
+#endif
+#ifdef CONFIG_FPROBE_EVENTS
+ "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
+ "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
+#endif
#ifdef CONFIG_HIST_TRIGGERS
"\t s:[synthetic/]<event> <field> [<field>]\n"
#endif
@@ -5691,7 +5698,11 @@ static const char readme_msg[] =
"\t args: <name>=fetcharg[:type]\n"
"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
+ "\t $stack<index>, $stack, $retval, $comm, $arg<N>, <argname>\n"
+#else
"\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
+#endif
#else
"\t $stack<index>, $stack, $retval, $comm,\n"
#endif
@@ -8135,7 +8146,7 @@ static const struct file_operations tracing_err_log_fops = {
.open = tracing_err_log_open,
.write = tracing_err_log_write,
.read = seq_read,
- .llseek = seq_lseek,
+ .llseek = tracing_lseek,
.release = tracing_err_log_release,
};
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 79bdefe9261b..ed7906b13f09 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -148,6 +148,17 @@ struct kretprobe_trace_entry_head {
unsigned long ret_ip;
};
+struct fentry_trace_entry_head {
+ struct trace_entry ent;
+ unsigned long ip;
+};
+
+struct fexit_trace_entry_head {
+ struct trace_entry ent;
+ unsigned long func;
+ unsigned long ret_ip;
+};
+
#define TRACE_BUF_SIZE 1024
struct trace_array;
@@ -832,6 +843,8 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
#define TRACE_GRAPH_PRINT_TAIL 0x100
#define TRACE_GRAPH_SLEEP_TIME 0x200
#define TRACE_GRAPH_GRAPH_TIME 0x400
+#define TRACE_GRAPH_PRINT_RETVAL 0x800
+#define TRACE_GRAPH_PRINT_RETVAL_HEX 0x1000
#define TRACE_GRAPH_PRINT_FILL_SHIFT 28
#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 778200dd8ede..7ccc7a8e155b 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -31,7 +31,7 @@ trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node)
/* Common ftrace options */
xbc_node_for_each_array_value(node, "options", anode, p) {
- if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) {
+ if (strscpy(buf, p, ARRAY_SIZE(buf)) < 0) {
pr_err("String is too long: %s\n", p);
continue;
}
@@ -87,7 +87,7 @@ trace_boot_enable_events(struct trace_array *tr, struct xbc_node *node)
const char *p;
xbc_node_for_each_array_value(node, "events", anode, p) {
- if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) {
+ if (strscpy(buf, p, ARRAY_SIZE(buf)) < 0) {
pr_err("String is too long: %s\n", p);
continue;
}
@@ -486,7 +486,7 @@ trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode,
p = xbc_node_find_value(enode, "filter", NULL);
if (p && *p != '\0') {
- if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
+ if (strscpy(buf, p, ARRAY_SIZE(buf)) < 0)
pr_err("filter string is too long: %s\n", p);
else if (apply_event_filter(file, buf) < 0)
pr_err("Failed to apply filter: %s\n", buf);
@@ -494,7 +494,7 @@ trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode,
if (IS_ENABLED(CONFIG_HIST_TRIGGERS)) {
xbc_node_for_each_array_value(enode, "actions", anode, p) {
- if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
+ if (strscpy(buf, p, ARRAY_SIZE(buf)) < 0)
pr_err("action string is too long: %s\n", p);
else if (trigger_process_regex(file, buf) < 0)
pr_err("Failed to apply an action: %s\n", p);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index cd41e863b51c..340b2fa98218 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -86,6 +86,30 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
);
/* Function return entry */
+#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
+
+FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
+
+ TRACE_GRAPH_RET,
+
+ F_STRUCT(
+ __field_struct( struct ftrace_graph_ret, ret )
+ __field_packed( unsigned long, ret, func )
+ __field_packed( unsigned long, ret, retval )
+ __field_packed( int, ret, depth )
+ __field_packed( unsigned int, ret, overrun )
+ __field_packed( unsigned long long, ret, calltime)
+ __field_packed( unsigned long long, ret, rettime )
+ ),
+
+ F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d retval: %lx",
+ (void *)__entry->func, __entry->depth,
+ __entry->calltime, __entry->rettime,
+ __entry->depth, __entry->retval)
+);
+
+#else
+
FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
TRACE_GRAPH_RET,
@@ -105,6 +129,8 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
__entry->depth)
);
+#endif
+
/*
* Context switch trace entry - which task (and prio) we switched from/to:
*
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 67e854979d53..cb0077ba2b49 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -227,37 +227,6 @@ error:
return ERR_PTR(ret);
}
-static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
-{
- struct probe_arg *parg = &ep->tp.args[i];
- struct ftrace_event_field *field;
- struct list_head *head;
- int ret = -ENOENT;
-
- head = trace_get_fields(ep->event);
- list_for_each_entry(field, head, link) {
- if (!strcmp(parg->code->data, field->name)) {
- kfree(parg->code->data);
- parg->code->data = field;
- return 0;
- }
- }
-
- /*
- * Argument not found on event. But allow for comm and COMM
- * to be used to get the current->comm.
- */
- if (strcmp(parg->code->data, "COMM") == 0 ||
- strcmp(parg->code->data, "comm") == 0) {
- parg->code->op = FETCH_OP_COMM;
- ret = 0;
- }
-
- kfree(parg->code->data);
- parg->code->data = NULL;
- return ret;
-}
-
static int eprobe_event_define_fields(struct trace_event_call *event_call)
{
struct eprobe_trace_entry_head field;
@@ -817,19 +786,16 @@ find_and_get_event(const char *system, const char *event_name)
static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
{
- unsigned int flags = TPARG_FL_KERNEL | TPARG_FL_TPOINT;
+ struct traceprobe_parse_context ctx = {
+ .event = ep->event,
+ .flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT,
+ };
int ret;
- ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], flags);
+ ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], &ctx);
if (ret)
return ret;
- if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG) {
- ret = trace_eprobe_tp_arg_update(ep, i);
- if (ret)
- trace_probe_log_err(0, BAD_ATTACH_ARG);
- }
-
/* Handle symbols "@" */
if (!ret)
ret = traceprobe_update_arg(&ep->tp.args[i]);
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
new file mode 100644
index 000000000000..dfe2e546acdc
--- /dev/null
+++ b/kernel/trace/trace_fprobe.c
@@ -0,0 +1,1199 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Fprobe-based tracing events
+ * Copyright (C) 2022 Google LLC.
+ */
+#define pr_fmt(fmt) "trace_fprobe: " fmt
+
+#include <linux/fprobe.h>
+#include <linux/module.h>
+#include <linux/rculist.h>
+#include <linux/security.h>
+#include <linux/tracepoint.h>
+#include <linux/uaccess.h>
+
+#include "trace_dynevent.h"
+#include "trace_probe.h"
+#include "trace_probe_kernel.h"
+#include "trace_probe_tmpl.h"
+
+#define FPROBE_EVENT_SYSTEM "fprobes"
+#define TRACEPOINT_EVENT_SYSTEM "tracepoints"
+#define RETHOOK_MAXACTIVE_MAX 4096
+
+static int trace_fprobe_create(const char *raw_command);
+static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev);
+static int trace_fprobe_release(struct dyn_event *ev);
+static bool trace_fprobe_is_busy(struct dyn_event *ev);
+static bool trace_fprobe_match(const char *system, const char *event,
+ int argc, const char **argv, struct dyn_event *ev);
+
+static struct dyn_event_operations trace_fprobe_ops = {
+ .create = trace_fprobe_create,
+ .show = trace_fprobe_show,
+ .is_busy = trace_fprobe_is_busy,
+ .free = trace_fprobe_release,
+ .match = trace_fprobe_match,
+};
+
+/*
+ * Fprobe event core functions
+ */
+struct trace_fprobe {
+ struct dyn_event devent;
+ struct fprobe fp;
+ const char *symbol;
+ struct tracepoint *tpoint;
+ struct module *mod;
+ struct trace_probe tp;
+};
+
+static bool is_trace_fprobe(struct dyn_event *ev)
+{
+ return ev->ops == &trace_fprobe_ops;
+}
+
+static struct trace_fprobe *to_trace_fprobe(struct dyn_event *ev)
+{
+ return container_of(ev, struct trace_fprobe, devent);
+}
+
+/**
+ * for_each_trace_fprobe - iterate over the trace_fprobe list
+ * @pos: the struct trace_fprobe * for each entry
+ * @dpos: the struct dyn_event * to use as a loop cursor
+ */
+#define for_each_trace_fprobe(pos, dpos) \
+ for_each_dyn_event(dpos) \
+ if (is_trace_fprobe(dpos) && (pos = to_trace_fprobe(dpos)))
+
+static bool trace_fprobe_is_return(struct trace_fprobe *tf)
+{
+ return tf->fp.exit_handler != NULL;
+}
+
+static bool trace_fprobe_is_tracepoint(struct trace_fprobe *tf)
+{
+ return tf->tpoint != NULL;
+}
+
+static const char *trace_fprobe_symbol(struct trace_fprobe *tf)
+{
+ return tf->symbol ? tf->symbol : "unknown";
+}
+
+static bool trace_fprobe_is_busy(struct dyn_event *ev)
+{
+ struct trace_fprobe *tf = to_trace_fprobe(ev);
+
+ return trace_probe_is_enabled(&tf->tp);
+}
+
+static bool trace_fprobe_match_command_head(struct trace_fprobe *tf,
+ int argc, const char **argv)
+{
+ char buf[MAX_ARGSTR_LEN + 1];
+
+ if (!argc)
+ return true;
+
+ snprintf(buf, sizeof(buf), "%s", trace_fprobe_symbol(tf));
+ if (strcmp(buf, argv[0]))
+ return false;
+ argc--; argv++;
+
+ return trace_probe_match_command_args(&tf->tp, argc, argv);
+}
+
+static bool trace_fprobe_match(const char *system, const char *event,
+ int argc, const char **argv, struct dyn_event *ev)
+{
+ struct trace_fprobe *tf = to_trace_fprobe(ev);
+
+ if (event[0] != '\0' && strcmp(trace_probe_name(&tf->tp), event))
+ return false;
+
+ if (system && strcmp(trace_probe_group_name(&tf->tp), system))
+ return false;
+
+ return trace_fprobe_match_command_head(tf, argc, argv);
+}
+
+static bool trace_fprobe_is_registered(struct trace_fprobe *tf)
+{
+ return fprobe_is_registered(&tf->fp);
+}
+
+/*
+ * Note that we don't verify the fetch_insn code, since it does not come
+ * from user space.
+ */
+static int
+process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
+ void *base)
+{
+ struct pt_regs *regs = rec;
+ unsigned long val;
+ int ret;
+
+retry:
+ /* 1st stage: get value from context */
+ switch (code->op) {
+ case FETCH_OP_STACK:
+ val = regs_get_kernel_stack_nth(regs, code->param);
+ break;
+ case FETCH_OP_STACKP:
+ val = kernel_stack_pointer(regs);
+ break;
+ case FETCH_OP_RETVAL:
+ val = regs_return_value(regs);
+ break;
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ case FETCH_OP_ARG:
+ val = regs_get_kernel_argument(regs, code->param);
+ break;
+#endif
+ case FETCH_NOP_SYMBOL: /* Ignore a place holder */
+ code++;
+ goto retry;
+ default:
+ ret = process_common_fetch_insn(code, &val);
+ if (ret < 0)
+ return ret;
+ }
+ code++;
+
+ return process_fetch_insn_bottom(code, val, dest, base);
+}
+NOKPROBE_SYMBOL(process_fetch_insn)
+
+/* function entry handler */
+static nokprobe_inline void
+__fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+ struct pt_regs *regs,
+ struct trace_event_file *trace_file)
+{
+ struct fentry_trace_entry_head *entry;
+ struct trace_event_call *call = trace_probe_event_call(&tf->tp);
+ struct trace_event_buffer fbuffer;
+ int dsize;
+
+ if (WARN_ON_ONCE(call != trace_file->event_call))
+ return;
+
+ if (trace_trigger_soft_disabled(trace_file))
+ return;
+
+ dsize = __get_data_size(&tf->tp, regs);
+
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+ sizeof(*entry) + tf->tp.size + dsize);
+ if (!entry)
+ return;
+
+ fbuffer.regs = regs;
+ entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
+ entry->ip = entry_ip;
+ store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+
+ trace_event_buffer_commit(&fbuffer);
+}
+
+static void
+fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+ struct pt_regs *regs)
+{
+ struct event_file_link *link;
+
+ trace_probe_for_each_link_rcu(link, &tf->tp)
+ __fentry_trace_func(tf, entry_ip, regs, link->file);
+}
+NOKPROBE_SYMBOL(fentry_trace_func);
+
+/* function exit handler */
+static nokprobe_inline void
+__fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+ unsigned long ret_ip, struct pt_regs *regs,
+ struct trace_event_file *trace_file)
+{
+ struct fexit_trace_entry_head *entry;
+ struct trace_event_buffer fbuffer;
+ struct trace_event_call *call = trace_probe_event_call(&tf->tp);
+ int dsize;
+
+ if (WARN_ON_ONCE(call != trace_file->event_call))
+ return;
+
+ if (trace_trigger_soft_disabled(trace_file))
+ return;
+
+ dsize = __get_data_size(&tf->tp, regs);
+
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+ sizeof(*entry) + tf->tp.size + dsize);
+ if (!entry)
+ return;
+
+ fbuffer.regs = regs;
+ entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
+ entry->func = entry_ip;
+ entry->ret_ip = ret_ip;
+ store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+
+ trace_event_buffer_commit(&fbuffer);
+}
+
+static void
+fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+ unsigned long ret_ip, struct pt_regs *regs)
+{
+ struct event_file_link *link;
+
+ trace_probe_for_each_link_rcu(link, &tf->tp)
+ __fexit_trace_func(tf, entry_ip, ret_ip, regs, link->file);
+}
+NOKPROBE_SYMBOL(fexit_trace_func);
+
+#ifdef CONFIG_PERF_EVENTS
+
+static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
+ struct pt_regs *regs)
+{
+ struct trace_event_call *call = trace_probe_event_call(&tf->tp);
+ struct fentry_trace_entry_head *entry;
+ struct hlist_head *head;
+ int size, __size, dsize;
+ int rctx;
+
+ head = this_cpu_ptr(call->perf_events);
+ if (hlist_empty(head))
+ return 0;
+
+ dsize = __get_data_size(&tf->tp, regs);
+ __size = sizeof(*entry) + tf->tp.size + dsize;
+ size = ALIGN(__size + sizeof(u32), sizeof(u64));
+ size -= sizeof(u32);
+
+ entry = perf_trace_buf_alloc(size, NULL, &rctx);
+ if (!entry)
+ return 0;
+
+ entry->ip = entry_ip;
+ memset(&entry[1], 0, dsize);
+ store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+ perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
+ head, NULL);
+ return 0;
+}
+NOKPROBE_SYMBOL(fentry_perf_func);
+
+static void
+fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
+ unsigned long ret_ip, struct pt_regs *regs)
+{
+ struct trace_event_call *call = trace_probe_event_call(&tf->tp);
+ struct fexit_trace_entry_head *entry;
+ struct hlist_head *head;
+ int size, __size, dsize;
+ int rctx;
+
+ head = this_cpu_ptr(call->perf_events);
+ if (hlist_empty(head))
+ return;
+
+ dsize = __get_data_size(&tf->tp, regs);
+ __size = sizeof(*entry) + tf->tp.size + dsize;
+ size = ALIGN(__size + sizeof(u32), sizeof(u64));
+ size -= sizeof(u32);
+
+ entry = perf_trace_buf_alloc(size, NULL, &rctx);
+ if (!entry)
+ return;
+
+ entry->func = entry_ip;
+ entry->ret_ip = ret_ip;
+ store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
+ perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
+ head, NULL);
+}
+NOKPROBE_SYMBOL(fexit_perf_func);
+#endif /* CONFIG_PERF_EVENTS */
+
+static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
+ unsigned long ret_ip, struct pt_regs *regs,
+ void *entry_data)
+{
+ struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
+ int ret = 0;
+
+ if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
+ fentry_trace_func(tf, entry_ip, regs);
+#ifdef CONFIG_PERF_EVENTS
+ if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
+ ret = fentry_perf_func(tf, entry_ip, regs);
+#endif
+ return ret;
+}
+NOKPROBE_SYMBOL(fentry_dispatcher);
+
+static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
+ unsigned long ret_ip, struct pt_regs *regs,
+ void *entry_data)
+{
+ struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
+
+ if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
+ fexit_trace_func(tf, entry_ip, ret_ip, regs);
+#ifdef CONFIG_PERF_EVENTS
+ if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
+ fexit_perf_func(tf, entry_ip, ret_ip, regs);
+#endif
+}
+NOKPROBE_SYMBOL(fexit_dispatcher);
+
+static void free_trace_fprobe(struct trace_fprobe *tf)
+{
+ if (tf) {
+ trace_probe_cleanup(&tf->tp);
+ kfree(tf->symbol);
+ kfree(tf);
+ }
+}
+
+/*
+ * Allocate new trace_probe and initialize it (including fprobe).
+ */
+static struct trace_fprobe *alloc_trace_fprobe(const char *group,
+ const char *event,
+ const char *symbol,
+ struct tracepoint *tpoint,
+ int maxactive,
+ int nargs, bool is_return)
+{
+ struct trace_fprobe *tf;
+ int ret = -ENOMEM;
+
+ tf = kzalloc(struct_size(tf, tp.args, nargs), GFP_KERNEL);
+ if (!tf)
+ return ERR_PTR(ret);
+
+ tf->symbol = kstrdup(symbol, GFP_KERNEL);
+ if (!tf->symbol)
+ goto error;
+
+ if (is_return)
+ tf->fp.exit_handler = fexit_dispatcher;
+ else
+ tf->fp.entry_handler = fentry_dispatcher;
+
+ tf->tpoint = tpoint;
+ tf->fp.nr_maxactive = maxactive;
+
+ ret = trace_probe_init(&tf->tp, event, group, false);
+ if (ret < 0)
+ goto error;
+
+ dyn_event_init(&tf->devent, &trace_fprobe_ops);
+ return tf;
+error:
+ free_trace_fprobe(tf);
+ return ERR_PTR(ret);
+}
+
+static struct trace_fprobe *find_trace_fprobe(const char *event,
+ const char *group)
+{
+ struct dyn_event *pos;
+ struct trace_fprobe *tf;
+
+ for_each_trace_fprobe(tf, pos)
+ if (strcmp(trace_probe_name(&tf->tp), event) == 0 &&
+ strcmp(trace_probe_group_name(&tf->tp), group) == 0)
+ return tf;
+ return NULL;
+}
+
+static inline int __enable_trace_fprobe(struct trace_fprobe *tf)
+{
+ if (trace_fprobe_is_registered(tf))
+ enable_fprobe(&tf->fp);
+
+ return 0;
+}
+
+static void __disable_trace_fprobe(struct trace_probe *tp)
+{
+ struct trace_fprobe *tf;
+
+ list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
+ if (!trace_fprobe_is_registered(tf))
+ continue;
+ disable_fprobe(&tf->fp);
+ }
+}
+
+/*
+ * Enable trace_probe
+ * If file is NULL, enable the "perf" handler; otherwise enable the "trace" handler.
+ */
+static int enable_trace_fprobe(struct trace_event_call *call,
+ struct trace_event_file *file)
+{
+ struct trace_probe *tp;
+ struct trace_fprobe *tf;
+ bool enabled;
+ int ret = 0;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENODEV;
+ enabled = trace_probe_is_enabled(tp);
+
+ /* This also changes "enabled" state */
+ if (file) {
+ ret = trace_probe_add_file(tp, file);
+ if (ret)
+ return ret;
+ } else
+ trace_probe_set_flag(tp, TP_FLAG_PROFILE);
+
+ if (!enabled) {
+ list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
+ /* TODO: check the fprobe is gone */
+ __enable_trace_fprobe(tf);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Disable trace_probe
+ * If file is NULL, disable the "perf" handler; otherwise disable the "trace" handler.
+ */
+static int disable_trace_fprobe(struct trace_event_call *call,
+ struct trace_event_file *file)
+{
+ struct trace_probe *tp;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENODEV;
+
+ if (file) {
+ if (!trace_probe_get_file_link(tp, file))
+ return -ENOENT;
+ if (!trace_probe_has_single_file(tp))
+ goto out;
+ trace_probe_clear_flag(tp, TP_FLAG_TRACE);
+ } else
+ trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
+
+ if (!trace_probe_is_enabled(tp))
+ __disable_trace_fprobe(tp);
+
+ out:
+ if (file)
+ /*
+		 * Synchronization is done in the function below. For a perf
+		 * event, file == NULL and perf_trace_event_unreg() calls
+		 * tracepoint_synchronize_unregister() to synchronize the
+		 * event. We don't need to care about it here.
+ */
+ trace_probe_remove_file(tp, file);
+
+ return 0;
+}
+
+/* Event entry printers */
+static enum print_line_t
+print_fentry_event(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct fentry_trace_entry_head *field;
+ struct trace_seq *s = &iter->seq;
+ struct trace_probe *tp;
+
+ field = (struct fentry_trace_entry_head *)iter->ent;
+ tp = trace_probe_primary_from_call(
+ container_of(event, struct trace_event_call, event));
+ if (WARN_ON_ONCE(!tp))
+ goto out;
+
+ trace_seq_printf(s, "%s: (", trace_probe_name(tp));
+
+ if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
+ goto out;
+
+ trace_seq_putc(s, ')');
+
+ if (trace_probe_print_args(s, tp->args, tp->nr_args,
+ (u8 *)&field[1], field) < 0)
+ goto out;
+
+ trace_seq_putc(s, '\n');
+ out:
+ return trace_handle_return(s);
+}
+
+static enum print_line_t
+print_fexit_event(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct fexit_trace_entry_head *field;
+ struct trace_seq *s = &iter->seq;
+ struct trace_probe *tp;
+
+ field = (struct fexit_trace_entry_head *)iter->ent;
+ tp = trace_probe_primary_from_call(
+ container_of(event, struct trace_event_call, event));
+ if (WARN_ON_ONCE(!tp))
+ goto out;
+
+ trace_seq_printf(s, "%s: (", trace_probe_name(tp));
+
+ if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
+ goto out;
+
+ trace_seq_puts(s, " <- ");
+
+ if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
+ goto out;
+
+ trace_seq_putc(s, ')');
+
+ if (trace_probe_print_args(s, tp->args, tp->nr_args,
+ (u8 *)&field[1], field) < 0)
+ goto out;
+
+ trace_seq_putc(s, '\n');
+
+ out:
+ return trace_handle_return(s);
+}
+
+static int fentry_event_define_fields(struct trace_event_call *event_call)
+{
+ int ret;
+ struct fentry_trace_entry_head field;
+ struct trace_probe *tp;
+
+ tp = trace_probe_primary_from_call(event_call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENOENT;
+
+ DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
+
+ return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
+}
+
+static int fexit_event_define_fields(struct trace_event_call *event_call)
+{
+ int ret;
+ struct fexit_trace_entry_head field;
+ struct trace_probe *tp;
+
+ tp = trace_probe_primary_from_call(event_call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENOENT;
+
+ DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
+ DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
+
+ return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
+}
+
+static struct trace_event_functions fentry_funcs = {
+ .trace = print_fentry_event
+};
+
+static struct trace_event_functions fexit_funcs = {
+ .trace = print_fexit_event
+};
+
+static struct trace_event_fields fentry_fields_array[] = {
+ { .type = TRACE_FUNCTION_TYPE,
+ .define_fields = fentry_event_define_fields },
+ {}
+};
+
+static struct trace_event_fields fexit_fields_array[] = {
+ { .type = TRACE_FUNCTION_TYPE,
+ .define_fields = fexit_event_define_fields },
+ {}
+};
+
+static int fprobe_register(struct trace_event_call *event,
+ enum trace_reg type, void *data);
+
+static inline void init_trace_event_call(struct trace_fprobe *tf)
+{
+ struct trace_event_call *call = trace_probe_event_call(&tf->tp);
+
+ if (trace_fprobe_is_return(tf)) {
+ call->event.funcs = &fexit_funcs;
+ call->class->fields_array = fexit_fields_array;
+ } else {
+ call->event.funcs = &fentry_funcs;
+ call->class->fields_array = fentry_fields_array;
+ }
+
+ call->flags = TRACE_EVENT_FL_FPROBE;
+ call->class->reg = fprobe_register;
+}
+
+static int register_fprobe_event(struct trace_fprobe *tf)
+{
+ init_trace_event_call(tf);
+
+ return trace_probe_register_event_call(&tf->tp);
+}
+
+static int unregister_fprobe_event(struct trace_fprobe *tf)
+{
+ return trace_probe_unregister_event_call(&tf->tp);
+}
+
+/* Internal register function - just handle fprobe and flags */
+static int __register_trace_fprobe(struct trace_fprobe *tf)
+{
+ int i, ret;
+
+ /* Should we need new LOCKDOWN flag for fprobe? */
+ ret = security_locked_down(LOCKDOWN_KPROBES);
+ if (ret)
+ return ret;
+
+ if (trace_fprobe_is_registered(tf))
+ return -EINVAL;
+
+ for (i = 0; i < tf->tp.nr_args; i++) {
+ ret = traceprobe_update_arg(&tf->tp.args[i]);
+ if (ret)
+ return ret;
+ }
+
+ /* Set/clear disabled flag according to tp->flag */
+ if (trace_probe_is_enabled(&tf->tp))
+ tf->fp.flags &= ~FPROBE_FL_DISABLED;
+ else
+ tf->fp.flags |= FPROBE_FL_DISABLED;
+
+ if (trace_fprobe_is_tracepoint(tf)) {
+ struct tracepoint *tpoint = tf->tpoint;
+ unsigned long ip = (unsigned long)tpoint->probestub;
+ /*
+		 * Here we take two steps to enable an fprobe on a tracepoint:
+		 * first, register the __probestub_##TP function on the
+		 * tracepoint, then put an fprobe on that stub function.
+ */
+ ret = tracepoint_probe_register_prio_may_exist(tpoint,
+ tpoint->probestub, NULL, 0);
+ if (ret < 0)
+ return ret;
+ return register_fprobe_ips(&tf->fp, &ip, 1);
+ }
+
+ /* TODO: handle filter, nofilter or symbol list */
+ return register_fprobe(&tf->fp, tf->symbol, NULL);
+}
+
+/* Internal unregister function - just handle fprobe and flags */
+static void __unregister_trace_fprobe(struct trace_fprobe *tf)
+{
+ if (trace_fprobe_is_registered(tf)) {
+ unregister_fprobe(&tf->fp);
+ memset(&tf->fp, 0, sizeof(tf->fp));
+ if (trace_fprobe_is_tracepoint(tf)) {
+ tracepoint_probe_unregister(tf->tpoint,
+ tf->tpoint->probestub, NULL);
+ tf->tpoint = NULL;
+ tf->mod = NULL;
+ }
+ }
+}
+
+/* TODO: make this trace_*probe common function */
+/* Unregister a trace_probe and probe_event */
+static int unregister_trace_fprobe(struct trace_fprobe *tf)
+{
+ /* If other probes are on the event, just unregister fprobe */
+ if (trace_probe_has_sibling(&tf->tp))
+ goto unreg;
+
+ /* Enabled event can not be unregistered */
+ if (trace_probe_is_enabled(&tf->tp))
+ return -EBUSY;
+
+ /* If there's a reference to the dynamic event */
+ if (trace_event_dyn_busy(trace_probe_event_call(&tf->tp)))
+ return -EBUSY;
+
+ /* Will fail if probe is being used by ftrace or perf */
+ if (unregister_fprobe_event(tf))
+ return -EBUSY;
+
+unreg:
+ __unregister_trace_fprobe(tf);
+ dyn_event_remove(&tf->devent);
+ trace_probe_unlink(&tf->tp);
+
+ return 0;
+}
+
+static bool trace_fprobe_has_same_fprobe(struct trace_fprobe *orig,
+ struct trace_fprobe *comp)
+{
+ struct trace_probe_event *tpe = orig->tp.event;
+ int i;
+
+ list_for_each_entry(orig, &tpe->probes, tp.list) {
+ if (strcmp(trace_fprobe_symbol(orig),
+ trace_fprobe_symbol(comp)))
+ continue;
+
+ /*
+ * trace_probe_compare_arg_type() ensured that nr_args and
+		 * each argument's name and type are the same. Let's compare comm.
+ */
+ for (i = 0; i < orig->tp.nr_args; i++) {
+ if (strcmp(orig->tp.args[i].comm,
+ comp->tp.args[i].comm))
+ break;
+ }
+
+ if (i == orig->tp.nr_args)
+ return true;
+ }
+
+ return false;
+}
+
+static int append_trace_fprobe(struct trace_fprobe *tf, struct trace_fprobe *to)
+{
+ int ret;
+
+ if (trace_fprobe_is_return(tf) != trace_fprobe_is_return(to) ||
+ trace_fprobe_is_tracepoint(tf) != trace_fprobe_is_tracepoint(to)) {
+ trace_probe_log_set_index(0);
+ trace_probe_log_err(0, DIFF_PROBE_TYPE);
+ return -EEXIST;
+ }
+ ret = trace_probe_compare_arg_type(&tf->tp, &to->tp);
+ if (ret) {
+		/* Note that argument index starts at 2 */
+ trace_probe_log_set_index(ret + 1);
+ trace_probe_log_err(0, DIFF_ARG_TYPE);
+ return -EEXIST;
+ }
+ if (trace_fprobe_has_same_fprobe(to, tf)) {
+ trace_probe_log_set_index(0);
+ trace_probe_log_err(0, SAME_PROBE);
+ return -EEXIST;
+ }
+
+ /* Append to existing event */
+ ret = trace_probe_append(&tf->tp, &to->tp);
+ if (ret)
+ return ret;
+
+ ret = __register_trace_fprobe(tf);
+ if (ret)
+ trace_probe_unlink(&tf->tp);
+ else
+ dyn_event_add(&tf->devent, trace_probe_event_call(&tf->tp));
+
+ return ret;
+}
+
+/* Register a trace_probe and probe_event */
+static int register_trace_fprobe(struct trace_fprobe *tf)
+{
+ struct trace_fprobe *old_tf;
+ int ret;
+
+ mutex_lock(&event_mutex);
+
+ old_tf = find_trace_fprobe(trace_probe_name(&tf->tp),
+ trace_probe_group_name(&tf->tp));
+ if (old_tf) {
+ ret = append_trace_fprobe(tf, old_tf);
+ goto end;
+ }
+
+ /* Register new event */
+ ret = register_fprobe_event(tf);
+ if (ret) {
+ if (ret == -EEXIST) {
+ trace_probe_log_set_index(0);
+ trace_probe_log_err(0, EVENT_EXIST);
+ } else
+ pr_warn("Failed to register probe event(%d)\n", ret);
+ goto end;
+ }
+
+ /* Register fprobe */
+ ret = __register_trace_fprobe(tf);
+ if (ret < 0)
+ unregister_fprobe_event(tf);
+ else
+ dyn_event_add(&tf->devent, trace_probe_event_call(&tf->tp));
+
+end:
+ mutex_unlock(&event_mutex);
+ return ret;
+}
+
+#ifdef CONFIG_MODULES
+static int __tracepoint_probe_module_cb(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct tp_module *tp_mod = data;
+ struct trace_fprobe *tf;
+ struct dyn_event *pos;
+
+ if (val != MODULE_STATE_GOING)
+ return NOTIFY_DONE;
+
+ mutex_lock(&event_mutex);
+ for_each_trace_fprobe(tf, pos) {
+ if (tp_mod->mod == tf->mod) {
+ tracepoint_probe_unregister(tf->tpoint,
+ tf->tpoint->probestub, NULL);
+ tf->tpoint = NULL;
+ tf->mod = NULL;
+ }
+ }
+ mutex_unlock(&event_mutex);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block tracepoint_module_nb = {
+ .notifier_call = __tracepoint_probe_module_cb,
+};
+#endif /* CONFIG_MODULES */
+
+struct __find_tracepoint_cb_data {
+ const char *tp_name;
+ struct tracepoint *tpoint;
+};
+
+static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
+{
+ struct __find_tracepoint_cb_data *data = priv;
+
+ if (!data->tpoint && !strcmp(data->tp_name, tp->name))
+ data->tpoint = tp;
+}
+
+static struct tracepoint *find_tracepoint(const char *tp_name)
+{
+ struct __find_tracepoint_cb_data data = {
+ .tp_name = tp_name,
+ };
+
+ for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
+
+ return data.tpoint;
+}
+
+static int __trace_fprobe_create(int argc, const char *argv[])
+{
+ /*
+ * Argument syntax:
+ * - Add fentry probe:
+ * f[:[GRP/][EVENT]] [MOD:]KSYM [FETCHARGS]
+ * - Add fexit probe:
+ * f[N][:[GRP/][EVENT]] [MOD:]KSYM%return [FETCHARGS]
+ * - Add tracepoint probe:
+ * t[:[GRP/][EVENT]] TRACEPOINT [FETCHARGS]
+ *
+ * Fetch args:
+ * $retval : fetch return value
+ * $stack : fetch stack address
+ * $stackN : fetch Nth entry of stack (N:0-)
+ * $argN : fetch Nth argument (N:1-)
+ * $comm : fetch current task comm
+ * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
+ * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
+ * Dereferencing memory fetch:
+ * +|-offs(ARG) : fetch memory at ARG +|- offs address.
+ * Alias name of args:
+ * NAME=FETCHARG : set NAME as alias of FETCHARG.
+ * Type of args:
+ * FETCHARG:TYPE : use TYPE instead of unsigned long.
+ */
+ struct trace_fprobe *tf = NULL;
+ int i, len, new_argc = 0, ret = 0;
+ bool is_return = false;
+ char *symbol = NULL, *tmp = NULL;
+ const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
+ const char **new_argv = NULL;
+ int maxactive = 0;
+ char buf[MAX_EVENT_NAME_LEN];
+ char gbuf[MAX_EVENT_NAME_LEN];
+ char sbuf[KSYM_NAME_LEN];
+ char abuf[MAX_BTF_ARGS_LEN];
+ bool is_tracepoint = false;
+ struct tracepoint *tpoint = NULL;
+ struct traceprobe_parse_context ctx = {
+ .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
+ };
+
+ if ((argv[0][0] != 'f' && argv[0][0] != 't') || argc < 2)
+ return -ECANCELED;
+
+ if (argv[0][0] == 't') {
+ is_tracepoint = true;
+ group = TRACEPOINT_EVENT_SYSTEM;
+ }
+
+ trace_probe_log_init("trace_fprobe", argc, argv);
+
+ event = strchr(&argv[0][1], ':');
+ if (event)
+ event++;
+
+ if (isdigit(argv[0][1])) {
+ if (event)
+ len = event - &argv[0][1] - 1;
+ else
+ len = strlen(&argv[0][1]);
+ if (len > MAX_EVENT_NAME_LEN - 1) {
+ trace_probe_log_err(1, BAD_MAXACT);
+ goto parse_error;
+ }
+ memcpy(buf, &argv[0][1], len);
+ buf[len] = '\0';
+ ret = kstrtouint(buf, 0, &maxactive);
+ if (ret || !maxactive) {
+ trace_probe_log_err(1, BAD_MAXACT);
+ goto parse_error;
+ }
+		/*
+		 * fprobe rethook instances are iterated over via a list. The
+		 * maximum should stay reasonable.
+		 */
+ if (maxactive > RETHOOK_MAXACTIVE_MAX) {
+ trace_probe_log_err(1, MAXACT_TOO_BIG);
+ goto parse_error;
+ }
+ }
+
+ trace_probe_log_set_index(1);
+
+	/* a symbol (or tracepoint) must be specified */
+ symbol = kstrdup(argv[1], GFP_KERNEL);
+ if (!symbol)
+ return -ENOMEM;
+
+ tmp = strchr(symbol, '%');
+ if (tmp) {
+ if (!is_tracepoint && !strcmp(tmp, "%return")) {
+ *tmp = '\0';
+ is_return = true;
+ } else {
+ trace_probe_log_err(tmp - symbol, BAD_ADDR_SUFFIX);
+ goto parse_error;
+ }
+ }
+ if (!is_return && maxactive) {
+ trace_probe_log_set_index(0);
+ trace_probe_log_err(1, BAD_MAXACT_TYPE);
+ goto parse_error;
+ }
+
+ trace_probe_log_set_index(0);
+ if (event) {
+ ret = traceprobe_parse_event_name(&event, &group, gbuf,
+ event - argv[0]);
+ if (ret)
+ goto parse_error;
+ }
+
+ if (!event) {
+ /* Make a new event name */
+ if (is_tracepoint)
+ snprintf(buf, MAX_EVENT_NAME_LEN, "%s%s",
+ isdigit(*symbol) ? "_" : "", symbol);
+ else
+ snprintf(buf, MAX_EVENT_NAME_LEN, "%s__%s", symbol,
+ is_return ? "exit" : "entry");
+ sanitize_event_name(buf);
+ event = buf;
+ }
+
+ if (is_return)
+ ctx.flags |= TPARG_FL_RETURN;
+ else
+ ctx.flags |= TPARG_FL_FENTRY;
+
+ if (is_tracepoint) {
+ ctx.flags |= TPARG_FL_TPOINT;
+ tpoint = find_tracepoint(symbol);
+ if (!tpoint) {
+ trace_probe_log_set_index(1);
+ trace_probe_log_err(0, NO_TRACEPOINT);
+ goto parse_error;
+ }
+ ctx.funcname = kallsyms_lookup(
+ (unsigned long)tpoint->probestub,
+ NULL, NULL, NULL, sbuf);
+ } else
+ ctx.funcname = symbol;
+
+ argc -= 2; argv += 2;
+ new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
+ abuf, MAX_BTF_ARGS_LEN, &ctx);
+ if (IS_ERR(new_argv)) {
+ ret = PTR_ERR(new_argv);
+ new_argv = NULL;
+ goto out;
+ }
+ if (new_argv) {
+ argc = new_argc;
+ argv = new_argv;
+ }
+
+ /* setup a probe */
+ tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive,
+ argc, is_return);
+ if (IS_ERR(tf)) {
+ ret = PTR_ERR(tf);
+ /* This must return -ENOMEM, else there is a bug */
+ WARN_ON_ONCE(ret != -ENOMEM);
+ goto out; /* We know tf is not allocated */
+ }
+
+ if (is_tracepoint)
+ tf->mod = __module_text_address(
+ (unsigned long)tf->tpoint->probestub);
+
+ /* parse arguments */
+ for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+ trace_probe_log_set_index(i + 2);
+ ctx.offset = 0;
+ ret = traceprobe_parse_probe_arg(&tf->tp, i, argv[i], &ctx);
+ if (ret)
+ goto error; /* This can be -ENOMEM */
+ }
+
+ ret = traceprobe_set_print_fmt(&tf->tp,
+ is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL);
+ if (ret < 0)
+ goto error;
+
+ ret = register_trace_fprobe(tf);
+ if (ret) {
+ trace_probe_log_set_index(1);
+ if (ret == -EILSEQ)
+ trace_probe_log_err(0, BAD_INSN_BNDRY);
+ else if (ret == -ENOENT)
+ trace_probe_log_err(0, BAD_PROBE_ADDR);
+ else if (ret != -ENOMEM && ret != -EEXIST)
+ trace_probe_log_err(0, FAIL_REG_PROBE);
+ goto error;
+ }
+
+out:
+ trace_probe_log_clear();
+ kfree(new_argv);
+ kfree(symbol);
+ return ret;
+
+parse_error:
+ ret = -EINVAL;
+error:
+ free_trace_fprobe(tf);
+ goto out;
+}
+
+static int trace_fprobe_create(const char *raw_command)
+{
+ return trace_probe_create(raw_command, __trace_fprobe_create);
+}
+
+static int trace_fprobe_release(struct dyn_event *ev)
+{
+ struct trace_fprobe *tf = to_trace_fprobe(ev);
+ int ret = unregister_trace_fprobe(tf);
+
+ if (!ret)
+ free_trace_fprobe(tf);
+ return ret;
+}
+
+static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev)
+{
+ struct trace_fprobe *tf = to_trace_fprobe(ev);
+ int i;
+
+ if (trace_fprobe_is_tracepoint(tf))
+ seq_putc(m, 't');
+ else
+ seq_putc(m, 'f');
+ if (trace_fprobe_is_return(tf) && tf->fp.nr_maxactive)
+ seq_printf(m, "%d", tf->fp.nr_maxactive);
+ seq_printf(m, ":%s/%s", trace_probe_group_name(&tf->tp),
+ trace_probe_name(&tf->tp));
+
+ seq_printf(m, " %s%s", trace_fprobe_symbol(tf),
+ trace_fprobe_is_return(tf) ? "%return" : "");
+
+ for (i = 0; i < tf->tp.nr_args; i++)
+ seq_printf(m, " %s=%s", tf->tp.args[i].name, tf->tp.args[i].comm);
+ seq_putc(m, '\n');
+
+ return 0;
+}
+
+/*
+ * Called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
+ */
+static int fprobe_register(struct trace_event_call *event,
+ enum trace_reg type, void *data)
+{
+ struct trace_event_file *file = data;
+
+ switch (type) {
+ case TRACE_REG_REGISTER:
+ return enable_trace_fprobe(event, file);
+ case TRACE_REG_UNREGISTER:
+ return disable_trace_fprobe(event, file);
+
+#ifdef CONFIG_PERF_EVENTS
+ case TRACE_REG_PERF_REGISTER:
+ return enable_trace_fprobe(event, NULL);
+ case TRACE_REG_PERF_UNREGISTER:
+ return disable_trace_fprobe(event, NULL);
+ case TRACE_REG_PERF_OPEN:
+ case TRACE_REG_PERF_CLOSE:
+ case TRACE_REG_PERF_ADD:
+ case TRACE_REG_PERF_DEL:
+ return 0;
+#endif
+ }
+ return 0;
+}
+
+/*
+ * Register dynevent at core_initcall. This allows the kernel to set up
+ * fprobe events at postcore_initcall, without tracefs.
+ */
+static __init int init_fprobe_trace_early(void)
+{
+ int ret;
+
+ ret = dyn_event_register(&trace_fprobe_ops);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_MODULES
+ ret = register_tracepoint_module_notifier(&tracepoint_module_nb);
+ if (ret)
+ return ret;
+#endif
+
+ return 0;
+}
+core_initcall(init_fprobe_trace_early);
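
A hedged user-space sketch of the command syntax documented in
__trace_fprobe_create() above, driven through the dynamic_events interface.
The group/event names, the probed symbol, and the tracefs mount point are
illustrative assumptions, not taken from this patch:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Append one command to the dynamic_events file; 0 on success. */
    static int add_dynevent(const char *cmd)
    {
        int fd = open("/sys/kernel/tracing/dynamic_events",
                      O_WRONLY | O_APPEND);
        ssize_t ret;

        if (fd < 0)
            return -1;
        ret = write(fd, cmd, strlen(cmd));
        close(fd);
        return ret < 0 ? -1 : 0;
    }

    int main(void)
    {
        /* f[:[GRP/][EVENT]] [MOD:]KSYM [FETCHARGS] */
        if (add_dynevent("f:myprobes/read_entry vfs_read\n"))
            perror("fentry");
        /* f[N][:[GRP/][EVENT]] [MOD:]KSYM%return [FETCHARGS] */
        if (add_dynevent("f:myprobes/read_exit vfs_read%return $retval\n"))
            perror("fexit");
        /* t[:[GRP/][EVENT]] TRACEPOINT [FETCHARGS] */
        if (add_dynevent("t:myprobes/switch sched_switch\n"))
            perror("tracepoint");
        return 0;
    }

Removal mirrors creation: writing "-:myprobes/read_entry" to the same file
tears the event down, which lands in trace_fprobe_release() above.
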
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 203204cadf92..c35fbaab2a47 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -58,6 +58,12 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
/* Display function name after trailing } */
{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
+#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
+	/* Display the function return value? */
+	{ TRACER_OPT(funcgraph-retval, TRACE_GRAPH_PRINT_RETVAL) },
+	/* Display the function return value in hexadecimal format? */
+ { TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) },
+#endif
/* Include sleep time (scheduled out) between entry and return */
{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
@@ -619,6 +625,56 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
trace_seq_puts(s, "| ");
}
+#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
+
+#define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL
+
+static void print_graph_retval(struct trace_seq *s, unsigned long retval,
+ bool leaf, void *func, bool hex_format)
+{
+ unsigned long err_code = 0;
+
+ if (retval == 0 || hex_format)
+ goto done;
+
+ /* Check if the return value matches the negative format */
+ if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
+ (((u64)retval) >> 32) == 0) {
+ /* sign extension */
+ err_code = (unsigned long)(s32)retval;
+ } else {
+ err_code = retval;
+ }
+
+ if (!IS_ERR_VALUE(err_code))
+ err_code = 0;
+
+done:
+ if (leaf) {
+ if (hex_format || (err_code == 0))
+ trace_seq_printf(s, "%ps(); /* = 0x%lx */\n",
+ func, retval);
+ else
+ trace_seq_printf(s, "%ps(); /* = %ld */\n",
+ func, err_code);
+ } else {
+ if (hex_format || (err_code == 0))
+ trace_seq_printf(s, "} /* %ps = 0x%lx */\n",
+ func, retval);
+ else
+ trace_seq_printf(s, "} /* %ps = %ld */\n",
+ func, err_code);
+ }
+}
+
+#else
+
+#define __TRACE_GRAPH_PRINT_RETVAL 0
+
+#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0)
+
+#endif
+
/* Case of a leaf function on its call entry */
static enum print_line_t
print_graph_entry_leaf(struct trace_iterator *iter,
@@ -663,7 +719,15 @@ print_graph_entry_leaf(struct trace_iterator *iter,
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
trace_seq_putc(s, ' ');
- trace_seq_printf(s, "%ps();\n", (void *)call->func);
+ /*
+	 * Write out the function return value if the funcgraph-retval
+	 * option is enabled.
+ */
+ if (flags & __TRACE_GRAPH_PRINT_RETVAL)
+ print_graph_retval(s, graph_ret->retval, true, (void *)call->func,
+ !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
+ else
+ trace_seq_printf(s, "%ps();\n", (void *)call->func);
print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
cpu, iter->ent->pid, flags);
@@ -942,16 +1006,25 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
trace_seq_putc(s, ' ');
/*
- * If the return function does not have a matching entry,
- * then the entry was lost. Instead of just printing
- * the '}' and letting the user guess what function this
- * belongs to, write out the function name. Always do
- * that if the funcgraph-tail option is enabled.
+ * Always write out the function name and its return value if the
+	 * funcgraph-retval option is enabled.
*/
- if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL))
- trace_seq_puts(s, "}\n");
- else
- trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
+ if (flags & __TRACE_GRAPH_PRINT_RETVAL) {
+ print_graph_retval(s, trace->retval, false, (void *)trace->func,
+ !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
+ } else {
+ /*
+ * If the return function does not have a matching entry,
+ * then the entry was lost. Instead of just printing
+ * the '}' and letting the user guess what function this
+ * belongs to, write out the function name. Always do
+ * that if the funcgraph-tail option is enabled.
+ */
+ if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL))
+ trace_seq_puts(s, "}\n");
+ else
+ trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
+ }
/* Overrun */
if (flags & TRACE_GRAPH_PRINT_OVERRUN)
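
The two options added above behave like any other tracer option. A minimal
sketch for enabling them, assuming the usual /sys/kernel/tracing mount point
and a kernel built with CONFIG_FUNCTION_GRAPH_RETVAL:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Write a value into a file under the tracefs root; 0 on success. */
    static int tracefs_write(const char *file, const char *val)
    {
        char path[128];
        ssize_t ret;
        int fd;

        snprintf(path, sizeof(path), "/sys/kernel/tracing/%s", file);
        fd = open(path, O_WRONLY);
        if (fd < 0)
            return -1;
        ret = write(fd, val, strlen(val));
        close(fd);
        return ret < 0 ? -1 : 0;
    }

    int main(void)
    {
        /* Select the graph tracer, then print return values */
        tracefs_write("current_tracer", "function_graph");
        tracefs_write("options/funcgraph-retval", "1");
        /* Optional: raw hex instead of decoded error values */
        tracefs_write("options/funcgraph-retval-hex", "1");
        return 0;
    }

Without the hex option, print_graph_retval() above decodes values that look
like kernel error codes into decimal (e.g. "} /* vfs_read = -13 */") and
prints everything else in hexadecimal.
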
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 1b3fa7b854aa..23dba01831f7 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -732,9 +732,10 @@ static int __trace_kprobe_create(int argc, const char *argv[])
* FETCHARG:TYPE : use TYPE instead of unsigned long.
*/
struct trace_kprobe *tk = NULL;
- int i, len, ret = 0;
+ int i, len, new_argc = 0, ret = 0;
bool is_return = false;
char *symbol = NULL, *tmp = NULL;
+ const char **new_argv = NULL;
const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
enum probe_print_type ptype;
int maxactive = 0;
@@ -742,7 +743,8 @@ static int __trace_kprobe_create(int argc, const char *argv[])
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
- unsigned int flags = TPARG_FL_KERNEL;
+ char abuf[MAX_BTF_ARGS_LEN];
+ struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL };
switch (argv[0][0]) {
case 'r':
@@ -764,7 +766,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
if (isdigit(argv[0][1])) {
if (!is_return) {
- trace_probe_log_err(1, MAXACT_NO_KPROBE);
+ trace_probe_log_err(1, BAD_MAXACT_TYPE);
goto parse_error;
}
if (event)
@@ -823,10 +825,10 @@ static int __trace_kprobe_create(int argc, const char *argv[])
goto parse_error;
}
if (is_return)
- flags |= TPARG_FL_RETURN;
+ ctx.flags |= TPARG_FL_RETURN;
ret = kprobe_on_func_entry(NULL, symbol, offset);
- if (ret == 0)
- flags |= TPARG_FL_FENTRY;
+ if (ret == 0 && !is_return)
+ ctx.flags |= TPARG_FL_FENTRY;
/* Defer the ENOENT case until register kprobe */
if (ret == -EINVAL && is_return) {
trace_probe_log_err(0, BAD_RETPROBE);
@@ -854,21 +856,35 @@ static int __trace_kprobe_create(int argc, const char *argv[])
event = buf;
}
+ argc -= 2; argv += 2;
+ ctx.funcname = symbol;
+ new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
+ abuf, MAX_BTF_ARGS_LEN, &ctx);
+ if (IS_ERR(new_argv)) {
+ ret = PTR_ERR(new_argv);
+ new_argv = NULL;
+ goto out;
+ }
+ if (new_argv) {
+ argc = new_argc;
+ argv = new_argv;
+ }
+
/* setup a probe */
tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
- argc - 2, is_return);
+ argc, is_return);
if (IS_ERR(tk)) {
ret = PTR_ERR(tk);
/* This must return -ENOMEM, else there is a bug */
WARN_ON_ONCE(ret != -ENOMEM);
goto out; /* We know tk is not allocated */
}
- argc -= 2; argv += 2;
/* parse arguments */
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
trace_probe_log_set_index(i + 2);
- ret = traceprobe_parse_probe_arg(&tk->tp, i, argv[i], flags);
+ ctx.offset = 0;
+ ret = traceprobe_parse_probe_arg(&tk->tp, i, argv[i], &ctx);
if (ret)
goto error; /* This can be -ENOMEM */
}
@@ -892,6 +908,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
out:
trace_probe_log_clear();
+ kfree(new_argv);
kfree(symbol);
return ret;
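
With CONFIG_PROBE_EVENTS_BTF_ARGS, the expansion wired in above lets kprobe
events use $arg* as well. A hedged sketch; the event name and probed symbol
are examples, and the probe must sit on function entry for $arg* to resolve:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        /* $arg* expands to the probed function's BTF parameter list */
        const char *cmd = "p:btfprobes/read vfs_read $arg*\n";
        int fd = open("/sys/kernel/tracing/dynamic_events",
                      O_WRONLY | O_APPEND);

        if (fd < 0)
            return 1;
        if (write(fd, cmd, strlen(cmd)) < 0)
            perror("dynamic_events");
        close(fd);
        /*
         * The event's fields now carry the BTF names and types of
         * vfs_read()'s parameters (file, buf, count, pos) rather than
         * generic arg1..argN; see events/btfprobes/read/format.
         */
        return 0;
    }
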
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index e97e3fa5cbed..bd0d01d00fb9 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -181,6 +181,7 @@ struct osn_irq {
#define IRQ_CONTEXT 0
#define THREAD_CONTEXT 1
+#define THREAD_URET 2
/*
* sofirq runtime info.
*/
@@ -238,6 +239,7 @@ struct timerlat_variables {
u64 abs_period;
bool tracing_thread;
u64 count;
+ bool uthread_migrate;
};
static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);
@@ -1181,6 +1183,78 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
osn_var->thread.arrival_time = 0;
}
+#ifdef CONFIG_TIMERLAT_TRACER
+/*
+ * osnoise_stop_exception - Stop tracing in response to an exception.
+ */
+static __always_inline void osnoise_stop_exception(char *msg, int cpu)
+{
+ struct osnoise_instance *inst;
+ struct trace_array *tr;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(inst, &osnoise_instances, list) {
+ tr = inst->tr;
+ trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
+ "stop tracing hit on cpu %d due to exception: %s\n",
+ smp_processor_id(),
+ msg);
+
+ if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
+ panic("tracer hit on cpu %d due to exception: %s\n",
+ smp_processor_id(),
+ msg);
+
+ tracer_tracing_off(tr);
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler
+ *
+ * This function is hooked to the sched:sched_migrate_task trace event, and monitors
+ * timerlat user-space thread migration.
+ */
+static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu)
+{
+ struct osnoise_variables *osn_var;
+ long cpu = task_cpu(p);
+
+ osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
+ if (osn_var->pid == p->pid && dest_cpu != cpu) {
+ per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
+ osnoise_taint("timerlat user-thread migrated\n");
+ osnoise_stop_exception("timerlat user-thread migrated", cpu);
+ }
+}
+
+static int register_migration_monitor(void)
+{
+ int ret = 0;
+
+ /*
+	 * The timerlat thread migration check is only required when running
+	 * timerlat in user-space. Thus, enable the callback only if timerlat
+	 * is set with no workload.
+ */
+ if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options))
+ ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
+
+ return ret;
+}
+
+static void unregister_migration_monitor(void)
+{
+ if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options))
+ unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
+}
+#else
+static int register_migration_monitor(void)
+{
+ return 0;
+}
+static void unregister_migration_monitor(void) {}
+#endif
/*
* trace_sched_switch - sched:sched_switch trace event handler
*
@@ -1204,7 +1278,7 @@ trace_sched_switch_callback(void *data, bool preempt,
}
/*
- * hook_thread_events - Hook the insturmentation for thread noise
+ * hook_thread_events - Hook the instrumentation for thread noise
*
* Hook the osnoise tracer callbacks to handle the noise from other
* threads on the necessary kernel events.
@@ -1217,11 +1291,19 @@ static int hook_thread_events(void)
if (ret)
return -EINVAL;
+ ret = register_migration_monitor();
+ if (ret)
+ goto out_unreg;
+
return 0;
+
+out_unreg:
+ unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
+ return -EINVAL;
}
/*
- * unhook_thread_events - *nhook the insturmentation for thread noise
+ * unhook_thread_events - unhook the instrumentation for thread noise
*
* Unook the osnoise tracer callbacks to handle the noise from other
* threads on the necessary kernel events.
@@ -1229,6 +1311,7 @@ static int hook_thread_events(void)
static void unhook_thread_events(void)
{
unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
+ unregister_migration_monitor();
}
/*
@@ -1286,6 +1369,22 @@ static __always_inline void osnoise_stop_tracing(void)
}
/*
+ * osnoise_has_tracing_on - Check if at least one instance has tracing on
+ */
+static __always_inline int osnoise_has_tracing_on(void)
+{
+ struct osnoise_instance *inst;
+ int trace_is_on = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(inst, &osnoise_instances, list)
+ trace_is_on += tracer_tracing_is_on(inst->tr);
+ rcu_read_unlock();
+
+ return trace_is_on;
+}
+
+/*
* notify_new_max_latency - Notify a new max latency via fsnotify interface.
*/
static void notify_new_max_latency(u64 latency)
@@ -1517,13 +1616,16 @@ static struct cpumask save_cpumask;
/*
* osnoise_sleep - sleep until the next period
*/
-static void osnoise_sleep(void)
+static void osnoise_sleep(bool skip_period)
{
u64 interval;
ktime_t wake_time;
mutex_lock(&interface_lock);
- interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
+ if (skip_period)
+ interval = osnoise_data.sample_period;
+ else
+ interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
mutex_unlock(&interface_lock);
/*
@@ -1546,6 +1648,39 @@ static void osnoise_sleep(void)
}
/*
+ * osnoise_migration_pending - checks if the task needs to migrate
+ *
+ * osnoise/timerlat threads are per-cpu. If there is a pending request to
+ * migrate the thread away from the current CPU, something bad has happened.
+ * Play the good citizen and leave.
+ *
+ * Returns 0 if it is safe to continue, 1 otherwise.
+ */
+static inline int osnoise_migration_pending(void)
+{
+ if (!current->migration_pending)
+ return 0;
+
+ /*
+ * If migration is pending, there is a task waiting for the
+ * tracer to enable migration. The tracer does not allow migration,
+ * thus: taint and leave to unblock the blocked thread.
+ */
+ osnoise_taint("migration requested to osnoise threads, leaving.");
+
+ /*
+ * Unset this thread from the threads managed by the interface.
+ * The tracers are responsible for cleaning their env before
+ * exiting.
+ */
+ mutex_lock(&interface_lock);
+ this_cpu_osn_var()->kthread = NULL;
+ mutex_unlock(&interface_lock);
+
+ return 1;
+}
+
+/*
* osnoise_main - The osnoise detection kernel thread
*
* Calls run_osnoise() function to measure the osnoise for the configured runtime,
@@ -1553,12 +1688,35 @@ static void osnoise_sleep(void)
*/
static int osnoise_main(void *data)
{
+ unsigned long flags;
+
+ /*
+ * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
+	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY threads.
+ *
+ * To work around this limitation, disable migration and remove the
+ * flag.
+ */
+ migrate_disable();
+ raw_spin_lock_irqsave(&current->pi_lock, flags);
+ current->flags &= ~(PF_NO_SETAFFINITY);
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags);
while (!kthread_should_stop()) {
+ if (osnoise_migration_pending())
+ break;
+
+ /* skip a period if tracing is off on all instances */
+ if (!osnoise_has_tracing_on()) {
+ osnoise_sleep(true);
+ continue;
+ }
+
run_osnoise();
- osnoise_sleep();
+ osnoise_sleep(false);
}
+ migrate_enable();
return 0;
}
@@ -1706,6 +1864,7 @@ static int timerlat_main(void *data)
struct timerlat_variables *tlat = this_cpu_tmr_var();
struct timerlat_sample s;
struct sched_param sp;
+ unsigned long flags;
u64 now, diff;
/*
@@ -1714,6 +1873,18 @@ static int timerlat_main(void *data)
sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+ /*
+ * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
+	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY threads.
+ *
+ * To work around this limitation, disable migration and remove the
+ * flag.
+ */
+ migrate_disable();
+ raw_spin_lock_irqsave(&current->pi_lock, flags);
+ current->flags &= ~(PF_NO_SETAFFINITY);
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+
tlat->count = 0;
tlat->tracing_thread = false;
@@ -1731,6 +1902,7 @@ static int timerlat_main(void *data)
osn_var->sampling = 1;
while (!kthread_should_stop()) {
+
now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
diff = now - tlat->abs_period;
@@ -1749,10 +1921,14 @@ static int timerlat_main(void *data)
if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
osnoise_stop_tracing();
+ if (osnoise_migration_pending())
+ break;
+
wait_next_period(tlat);
}
hrtimer_cancel(&tlat->timer);
+ migrate_enable();
return 0;
}
#else /* CONFIG_TIMERLAT_TRACER */
@@ -1771,10 +1947,24 @@ static void stop_kthread(unsigned int cpu)
kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
if (kthread) {
- kthread_stop(kthread);
+ if (test_bit(OSN_WORKLOAD, &osnoise_options)) {
+ kthread_stop(kthread);
+ } else {
+ /*
+ * This is a user thread waiting on the timerlat_fd. We need
+ * to close all users, and the best way to guarantee this is
+			 * by killing the thread. NOTE: this is a purpose-specific file.
+ */
+ kill_pid(kthread->thread_pid, SIGKILL, 1);
+ put_task_struct(kthread);
+ }
per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
} else {
+ /* if no workload, just return */
if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
+ /*
+ * This is set in the osnoise tracer case.
+ */
per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
barrier();
return;
@@ -1819,7 +2009,6 @@ static int start_kthread(unsigned int cpu)
barrier();
return 0;
}
-
snprintf(comm, 24, "osnoise/%d", cpu);
}
@@ -1848,6 +2037,11 @@ static int start_per_cpu_kthreads(void)
int retval = 0;
int cpu;
+ if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
+ if (timerlat_enabled())
+ return 0;
+ }
+
cpus_read_lock();
/*
* Run only on online CPUs in which osnoise is allowed to run.
@@ -2188,6 +2382,223 @@ err_free:
return err;
}
+#ifdef CONFIG_TIMERLAT_TRACER
+static int timerlat_fd_open(struct inode *inode, struct file *file)
+{
+ struct osnoise_variables *osn_var;
+ struct timerlat_variables *tlat;
+ long cpu = (long) inode->i_cdev;
+
+ mutex_lock(&interface_lock);
+
+ /*
+ * This file is accessible only if timerlat is enabled, and
+ * NO_OSNOISE_WORKLOAD is set.
+ */
+ if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) {
+ mutex_unlock(&interface_lock);
+ return -EINVAL;
+ }
+
+ migrate_disable();
+
+ osn_var = this_cpu_osn_var();
+
+ /*
+	 * osn_var->pid enforces exclusive access to this file.
+ */
+ if (osn_var->pid) {
+ mutex_unlock(&interface_lock);
+ migrate_enable();
+ return -EBUSY;
+ }
+
+ /*
+	 * The timerlat tracer is a per-cpu tracer. Check that the user-space
+	 * thread, too, is pinned to a single CPU. The tracer later monitors
+	 * whether the task migrates, and disables the tracer if it does.
+	 * However, it is worth doing this basic acceptance test to avoid an
+	 * obviously wrong setup.
+ */
+ if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) {
+ mutex_unlock(&interface_lock);
+ migrate_enable();
+ return -EPERM;
+ }
+
+ /*
+ * From now on, it is good to go.
+ */
+ file->private_data = inode->i_cdev;
+
+ get_task_struct(current);
+
+ osn_var->kthread = current;
+ osn_var->pid = current->pid;
+
+ /*
+ * Setup is done.
+ */
+ mutex_unlock(&interface_lock);
+
+ tlat = this_cpu_tmr_var();
+ tlat->count = 0;
+
+ migrate_enable();
+ return 0;
+}
+
+/*
+ * timerlat_fd_read - Read function for "timerlat_fd" file
+ * @file: The active open file structure
+ * @ubuf: The userspace provided buffer to read value into
+ * @count: The maximum number of bytes to read
+ * @ppos: The current "file" position
+ *
+ * Emits a user-ret sample for the previous activation, then sleeps until the
+ * next timerlat period and emits a thread sample on wakeup. Returns 0 on
+ * success, -EINVAL on error.
+ */
+static ssize_t
+timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
+ loff_t *ppos)
+{
+ long cpu = (long) file->private_data;
+ struct osnoise_variables *osn_var;
+ struct timerlat_variables *tlat;
+ struct timerlat_sample s;
+ s64 diff;
+ u64 now;
+
+ migrate_disable();
+
+ tlat = this_cpu_tmr_var();
+
+ /*
+ * While in user-space, the thread is migratable. There is nothing
+ * we can do about it.
+ * So, if the thread is running on another CPU, stop the machinery.
+ */
+ if (cpu == smp_processor_id()) {
+ if (tlat->uthread_migrate) {
+ migrate_enable();
+ return -EINVAL;
+ }
+ } else {
+ per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
+ osnoise_taint("timerlat user thread migrate\n");
+ osnoise_stop_tracing();
+ migrate_enable();
+ return -EINVAL;
+ }
+
+ osn_var = this_cpu_osn_var();
+
+ /*
+	 * Timerlat in user-space runs in a different order: the read()
+	 * starts from the execution of the previous occurrence and then
+	 * sleeps until the next occurrence.
+	 *
+	 * So, skip this block if we enter read() before the first wakeup
+	 * from the timerlat IRQ:
+ */
+ if (likely(osn_var->sampling)) {
+ now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
+ diff = now - tlat->abs_period;
+
+ /*
+ * it was not a timer firing, but some other signal?
+ */
+ if (diff < 0)
+ goto out;
+
+ s.seqnum = tlat->count;
+ s.timer_latency = diff;
+ s.context = THREAD_URET;
+
+ trace_timerlat_sample(&s);
+
+ notify_new_max_latency(diff);
+
+ tlat->tracing_thread = false;
+ if (osnoise_data.stop_tracing_total)
+ if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
+ osnoise_stop_tracing();
+ } else {
+ tlat->tracing_thread = false;
+ tlat->kthread = current;
+
+ hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
+ tlat->timer.function = timerlat_irq;
+
+		/* Record the current time as the base of the new period */
+ tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
+
+ osn_var->sampling = 1;
+ }
+
+ /* wait for the next period */
+ wait_next_period(tlat);
+
+ /* This is the wakeup from this cycle */
+ now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
+ diff = now - tlat->abs_period;
+
+ /*
+ * it was not a timer firing, but some other signal?
+ */
+ if (diff < 0)
+ goto out;
+
+ s.seqnum = tlat->count;
+ s.timer_latency = diff;
+ s.context = THREAD_CONTEXT;
+
+ trace_timerlat_sample(&s);
+
+ if (osnoise_data.stop_tracing_total) {
+ if (time_to_us(diff) >= osnoise_data.stop_tracing_total) {
+ timerlat_dump_stack(time_to_us(diff));
+ notify_new_max_latency(diff);
+ osnoise_stop_tracing();
+ }
+ }
+
+out:
+ migrate_enable();
+ return 0;
+}
+
+static int timerlat_fd_release(struct inode *inode, struct file *file)
+{
+ struct osnoise_variables *osn_var;
+ struct timerlat_variables *tlat_var;
+ long cpu = (long) file->private_data;
+
+ migrate_disable();
+ mutex_lock(&interface_lock);
+
+ osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
+ tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
+
+ hrtimer_cancel(&tlat_var->timer);
+ memset(tlat_var, 0, sizeof(*tlat_var));
+
+ osn_var->sampling = 0;
+ osn_var->pid = 0;
+
+ /*
+ * We are leaving, not being stopped... see stop_kthread();
+ */
+ if (osn_var->kthread) {
+ put_task_struct(osn_var->kthread);
+ osn_var->kthread = NULL;
+ }
+
+ mutex_unlock(&interface_lock);
+ migrate_enable();
+ return 0;
+}
+#endif
+
/*
* osnoise/runtime_us: cannot be greater than the period.
*/
@@ -2251,6 +2662,13 @@ static struct trace_min_max_param timerlat_period = {
.max = &timerlat_max_period,
.min = &timerlat_min_period,
};
+
+static const struct file_operations timerlat_fd_fops = {
+ .open = timerlat_fd_open,
+ .read = timerlat_fd_read,
+ .release = timerlat_fd_release,
+ .llseek = generic_file_llseek,
+};
#endif
static const struct file_operations cpus_fops = {
@@ -2288,18 +2706,63 @@ static int init_timerlat_stack_tracefs(struct dentry *top_dir)
}
#endif /* CONFIG_STACKTRACE */
+static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir)
+{
+ struct dentry *timerlat_fd;
+ struct dentry *per_cpu;
+ struct dentry *cpu_dir;
+ char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
+ long cpu;
+
+ /*
+	 * Why not use the tracing instance per_cpu/ dir?
+	 *
+	 * Because osnoise/timerlat have a single workload, having
+	 * multiple files like these would be a waste of memory.
+ */
+ per_cpu = tracefs_create_dir("per_cpu", top_dir);
+ if (!per_cpu)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ snprintf(cpu_str, 30, "cpu%ld", cpu);
+ cpu_dir = tracefs_create_dir(cpu_str, per_cpu);
+ if (!cpu_dir)
+ goto out_clean;
+
+ timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ,
+ cpu_dir, NULL, &timerlat_fd_fops);
+ if (!timerlat_fd)
+ goto out_clean;
+
+ /* Record the CPU */
+ d_inode(timerlat_fd)->i_cdev = (void *)(cpu);
+ }
+
+ return 0;
+
+out_clean:
+ tracefs_remove(per_cpu);
+ return -ENOMEM;
+}
+
/*
* init_timerlat_tracefs - A function to initialize the timerlat interface files
*/
static int init_timerlat_tracefs(struct dentry *top_dir)
{
struct dentry *tmp;
+ int retval;
tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
&timerlat_period, &trace_min_max_fops);
if (!tmp)
return -ENOMEM;
+ retval = osnoise_create_cpu_timerlat_fd(top_dir);
+ if (retval)
+ return retval;
+
return init_timerlat_stack_tracefs(top_dir);
}
#else /* CONFIG_TIMERLAT_TRACER */
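
The per-cpu timerlat_fd files above are meant to be driven by a pinned
user-space workload: timerlat_fd_open() rejects callers that are not bound
to exactly the CPU of the file, and each read() blocks in wait_next_period().
A minimal sketch for CPU 0, with the tracefs path assumed:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sched.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        cpu_set_t set;
        char buf;
        int fd;

        /* Pin to CPU 0 first; open() checks nr_cpus_allowed == 1 */
        CPU_ZERO(&set);
        CPU_SET(0, &set);
        if (sched_setaffinity(0, sizeof(set), &set))
            return 1;

        fd = open("/sys/kernel/tracing/osnoise/per_cpu/cpu0/timerlat_fd",
                  O_RDONLY);
        if (fd < 0)
            return 1;

        /* Each read() returns after the next timerlat activation */
        while (read(fd, &buf, 1) >= 0)
            ;    /* the workload's own duty cycle would run here */

        close(fd);
        return 0;
    }

This is the sort of loop a user-space timerlat workload runs; on migration
or tracer shutdown the read() fails and the loop exits.
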
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 1e33f367783e..db575094c498 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1446,6 +1446,8 @@ static struct trace_event trace_osnoise_event = {
};
/* TRACE_TIMERLAT */
+
+static char *timerlat_lat_context[] = {"irq", "thread", "user-ret"};
static enum print_line_t
trace_timerlat_print(struct trace_iterator *iter, int flags,
struct trace_event *event)
@@ -1458,7 +1460,7 @@ trace_timerlat_print(struct trace_iterator *iter, int flags,
trace_seq_printf(s, "#%-5u context %6s timer_latency %9llu ns\n",
field->seqnum,
- field->context ? "thread" : "irq",
+ timerlat_lat_context[field->context],
field->timer_latency);
return trace_handle_return(s);
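
With THREAD_URET added, the context column of a timerlat sample can now read
"user-ret" in addition to "irq" and "thread". A trivial consumer, assuming
the default tracefs mount point (the latency value shown is illustrative):

    #include <stdio.h>

    int main(void)
    {
        FILE *fp = fopen("/sys/kernel/tracing/trace_pipe", "r");
        char line[512];

        if (!fp)
            return 1;
        /*
         * Per the format string above, the event payload looks like:
         *   #12345 context user-ret timer_latency 61237 ns
         */
        while (fgets(line, sizeof(line), fp))
            fputs(line, stdout);
        fclose(fp);
        return 0;
    }
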
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 73055ba8d8ef..7ba371da0926 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -11,6 +11,8 @@
*/
#define pr_fmt(fmt) "trace_probe: " fmt
+#include <linux/bpf.h>
+
#include "trace_probe.h"
#undef C
@@ -283,69 +285,354 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
return 0;
}
+static int parse_trace_event_arg(char *arg, struct fetch_insn *code,
+ struct traceprobe_parse_context *ctx)
+{
+ struct ftrace_event_field *field;
+ struct list_head *head;
+
+ head = trace_get_fields(ctx->event);
+ list_for_each_entry(field, head, link) {
+ if (!strcmp(arg, field->name)) {
+ code->op = FETCH_OP_TP_ARG;
+ code->data = field;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
+
+static struct btf *traceprobe_get_btf(void)
+{
+ struct btf *btf = bpf_get_btf_vmlinux();
+
+ if (IS_ERR_OR_NULL(btf))
+ return NULL;
+
+ return btf;
+}
+
+static u32 btf_type_int(const struct btf_type *t)
+{
+ return *(u32 *)(t + 1);
+}
+
+static const char *type_from_btf_id(struct btf *btf, s32 id)
+{
+ const struct btf_type *t;
+ u32 intdata;
+ s32 tid;
+
+	/* TODO: const char * could be converted to a string */
+ t = btf_type_skip_modifiers(btf, id, &tid);
+
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_ENUM:
+ /* enum is "int", so convert to "s32" */
+ return "s32";
+ case BTF_KIND_ENUM64:
+ return "s64";
+ case BTF_KIND_PTR:
+ /* pointer will be converted to "x??" */
+ if (IS_ENABLED(CONFIG_64BIT))
+ return "x64";
+ else
+ return "x32";
+ case BTF_KIND_INT:
+ intdata = btf_type_int(t);
+ if (BTF_INT_ENCODING(intdata) & BTF_INT_SIGNED) {
+ switch (BTF_INT_BITS(intdata)) {
+ case 8:
+ return "s8";
+ case 16:
+ return "s16";
+ case 32:
+ return "s32";
+ case 64:
+ return "s64";
+ }
+ } else { /* unsigned */
+ switch (BTF_INT_BITS(intdata)) {
+ case 8:
+ return "u8";
+ case 16:
+ return "u16";
+ case 32:
+ return "u32";
+ case 64:
+ return "u64";
+ }
+ }
+ }
+ /* TODO: support other types */
+
+ return NULL;
+}
+
+static const struct btf_type *find_btf_func_proto(const char *funcname)
+{
+ struct btf *btf = traceprobe_get_btf();
+ const struct btf_type *t;
+ s32 id;
+
+ if (!btf || !funcname)
+ return ERR_PTR(-EINVAL);
+
+ id = btf_find_by_name_kind(btf, funcname, BTF_KIND_FUNC);
+ if (id <= 0)
+ return ERR_PTR(-ENOENT);
+
+ /* Get BTF_KIND_FUNC type */
+ t = btf_type_by_id(btf, id);
+ if (!btf_type_is_func(t))
+ return ERR_PTR(-ENOENT);
+
+ /* The type of BTF_KIND_FUNC is BTF_KIND_FUNC_PROTO */
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_func_proto(t))
+ return ERR_PTR(-ENOENT);
+
+ return t;
+}
+
+static const struct btf_param *find_btf_func_param(const char *funcname, s32 *nr,
+ bool tracepoint)
+{
+ const struct btf_param *param;
+ const struct btf_type *t;
+
+ if (!funcname || !nr)
+ return ERR_PTR(-EINVAL);
+
+ t = find_btf_func_proto(funcname);
+ if (IS_ERR(t))
+ return (const struct btf_param *)t;
+
+ *nr = btf_type_vlen(t);
+ param = (const struct btf_param *)(t + 1);
+
+ /* Hide the first 'data' argument of tracepoint */
+ if (tracepoint) {
+ (*nr)--;
+ param++;
+ }
+
+ if (*nr > 0)
+ return param;
+ else
+ return NULL;
+}
+
+static int parse_btf_arg(const char *varname, struct fetch_insn *code,
+ struct traceprobe_parse_context *ctx)
+{
+ struct btf *btf = traceprobe_get_btf();
+ const struct btf_param *params;
+ int i;
+
+ if (!btf) {
+ trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
+ return -EOPNOTSUPP;
+ }
+
+ if (WARN_ON_ONCE(!ctx->funcname))
+ return -EINVAL;
+
+ if (!ctx->params) {
+ params = find_btf_func_param(ctx->funcname, &ctx->nr_params,
+ ctx->flags & TPARG_FL_TPOINT);
+ if (IS_ERR(params)) {
+ trace_probe_log_err(ctx->offset, NO_BTF_ENTRY);
+ return PTR_ERR(params);
+ }
+ ctx->params = params;
+ } else
+ params = ctx->params;
+
+ for (i = 0; i < ctx->nr_params; i++) {
+ const char *name = btf_name_by_offset(btf, params[i].name_off);
+
+ if (name && !strcmp(name, varname)) {
+ code->op = FETCH_OP_ARG;
+ if (ctx->flags & TPARG_FL_TPOINT)
+ code->param = i + 1;
+ else
+ code->param = i;
+ return 0;
+ }
+ }
+ trace_probe_log_err(ctx->offset, NO_BTFARG);
+ return -ENOENT;
+}
+
+static const struct fetch_type *parse_btf_arg_type(int arg_idx,
+ struct traceprobe_parse_context *ctx)
+{
+ struct btf *btf = traceprobe_get_btf();
+ const char *typestr = NULL;
+
+ if (btf && ctx->params) {
+ if (ctx->flags & TPARG_FL_TPOINT)
+ arg_idx--;
+ typestr = type_from_btf_id(btf, ctx->params[arg_idx].type);
+ }
+
+ return find_fetch_type(typestr, ctx->flags);
+}
+
+static const struct fetch_type *parse_btf_retval_type(
+ struct traceprobe_parse_context *ctx)
+{
+ struct btf *btf = traceprobe_get_btf();
+ const char *typestr = NULL;
+ const struct btf_type *t;
+
+ if (btf && ctx->funcname) {
+ t = find_btf_func_proto(ctx->funcname);
+ if (!IS_ERR(t))
+ typestr = type_from_btf_id(btf, t->type);
+ }
+
+ return find_fetch_type(typestr, ctx->flags);
+}
+
+static bool is_btf_retval_void(const char *funcname)
+{
+ const struct btf_type *t;
+
+ t = find_btf_func_proto(funcname);
+ if (IS_ERR(t))
+ return false;
+
+ return t->type == 0;
+}
+#else
+static struct btf *traceprobe_get_btf(void)
+{
+ return NULL;
+}
+
+static const struct btf_param *find_btf_func_param(const char *funcname, s32 *nr,
+ bool tracepoint)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static int parse_btf_arg(const char *varname, struct fetch_insn *code,
+ struct traceprobe_parse_context *ctx)
+{
+ trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
+ return -EOPNOTSUPP;
+}
+
+#define parse_btf_arg_type(idx, ctx) \
+ find_fetch_type(NULL, ctx->flags)
+
+#define parse_btf_retval_type(ctx) \
+ find_fetch_type(NULL, ctx->flags)
+
+#define is_btf_retval_void(funcname) (false)
+
+#endif
+
#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
static int parse_probe_vars(char *arg, const struct fetch_type *t,
- struct fetch_insn *code, unsigned int flags, int offs)
+ struct fetch_insn *code,
+ struct traceprobe_parse_context *ctx)
{
unsigned long param;
+ int err = TP_ERR_BAD_VAR;
int ret = 0;
int len;
- if (flags & TPARG_FL_TPOINT) {
+ if (ctx->flags & TPARG_FL_TEVENT) {
if (code->data)
return -EFAULT;
- code->data = kstrdup(arg, GFP_KERNEL);
- if (!code->data)
- return -ENOMEM;
- code->op = FETCH_OP_TP_ARG;
- } else if (strcmp(arg, "retval") == 0) {
- if (flags & TPARG_FL_RETURN) {
+ ret = parse_trace_event_arg(arg, code, ctx);
+ if (!ret)
+ return 0;
+ if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
+ code->op = FETCH_OP_COMM;
+ return 0;
+ }
+ /* backward compatibility */
+ ctx->offset = 0;
+ goto inval;
+ }
+
+ if (strcmp(arg, "retval") == 0) {
+ if (ctx->flags & TPARG_FL_RETURN) {
+ if ((ctx->flags & TPARG_FL_KERNEL) &&
+ is_btf_retval_void(ctx->funcname)) {
+ err = TP_ERR_NO_RETVAL;
+ goto inval;
+ }
code->op = FETCH_OP_RETVAL;
- } else {
- trace_probe_log_err(offs, RETVAL_ON_PROBE);
- ret = -EINVAL;
+ return 0;
}
- } else if ((len = str_has_prefix(arg, "stack"))) {
+ err = TP_ERR_RETVAL_ON_PROBE;
+ goto inval;
+ }
+
+ len = str_has_prefix(arg, "stack");
+ if (len) {
if (arg[len] == '\0') {
code->op = FETCH_OP_STACKP;
- } else if (isdigit(arg[len])) {
+ return 0;
+ }
+
+ if (isdigit(arg[len])) {
ret = kstrtoul(arg + len, 10, &param);
- if (ret) {
- goto inval_var;
- } else if ((flags & TPARG_FL_KERNEL) &&
- param > PARAM_MAX_STACK) {
- trace_probe_log_err(offs, BAD_STACK_NUM);
- ret = -EINVAL;
- } else {
- code->op = FETCH_OP_STACK;
- code->param = (unsigned int)param;
+ if (ret)
+ goto inval;
+
+ if ((ctx->flags & TPARG_FL_KERNEL) &&
+ param > PARAM_MAX_STACK) {
+ err = TP_ERR_BAD_STACK_NUM;
+ goto inval;
}
- } else
- goto inval_var;
- } else if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
+ code->op = FETCH_OP_STACK;
+ code->param = (unsigned int)param;
+ return 0;
+ }
+ goto inval;
+ }
+
+ if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
code->op = FETCH_OP_COMM;
+ return 0;
+ }
+
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
- } else if (((flags & TPARG_FL_MASK) ==
- (TPARG_FL_KERNEL | TPARG_FL_FENTRY)) &&
- (len = str_has_prefix(arg, "arg"))) {
+ len = str_has_prefix(arg, "arg");
+ if (len && tparg_is_function_entry(ctx->flags)) {
ret = kstrtoul(arg + len, 10, &param);
- if (ret) {
- goto inval_var;
- } else if (!param || param > PARAM_MAX_STACK) {
- trace_probe_log_err(offs, BAD_ARG_NUM);
- return -EINVAL;
+ if (ret)
+ goto inval;
+
+ if (!param || param > PARAM_MAX_STACK) {
+ err = TP_ERR_BAD_ARG_NUM;
+ goto inval;
}
+
code->op = FETCH_OP_ARG;
code->param = (unsigned int)param - 1;
+ /*
+ * The tracepoint probe will probe a stub function, and the
+ * first parameter of the stub is a dummy and should be ignored.
+ */
+ if (ctx->flags & TPARG_FL_TPOINT)
+ code->param++;
+ return 0;
+ }
#endif
- } else
- goto inval_var;
- return ret;
-
-inval_var:
- trace_probe_log_err(offs, BAD_VAR);
+inval:
+ __trace_probe_log_err(ctx->offset, err);
return -EINVAL;
}
@@ -378,7 +665,7 @@ static int __parse_imm_string(char *str, char **pbuf, int offs)
static int
parse_probe_arg(char *arg, const struct fetch_type *type,
struct fetch_insn **pcode, struct fetch_insn *end,
- unsigned int flags, int offs)
+ struct traceprobe_parse_context *ctx)
{
struct fetch_insn *code = *pcode;
unsigned long param;
@@ -389,13 +676,13 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
switch (arg[0]) {
case '$':
- ret = parse_probe_vars(arg + 1, type, code, flags, offs);
+ ret = parse_probe_vars(arg + 1, type, code, ctx);
break;
case '%': /* named register */
- if (flags & TPARG_FL_TPOINT) {
- /* eprobes do not handle registers */
- trace_probe_log_err(offs, BAD_VAR);
+ if (ctx->flags & (TPARG_FL_TEVENT | TPARG_FL_FPROBE)) {
+ /* eprobe and fprobe do not handle registers */
+ trace_probe_log_err(ctx->offset, BAD_VAR);
break;
}
ret = regs_query_register_offset(arg + 1);
@@ -404,14 +691,14 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
code->param = (unsigned int)ret;
ret = 0;
} else
- trace_probe_log_err(offs, BAD_REG_NAME);
+ trace_probe_log_err(ctx->offset, BAD_REG_NAME);
break;
case '@': /* memory, file-offset or symbol */
if (isdigit(arg[1])) {
ret = kstrtoul(arg + 1, 0, &param);
if (ret) {
- trace_probe_log_err(offs, BAD_MEM_ADDR);
+ trace_probe_log_err(ctx->offset, BAD_MEM_ADDR);
break;
}
/* load address */
@@ -419,13 +706,13 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
code->immediate = param;
} else if (arg[1] == '+') {
/* kprobes don't support file offsets */
- if (flags & TPARG_FL_KERNEL) {
- trace_probe_log_err(offs, FILE_ON_KPROBE);
+ if (ctx->flags & TPARG_FL_KERNEL) {
+ trace_probe_log_err(ctx->offset, FILE_ON_KPROBE);
return -EINVAL;
}
ret = kstrtol(arg + 2, 0, &offset);
if (ret) {
- trace_probe_log_err(offs, BAD_FILE_OFFS);
+ trace_probe_log_err(ctx->offset, BAD_FILE_OFFS);
break;
}
@@ -433,8 +720,8 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
code->immediate = (unsigned long)offset; // imm64?
} else {
/* uprobes don't support symbols */
- if (!(flags & TPARG_FL_KERNEL)) {
- trace_probe_log_err(offs, SYM_ON_UPROBE);
+ if (!(ctx->flags & TPARG_FL_KERNEL)) {
+ trace_probe_log_err(ctx->offset, SYM_ON_UPROBE);
return -EINVAL;
}
/* Preserve symbol for updating */
@@ -443,7 +730,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
if (!code->data)
return -ENOMEM;
if (++code == end) {
- trace_probe_log_err(offs, TOO_MANY_OPS);
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
return -EINVAL;
}
code->op = FETCH_OP_IMM;
@@ -451,7 +738,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
}
/* These are fetching from memory */
if (++code == end) {
- trace_probe_log_err(offs, TOO_MANY_OPS);
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
return -EINVAL;
}
*pcode = code;
@@ -470,36 +757,38 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
arg++; /* Skip '+', because kstrtol() rejects it. */
tmp = strchr(arg, '(');
if (!tmp) {
- trace_probe_log_err(offs, DEREF_NEED_BRACE);
+ trace_probe_log_err(ctx->offset, DEREF_NEED_BRACE);
return -EINVAL;
}
*tmp = '\0';
ret = kstrtol(arg, 0, &offset);
if (ret) {
- trace_probe_log_err(offs, BAD_DEREF_OFFS);
+ trace_probe_log_err(ctx->offset, BAD_DEREF_OFFS);
break;
}
- offs += (tmp + 1 - arg) + (arg[0] != '-' ? 1 : 0);
+ ctx->offset += (tmp + 1 - arg) + (arg[0] != '-' ? 1 : 0);
arg = tmp + 1;
tmp = strrchr(arg, ')');
if (!tmp) {
- trace_probe_log_err(offs + strlen(arg),
+ trace_probe_log_err(ctx->offset + strlen(arg),
DEREF_OPEN_BRACE);
return -EINVAL;
} else {
- const struct fetch_type *t2 = find_fetch_type(NULL, flags);
+ const struct fetch_type *t2 = find_fetch_type(NULL, ctx->flags);
+ int cur_offs = ctx->offset;
*tmp = '\0';
- ret = parse_probe_arg(arg, t2, &code, end, flags, offs);
+ ret = parse_probe_arg(arg, t2, &code, end, ctx);
if (ret)
break;
+ ctx->offset = cur_offs;
if (code->op == FETCH_OP_COMM ||
code->op == FETCH_OP_DATA) {
- trace_probe_log_err(offs, COMM_CANT_DEREF);
+ trace_probe_log_err(ctx->offset, COMM_CANT_DEREF);
return -EINVAL;
}
if (++code == end) {
- trace_probe_log_err(offs, TOO_MANY_OPS);
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
return -EINVAL;
}
*pcode = code;
@@ -510,7 +799,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
break;
case '\\': /* Immediate value */
if (arg[1] == '"') { /* Immediate string */
- ret = __parse_imm_string(arg + 2, &tmp, offs + 2);
+ ret = __parse_imm_string(arg + 2, &tmp, ctx->offset + 2);
if (ret)
break;
code->op = FETCH_OP_DATA;
@@ -518,15 +807,24 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
} else {
ret = str_to_immediate(arg + 1, &code->immediate);
if (ret)
- trace_probe_log_err(offs + 1, BAD_IMM);
+ trace_probe_log_err(ctx->offset + 1, BAD_IMM);
else
code->op = FETCH_OP_IMM;
}
break;
+ default:
+ if (isalpha(arg[0]) || arg[0] == '_') { /* BTF variable */
+ if (!tparg_is_function_entry(ctx->flags)) {
+ trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
+ return -EINVAL;
+ }
+ ret = parse_btf_arg(arg, code, ctx);
+ break;
+ }
}
if (!ret && code->op == FETCH_OP_NOP) {
/* Parsed, but do not find fetch method */
- trace_probe_log_err(offs, BAD_FETCH_ARG);
+ trace_probe_log_err(ctx->offset, BAD_FETCH_ARG);
ret = -EINVAL;
}
return ret;
@@ -571,12 +869,13 @@ static int __parse_bitfield_probe_arg(const char *bf,
/* String length checking wrapper */
static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
- struct probe_arg *parg, unsigned int flags, int offset)
+ struct probe_arg *parg,
+ struct traceprobe_parse_context *ctx)
{
struct fetch_insn *code, *scode, *tmp = NULL;
char *t, *t2, *t3;
- char *arg;
int ret, len;
+ char *arg;
arg = kstrdup(argv, GFP_KERNEL);
if (!arg)
@@ -585,10 +884,10 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
ret = -EINVAL;
len = strlen(arg);
if (len > MAX_ARGSTR_LEN) {
- trace_probe_log_err(offset, ARG_TOO_LONG);
+ trace_probe_log_err(ctx->offset, ARG_TOO_LONG);
goto out;
} else if (len == 0) {
- trace_probe_log_err(offset, NO_ARG_BODY);
+ trace_probe_log_err(ctx->offset, NO_ARG_BODY);
goto out;
}
@@ -606,23 +905,24 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
*t2++ = '\0';
t3 = strchr(t2, ']');
if (!t3) {
- offset += t2 + strlen(t2) - arg;
- trace_probe_log_err(offset,
+ int offs = t2 + strlen(t2) - arg;
+
+ trace_probe_log_err(ctx->offset + offs,
ARRAY_NO_CLOSE);
goto out;
} else if (t3[1] != '\0') {
- trace_probe_log_err(offset + t3 + 1 - arg,
+ trace_probe_log_err(ctx->offset + t3 + 1 - arg,
BAD_ARRAY_SUFFIX);
goto out;
}
*t3 = '\0';
if (kstrtouint(t2, 0, &parg->count) || !parg->count) {
- trace_probe_log_err(offset + t2 - arg,
+ trace_probe_log_err(ctx->offset + t2 - arg,
BAD_ARRAY_NUM);
goto out;
}
if (parg->count > MAX_ARRAY_LEN) {
- trace_probe_log_err(offset + t2 - arg,
+ trace_probe_log_err(ctx->offset + t2 - arg,
ARRAY_TOO_BIG);
goto out;
}
@@ -633,17 +933,17 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
* Since $comm and immediate string can not be dereferenced,
* we can find those by strcmp. But ignore for eprobes.
*/
- if (!(flags & TPARG_FL_TPOINT) &&
+ if (!(ctx->flags & TPARG_FL_TEVENT) &&
(strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 ||
strncmp(arg, "\\\"", 2) == 0)) {
/* The type of $comm must be "string", and not an array. */
if (parg->count || (t && strcmp(t, "string")))
goto out;
- parg->type = find_fetch_type("string", flags);
+ parg->type = find_fetch_type("string", ctx->flags);
} else
- parg->type = find_fetch_type(t, flags);
+ parg->type = find_fetch_type(t, ctx->flags);
if (!parg->type) {
- trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_TYPE);
+ trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), BAD_TYPE);
goto out;
}
parg->offset = *size;
@@ -665,10 +965,18 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
code[FETCH_INSN_MAX - 1].op = FETCH_OP_END;
ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1],
- flags, offset);
+ ctx);
if (ret)
goto fail;
+ /* Update storing type if BTF is available */
+ if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) && !t) {
+ if (code->op == FETCH_OP_ARG)
+ parg->type = parse_btf_arg_type(code->param, ctx);
+ else if (code->op == FETCH_OP_RETVAL)
+ parg->type = parse_btf_retval_type(ctx);
+ }
+
ret = -EINVAL;
/* Store operation */
if (parg->type->is_string) {
@@ -676,7 +984,7 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
if (code->op != FETCH_OP_REG && code->op != FETCH_OP_STACK &&
code->op != FETCH_OP_RETVAL && code->op != FETCH_OP_ARG &&
code->op != FETCH_OP_DEREF && code->op != FETCH_OP_TP_ARG) {
- trace_probe_log_err(offset + (t ? (t - arg) : 0),
+ trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0),
BAD_SYMSTRING);
goto fail;
}
@@ -684,7 +992,7 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF &&
code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM &&
code->op != FETCH_OP_DATA && code->op != FETCH_OP_TP_ARG) {
- trace_probe_log_err(offset + (t ? (t - arg) : 0),
+ trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0),
BAD_STRING);
goto fail;
}
@@ -703,7 +1011,7 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
*/
code++;
if (code->op != FETCH_OP_NOP) {
- trace_probe_log_err(offset, TOO_MANY_OPS);
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
goto fail;
}
}
@@ -726,7 +1034,7 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
} else {
code++;
if (code->op != FETCH_OP_NOP) {
- trace_probe_log_err(offset, TOO_MANY_OPS);
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
goto fail;
}
code->op = FETCH_OP_ST_RAW;
@@ -737,7 +1045,7 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
if (t != NULL) {
ret = __parse_bitfield_probe_arg(t, parg->type, &code);
if (ret) {
- trace_probe_log_err(offset + t - arg, BAD_BITFIELD);
+ trace_probe_log_err(ctx->offset + t - arg, BAD_BITFIELD);
goto fail;
}
}
@@ -747,13 +1055,13 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
if (scode->op != FETCH_OP_ST_MEM &&
scode->op != FETCH_OP_ST_STRING &&
scode->op != FETCH_OP_ST_USTRING) {
- trace_probe_log_err(offset + (t ? (t - arg) : 0),
+ trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0),
BAD_STRING);
goto fail;
}
code++;
if (code->op != FETCH_OP_NOP) {
- trace_probe_log_err(offset, TOO_MANY_OPS);
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
goto fail;
}
code->op = FETCH_OP_LP_ARRAY;
@@ -801,8 +1109,35 @@ static int traceprobe_conflict_field_name(const char *name,
return 0;
}
+static char *generate_probe_arg_name(const char *arg, int idx)
+{
+ char *name = NULL;
+ const char *end;
+
+ /*
+	 * If the argument name is omitted, try to use the argument itself
+	 * as the name (a BTF variable name), falling back to "argN".
+ */
+ if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS)) {
+ end = strchr(arg, ':');
+ if (!end)
+ end = arg + strlen(arg);
+
+ name = kmemdup_nul(arg, end - arg, GFP_KERNEL);
+ if (!name || !is_good_name(name)) {
+ kfree(name);
+ name = NULL;
+ }
+ }
+
+ if (!name)
+ name = kasprintf(GFP_KERNEL, "arg%d", idx + 1);
+
+ return name;
+}
+
int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg,
- unsigned int flags)
+ struct traceprobe_parse_context *ctx)
{
struct probe_arg *parg = &tp->args[i];
const char *body;
@@ -822,8 +1157,7 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg,
parg->name = kmemdup_nul(arg, body - arg, GFP_KERNEL);
body++;
} else {
- /* If argument name is omitted, set "argN" */
- parg->name = kasprintf(GFP_KERNEL, "arg%d", i + 1);
+ parg->name = generate_probe_arg_name(arg, i);
body = arg;
}
if (!parg->name)
@@ -837,9 +1171,9 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg,
trace_probe_log_err(0, USED_ARG_NAME);
return -EINVAL;
}
+ ctx->offset = body - arg;
/* Parse fetch argument */
- return traceprobe_parse_probe_arg_body(body, &tp->size, parg, flags,
- body - arg);
+ return traceprobe_parse_probe_arg_body(body, &tp->size, parg, ctx);
}
void traceprobe_free_probe_arg(struct probe_arg *arg)
@@ -858,6 +1192,151 @@ void traceprobe_free_probe_arg(struct probe_arg *arg)
kfree(arg->fmt);
}
+static int argv_has_var_arg(int argc, const char *argv[], int *args_idx,
+ struct traceprobe_parse_context *ctx)
+{
+ int i, found = 0;
+
+ for (i = 0; i < argc; i++)
+ if (str_has_prefix(argv[i], "$arg")) {
+ trace_probe_log_set_index(i + 2);
+
+ if (!tparg_is_function_entry(ctx->flags)) {
+ trace_probe_log_err(0, NOFENTRY_ARGS);
+ return -EINVAL;
+ }
+
+ if (isdigit(argv[i][4])) {
+ found = 1;
+ continue;
+ }
+
+ if (argv[i][4] != '*') {
+ trace_probe_log_err(0, BAD_VAR);
+ return -EINVAL;
+ }
+
+ if (*args_idx >= 0 && *args_idx < argc) {
+ trace_probe_log_err(0, DOUBLE_ARGS);
+ return -EINVAL;
+ }
+ found = 1;
+ *args_idx = i;
+ }
+
+ return found;
+}
+
+static int sprint_nth_btf_arg(int idx, const char *type,
+ char *buf, int bufsize,
+ struct traceprobe_parse_context *ctx)
+{
+ struct btf *btf = traceprobe_get_btf();
+ const char *name;
+ int ret;
+
+ if (idx >= ctx->nr_params) {
+ trace_probe_log_err(0, NO_BTFARG);
+ return -ENOENT;
+ }
+ name = btf_name_by_offset(btf, ctx->params[idx].name_off);
+ if (!name) {
+ trace_probe_log_err(0, NO_BTF_ENTRY);
+ return -ENOENT;
+ }
+ ret = snprintf(buf, bufsize, "%s%s", name, type);
+ if (ret >= bufsize) {
+ trace_probe_log_err(0, ARGS_2LONG);
+ return -E2BIG;
+ }
+ return ret;
+}
+
+/* Return new_argv which must be freed after use */
+const char **traceprobe_expand_meta_args(int argc, const char *argv[],
+ int *new_argc, char *buf, int bufsize,
+ struct traceprobe_parse_context *ctx)
+{
+ const struct btf_param *params = NULL;
+ int i, j, n, used, ret, args_idx = -1;
+ const char **new_argv = NULL;
+ int nr_params;
+
+ ret = argv_has_var_arg(argc, argv, &args_idx, ctx);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ if (!ret) {
+ *new_argc = argc;
+ return NULL;
+ }
+
+ params = find_btf_func_param(ctx->funcname, &nr_params,
+ ctx->flags & TPARG_FL_TPOINT);
+ if (IS_ERR(params)) {
+ if (args_idx != -1) {
+ /* $arg* requires BTF info */
+ trace_probe_log_err(0, NOSUP_BTFARG);
+ return (const char **)params;
+ }
+ *new_argc = argc;
+ return NULL;
+ }
+ ctx->params = params;
+ ctx->nr_params = nr_params;
+
+ if (args_idx >= 0)
+ *new_argc = argc + ctx->nr_params - 1;
+ else
+ *new_argc = argc;
+
+ new_argv = kcalloc(*new_argc, sizeof(char *), GFP_KERNEL);
+ if (!new_argv)
+ return ERR_PTR(-ENOMEM);
+
+ used = 0;
+ for (i = 0, j = 0; i < argc; i++) {
+ trace_probe_log_set_index(i + 2);
+ if (i == args_idx) {
+ for (n = 0; n < nr_params; n++) {
+ ret = sprint_nth_btf_arg(n, "", buf + used,
+ bufsize - used, ctx);
+ if (ret < 0)
+ goto error;
+
+ new_argv[j++] = buf + used;
+ used += ret + 1;
+ }
+ continue;
+ }
+
+ if (str_has_prefix(argv[i], "$arg")) {
+ char *type = NULL;
+
+ n = simple_strtoul(argv[i] + 4, &type, 10);
+ if (type && !(*type == ':' || *type == '\0')) {
+ trace_probe_log_err(0, BAD_VAR);
+ ret = -ENOENT;
+ goto error;
+ }
+ /* Note: $argN starts from $arg1 */
+ ret = sprint_nth_btf_arg(n - 1, type, buf + used,
+ bufsize - used, ctx);
+ if (ret < 0)
+ goto error;
+ new_argv[j++] = buf + used;
+ used += ret + 1;
+ } else
+ new_argv[j++] = argv[i];
+ }
+
+ return new_argv;
+
+error:
+ kfree(new_argv);
+ return ERR_PTR(ret);
+}
+
int traceprobe_update_arg(struct probe_arg *arg)
{
struct fetch_insn *code = arg->code;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 6a4ecfb1da43..01ea148723de 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -23,6 +23,7 @@
#include <linux/limits.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
+#include <linux/btf.h>
#include <asm/bitsperlong.h>
#include "trace.h"
@@ -32,7 +33,9 @@
#define MAX_ARGSTR_LEN 63
#define MAX_ARRAY_LEN 64
#define MAX_ARG_NAME_LEN 32
+#define MAX_BTF_ARGS_LEN 128
#define MAX_STRING_SIZE PATH_MAX
+#define MAX_ARG_BUF_LEN (MAX_TRACE_ARGS * MAX_ARG_NAME_LEN)
/* Reserved field names */
#define FIELD_STRING_IP "__probe_ip"
@@ -357,15 +360,42 @@ int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_a
#define trace_probe_for_each_link_rcu(pos, tp) \
list_for_each_entry_rcu(pos, &(tp)->event->files, list)
+/*
+ * The flags used for parsing trace_probe arguments.
+ * TPARG_FL_RETURN, TPARG_FL_FENTRY and TPARG_FL_TEVENT are mutually exclusive.
+ * TPARG_FL_KERNEL and TPARG_FL_USER are also mutually exclusive.
+ * TPARG_FL_FPROBE and TPARG_FL_TPOINT are optional, but they should be used
+ * together with TPARG_FL_KERNEL.
+ */
#define TPARG_FL_RETURN BIT(0)
#define TPARG_FL_KERNEL BIT(1)
#define TPARG_FL_FENTRY BIT(2)
-#define TPARG_FL_TPOINT BIT(3)
+#define TPARG_FL_TEVENT BIT(3)
#define TPARG_FL_USER BIT(4)
-#define TPARG_FL_MASK GENMASK(4, 0)
+#define TPARG_FL_FPROBE BIT(5)
+#define TPARG_FL_TPOINT BIT(6)
+#define TPARG_FL_LOC_MASK GENMASK(4, 0)
+
+static inline bool tparg_is_function_entry(unsigned int flags)
+{
+ return (flags & TPARG_FL_LOC_MASK) == (TPARG_FL_KERNEL | TPARG_FL_FENTRY);
+}
+
+struct traceprobe_parse_context {
+ struct trace_event_call *event;
+ const struct btf_param *params;
+ s32 nr_params;
+ const char *funcname;
+ unsigned int flags;
+ int offset;
+};
extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i,
- const char *argv, unsigned int flags);
+ const char *argv,
+ struct traceprobe_parse_context *ctx);
+const char **traceprobe_expand_meta_args(int argc, const char *argv[],
+ int *new_argc, char *buf, int bufsize,
+ struct traceprobe_parse_context *ctx);
extern int traceprobe_update_arg(struct probe_arg *arg);
extern void traceprobe_free_probe_arg(struct probe_arg *arg);
@@ -404,11 +434,12 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(REFCNT_OPEN_BRACE, "Reference counter brace is not closed"), \
C(BAD_REFCNT_SUFFIX, "Reference counter has wrong suffix"), \
C(BAD_UPROBE_OFFS, "Invalid uprobe offset"), \
- C(MAXACT_NO_KPROBE, "Maxactive is not for kprobe"), \
+ C(BAD_MAXACT_TYPE, "Maxactive is only for function exit"), \
C(BAD_MAXACT, "Invalid maxactive number"), \
C(MAXACT_TOO_BIG, "Maxactive is too big"), \
C(BAD_PROBE_ADDR, "Invalid probed address or symbol"), \
C(BAD_RETPROBE, "Retprobe address must be an function entry"), \
+ C(NO_TRACEPOINT, "Tracepoint is not found"), \
C(BAD_ADDR_SUFFIX, "Invalid probed address suffix"), \
C(NO_GROUP_NAME, "Group name is not specified"), \
C(GROUP_TOO_LONG, "Group name is too long"), \
@@ -418,6 +449,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(BAD_EVENT_NAME, "Event name must follow the same rules as C identifiers"), \
C(EVENT_EXIST, "Given group/event name is already used by another event"), \
C(RETVAL_ON_PROBE, "$retval is not available on probe"), \
+ C(NO_RETVAL, "This function returns 'void' type"), \
C(BAD_STACK_NUM, "Invalid stack number"), \
C(BAD_ARG_NUM, "Invalid argument number"), \
C(BAD_VAR, "Invalid $-valiable specified"), \
@@ -456,7 +488,14 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(NO_EVENT_INFO, "This requires both group and event name to attach"),\
C(BAD_ATTACH_EVENT, "Attached event does not exist"),\
C(BAD_ATTACH_ARG, "Attached event does not have this field"),\
- C(NO_EP_FILTER, "No filter rule after 'if'"),
+ C(NO_EP_FILTER, "No filter rule after 'if'"), \
+ C(NOSUP_BTFARG, "BTF is not available or not supported"), \
+ C(NO_BTFARG, "This variable is not found at this probe point"),\
+ C(NO_BTF_ENTRY, "No BTF entry for this probe point"), \
+ C(BAD_VAR_ARGS, "$arg* must be an independent parameter without name etc."),\
+ C(NOFENTRY_ARGS, "$arg* can be used only on function entry"), \
+ C(DOUBLE_ARGS, "$arg* can be used only once in the parameters"), \
+ C(ARGS_2LONG, "$arg* failed because the argument list is too long"),
#undef C
#define C(a, b) TP_ERR_##a
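
The new error strings surface through the tracefs error_log like the
existing ones. For example, since NOFENTRY_ARGS above rejects $arg* anywhere
but function entry, a %return probe with $arg* must fail; a sketch that
provokes the error and dumps the log (paths assumed):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        /* $arg* on a %return (exit) probe trips NOFENTRY_ARGS */
        const char *cmd = "f:bad/ev vfs_read%return $arg*\n";
        char buf[4096];
        ssize_t n;
        int fd;

        fd = open("/sys/kernel/tracing/dynamic_events",
                  O_WRONLY | O_APPEND);
        if (fd >= 0) {
            if (write(fd, cmd, strlen(cmd)) < 0)
                perror("dynamic_events");    /* expected to fail */
            close(fd);
        }

        fd = open("/sys/kernel/tracing/error_log", O_RDONLY);
        if (fd < 0)
            return 1;
        while ((n = read(fd, buf, sizeof(buf))) > 0)
            fwrite(buf, 1, n, stdout);
        close(fd);
        return 0;
    }
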
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 8b92e34ff0c8..fa09b33ee731 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -686,10 +686,12 @@ static int __trace_uprobe_create(int argc, const char **argv)
/* parse arguments */
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+ struct traceprobe_parse_context ctx = {
+ .flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER,
+ };
+
trace_probe_log_set_index(i + 2);
- ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i],
- (is_return ? TPARG_FL_RETURN : 0) |
- TPARG_FL_USER);
+ ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], &ctx);
if (ret)
goto error;
}