From f5b40e363ad6041a96e3da32281d8faa191597b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 May 2006 10:49:33 -0700 Subject: Fix ptrace_attach()/ptrace_traceme()/de_thread() race This holds the task lock (and, for ptrace_attach, the tasklist_lock) over the actual attach event, which closes a race between attacking to a thread that is either doing a PTRACE_TRACEME or getting de-threaded. Thanks to Oleg Nesterov for reminding me about this, and Chris Wright for noticing a lost return value in my first version. Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 4e0f0ec003f7..b0f8da80d7d4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -148,12 +148,16 @@ int ptrace_may_attach(struct task_struct *task) int ptrace_attach(struct task_struct *task) { int retval; - task_lock(task); + retval = -EPERM; if (task->pid <= 1) - goto bad; + goto out; if (task->tgid == current->tgid) - goto bad; + goto out; + + write_lock_irq(&tasklist_lock); + task_lock(task); + /* the same process cannot be attached many times */ if (task->ptrace & PT_PTRACED) goto bad; @@ -166,17 +170,15 @@ int ptrace_attach(struct task_struct *task) ? PT_ATTACHED : 0); if (capable(CAP_SYS_PTRACE)) task->ptrace |= PT_PTRACE_CAP; - task_unlock(task); - write_lock_irq(&tasklist_lock); __ptrace_link(task, current); - write_unlock_irq(&tasklist_lock); force_sig_specific(SIGSTOP, task); - return 0; bad: + write_unlock_irq(&tasklist_lock); task_unlock(task); +out: return retval; } @@ -417,21 +419,22 @@ int ptrace_request(struct task_struct *child, long request, */ int ptrace_traceme(void) { - int ret; + int ret = -EPERM; /* * Are we already being traced? */ - if (current->ptrace & PT_PTRACED) - return -EPERM; - ret = security_ptrace(current->parent, current); - if (ret) - return -EPERM; - /* - * Set the ptrace bit in the process ptrace flags. - */ - current->ptrace |= PT_PTRACED; - return 0; + task_lock(current); + if (!(current->ptrace & PT_PTRACED)) { + ret = security_ptrace(current->parent, current); + /* + * Set the ptrace bit in the process ptrace flags. + */ + if (!ret) + current->ptrace |= PT_PTRACED; + } + task_unlock(current); + return ret; } /** -- cgit v1.2.3-70-g09d2 From f358166a9405e4f1d8e50d8f415c26d95505b6de Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 11 May 2006 11:08:49 -0700 Subject: ptrace_attach: fix possible deadlock schenario with irqs Eric Biederman points out that we can't take the task_lock while holding tasklist_lock for writing, because another CPU that holds the task lock might take an interrupt that then tries to take tasklist_lock for writing. Which would be a nasty deadlock, with one CPU spinning forever in an interrupt handler (although admittedly you need to really work at triggering it ;) Since the ptrace_attach() code is special and very unusual, just make it be extra careful, and use trylock+repeat to avoid the possible deadlock. Cc: Oleg Nesterov Cc: Eric W. Biederman Cc: Roland McGrath Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b0f8da80d7d4..921c22ad16e4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -155,8 +155,26 @@ int ptrace_attach(struct task_struct *task) if (task->tgid == current->tgid) goto out; - write_lock_irq(&tasklist_lock); +repeat: + /* + * Nasty, nasty. + * + * We want to hold both the task-lock and the + * tasklist_lock for writing at the same time. + * But that's against the rules (tasklist_lock + * is taken for reading by interrupts on other + * cpu's that may have task_lock). + */ task_lock(task); + local_irq_disable(); + if (!write_trylock(&tasklist_lock)) { + local_irq_enable(); + task_unlock(task); + do { + cpu_relax(); + } while (!write_can_lock(&tasklist_lock)); + goto repeat; + } /* the same process cannot be attached many times */ if (task->ptrace & PT_PTRACED) -- cgit v1.2.3-70-g09d2 From 986733e01d258c26107f1da9d8d47c718349ad2f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 15 May 2006 09:43:58 -0700 Subject: [PATCH] RCU: introduce rcu_needs_cpu() interface With "Paul E. McKenney" Introduce rcu_needs_cpu() interface. This can be used to tell if there will be a new rcu batch on a cpu soon by looking at the curlist pointer. This can be used to avoid to enter a tickless idle state where the cpu would miss that a new batch is ready when rcu_start_batch would be called on a different cpu. Signed-off-by: Heiko Carstens Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 1 + kernel/rcupdate.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'kernel') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5673008b61e1..970284f571a6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -132,6 +132,7 @@ static inline void rcu_bh_qsctr_inc(int cpu) } extern int rcu_pending(int cpu); +extern int rcu_needs_cpu(int cpu); /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 6d32ff26f948..2058f88c7bbb 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -479,12 +479,31 @@ static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) return 0; } +/* + * Check to see if there is any immediate RCU-related work to be done + * by the current CPU, returning 1 if so. This function is part of the + * RCU implementation; it is -not- an exported member of the RCU API. + */ int rcu_pending(int cpu) { return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); } +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API. + */ +int rcu_needs_cpu(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); + + return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu)); +} + void rcu_check_callbacks(int cpu, int user) { if (user || -- cgit v1.2.3-70-g09d2 From 5e376613899076396d0c97de67ad072587267370 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Mon, 15 May 2006 09:44:06 -0700 Subject: [PATCH] symbol_put_addr() locks kernel Even since a previous patch: Fix race between CONFIG_DEBUG_SLABALLOC and modules Sun, 27 Jun 2004 17:55:19 +0000 (17:55 +0000) http://www.kernel.org/git/?p=linux/kernel/git/torvalds/old-2.6-bkcvs.git;a=commit;h=92b3db26d31cf21b70e3c1eadc56c179506d8fbe The function symbol_put_addr() will deadlock the kernel. symbol_put_addr() would acquire modlist_lock, then while holding the lock call two functions kernel_text_address() and module_text_address() which also try to acquire the same lock. This deadlocks the kernel of course. This patch changes symbol_put_addr() to not acquire the modlist_lock, it doesn't need it since it never looks at the module list directly. Also, it now uses core_kernel_text() instead of kernel_text_address(). The latter has an additional check for addr inside a module, but we don't need to do that since we call module_text_address() (the same function kernel_text_address uses) ourselves. Signed-off-by: Trent Piepho Cc: Zwane Mwaikambo Acked-by: Rusty Russell Cc: Johannes Stezenbach Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 + kernel/extable.c | 2 +- kernel/module.c | 12 ++++++------ 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e1bd0842f6a1..f4fc576ed4c4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -124,6 +124,7 @@ extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(char *ptr, char **retptr); +extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int session_of_pgrp(int pgrp); diff --git a/kernel/extable.c b/kernel/extable.c index 7501b531ceed..7fe262855317 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -40,7 +40,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } -static int core_kernel_text(unsigned long addr) +int core_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) diff --git a/kernel/module.c b/kernel/module.c index d24deb0dbbc9..bbe04862e1b0 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -705,14 +705,14 @@ EXPORT_SYMBOL(__symbol_put); void symbol_put_addr(void *addr) { - unsigned long flags; + struct module *modaddr; - spin_lock_irqsave(&modlist_lock, flags); - if (!kernel_text_address((unsigned long)addr)) - BUG(); + if (core_kernel_text((unsigned long)addr)) + return; - module_put(module_text_address((unsigned long)addr)); - spin_unlock_irqrestore(&modlist_lock, flags); + if (!(modaddr = module_text_address((unsigned long)addr))) + BUG(); + module_put(modaddr); } EXPORT_SYMBOL_GPL(symbol_put_addr); -- cgit v1.2.3-70-g09d2