diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/.gitignore | 1 | ||||
-rw-r--r-- | kernel/hrtimer.c | 3 | ||||
-rw-r--r-- | kernel/kexec.c | 118 | ||||
-rw-r--r-- | kernel/kprobes.c | 19 | ||||
-rw-r--r-- | kernel/kthread.c | 52 | ||||
-rw-r--r-- | kernel/signal.c | 2 | ||||
-rw-r--r-- | kernel/smpboot.c | 14 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 26 | ||||
-rw-r--r-- | kernel/user_namespace.c | 22 |
9 files changed, 177 insertions, 80 deletions
diff --git a/kernel/.gitignore b/kernel/.gitignore index ab4f1090f437..b3097bde4e9c 100644 --- a/kernel/.gitignore +++ b/kernel/.gitignore @@ -4,3 +4,4 @@ config_data.h config_data.gz timeconst.h +hz.bc diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cc47812d3feb..14be27feda49 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -63,6 +63,7 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { + .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), .clock_base = { { @@ -1642,8 +1643,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu) struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; - raw_spin_lock_init(&cpu_base->lock); - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { cpu_base->clock_base[i].cpu_base = cpu_base; timerqueue_init_head(&cpu_base->clock_base[i].active); diff --git a/kernel/kexec.c b/kernel/kexec.c index bddd3d7a74b6..ffd4e111fd67 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -55,7 +55,7 @@ struct resource crashk_res = { .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; struct resource crashk_low_res = { - .name = "Crash kernel low", + .name = "Crash kernel", .start = 0, .end = 0, .flags = IORESOURCE_BUSY | IORESOURCE_MEM @@ -1368,35 +1368,114 @@ static int __init parse_crashkernel_simple(char *cmdline, return 0; } +#define SUFFIX_HIGH 0 +#define SUFFIX_LOW 1 +#define SUFFIX_NULL 2 +static __initdata char *suffix_tbl[] = { + [SUFFIX_HIGH] = ",high", + [SUFFIX_LOW] = ",low", + [SUFFIX_NULL] = NULL, +}; + /* - * That function is the entry point for command line parsing and should be - * called from the arch-specific code. + * That function parses "suffix" crashkernel command lines like + * + * crashkernel=size,[high|low] + * + * It returns 0 on success and -EINVAL on failure. */ +static int __init parse_crashkernel_suffix(char *cmdline, + unsigned long long *crash_size, + unsigned long long *crash_base, + const char *suffix) +{ + char *cur = cmdline; + + *crash_size = memparse(cmdline, &cur); + if (cmdline == cur) { + pr_warn("crashkernel: memory value expected\n"); + return -EINVAL; + } + + /* check with suffix */ + if (strncmp(cur, suffix, strlen(suffix))) { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + cur += strlen(suffix); + if (*cur != ' ' && *cur != '\0') { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + + return 0; +} + +static __init char *get_last_crashkernel(char *cmdline, + const char *name, + const char *suffix) +{ + char *p = cmdline, *ck_cmdline = NULL; + + /* find crashkernel and use the last one if there are more */ + p = strstr(p, name); + while (p) { + char *end_p = strchr(p, ' '); + char *q; + + if (!end_p) + end_p = p + strlen(p); + + if (!suffix) { + int i; + + /* skip the one with any known suffix */ + for (i = 0; suffix_tbl[i]; i++) { + q = end_p - strlen(suffix_tbl[i]); + if (!strncmp(q, suffix_tbl[i], + strlen(suffix_tbl[i]))) + goto next; + } + ck_cmdline = p; + } else { + q = end_p - strlen(suffix); + if (!strncmp(q, suffix, strlen(suffix))) + ck_cmdline = p; + } +next: + p = strstr(p+1, name); + } + + if (!ck_cmdline) + return NULL; + + return ck_cmdline; +} + static int __init __parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, - const char *name) + const char *name, + const char *suffix) { - char *p = cmdline, *ck_cmdline = NULL; char *first_colon, *first_space; + char *ck_cmdline; BUG_ON(!crash_size || !crash_base); *crash_size = 0; *crash_base = 0; - /* find crashkernel and use the last one if there are more */ - p = strstr(p, name); - while (p) { - ck_cmdline = p; - p = strstr(p+1, name); - } + ck_cmdline = get_last_crashkernel(cmdline, name, suffix); if (!ck_cmdline) return -EINVAL; ck_cmdline += strlen(name); + if (suffix) + return parse_crashkernel_suffix(ck_cmdline, crash_size, + crash_base, suffix); /* * if the commandline contains a ':', then that's the extended * syntax -- if not, it must be the classic syntax @@ -1413,13 +1492,26 @@ static int __init __parse_crashkernel(char *cmdline, return 0; } +/* + * That function is the entry point for command line parsing and should be + * called from the arch-specific code. + */ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel="); + "crashkernel=", NULL); +} + +int __init parse_crashkernel_high(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel=", suffix_tbl[SUFFIX_HIGH]); } int __init parse_crashkernel_low(char *cmdline, @@ -1428,7 +1520,7 @@ int __init parse_crashkernel_low(char *cmdline, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel_low="); + "crashkernel=", suffix_tbl[SUFFIX_LOW]); } static void update_vmcoreinfo_note(void) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e35be53f6613..3fed7f0cbcdf 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -794,16 +794,16 @@ out: } #ifdef CONFIG_SYSCTL -/* This should be called with kprobe_mutex locked */ static void __kprobes optimize_all_kprobes(void) { struct hlist_head *head; struct kprobe *p; unsigned int i; + mutex_lock(&kprobe_mutex); /* If optimization is already allowed, just return */ if (kprobes_allow_optimization) - return; + goto out; kprobes_allow_optimization = true; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { @@ -813,18 +813,22 @@ static void __kprobes optimize_all_kprobes(void) optimize_kprobe(p); } printk(KERN_INFO "Kprobes globally optimized\n"); +out: + mutex_unlock(&kprobe_mutex); } -/* This should be called with kprobe_mutex locked */ static void __kprobes unoptimize_all_kprobes(void) { struct hlist_head *head; struct kprobe *p; unsigned int i; + mutex_lock(&kprobe_mutex); /* If optimization is already prohibited, just return */ - if (!kprobes_allow_optimization) + if (!kprobes_allow_optimization) { + mutex_unlock(&kprobe_mutex); return; + } kprobes_allow_optimization = false; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { @@ -834,11 +838,14 @@ static void __kprobes unoptimize_all_kprobes(void) unoptimize_kprobe(p, false); } } + mutex_unlock(&kprobe_mutex); + /* Wait for unoptimizing completion */ wait_for_kprobe_optimizer(); printk(KERN_INFO "Kprobes globally unoptimized\n"); } +static DEFINE_MUTEX(kprobe_sysctl_mutex); int sysctl_kprobes_optimization; int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, @@ -846,7 +853,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write, { int ret; - mutex_lock(&kprobe_mutex); + mutex_lock(&kprobe_sysctl_mutex); sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0; ret = proc_dointvec_minmax(table, write, buffer, length, ppos); @@ -854,7 +861,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write, optimize_all_kprobes(); else unoptimize_all_kprobes(); - mutex_unlock(&kprobe_mutex); + mutex_unlock(&kprobe_sysctl_mutex); return ret; } diff --git a/kernel/kthread.c b/kernel/kthread.c index 691dc2ef9baf..9eb7fed0bbaa 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -124,12 +124,12 @@ void *kthread_data(struct task_struct *task) static void __kthread_parkme(struct kthread *self) { - __set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_PARKED); while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) { if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags)) complete(&self->parked); schedule(); - __set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_PARKED); } clear_bit(KTHREAD_IS_PARKED, &self->flags); __set_current_state(TASK_RUNNING); @@ -256,8 +256,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), } EXPORT_SYMBOL(kthread_create_on_node); -static void __kthread_bind(struct task_struct *p, unsigned int cpu) +static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state) { + /* Must have done schedule() in kthread() before we set_task_cpu */ + if (!wait_task_inactive(p, state)) { + WARN_ON(1); + return; + } /* It's safe because the task is inactive. */ do_set_cpus_allowed(p, cpumask_of(cpu)); p->flags |= PF_THREAD_BOUND; @@ -274,12 +279,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu) */ void kthread_bind(struct task_struct *p, unsigned int cpu) { - /* Must have done schedule() in kthread() before we set_task_cpu */ - if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { - WARN_ON(1); - return; - } - __kthread_bind(p, cpu); + __kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(kthread_bind); @@ -324,6 +324,22 @@ static struct kthread *task_get_live_kthread(struct task_struct *k) return NULL; } +static void __kthread_unpark(struct task_struct *k, struct kthread *kthread) +{ + clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + /* + * We clear the IS_PARKED bit here as we don't wait + * until the task has left the park code. So if we'd + * park before that happens we'd see the IS_PARKED bit + * which might be about to be cleared. + */ + if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { + if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) + __kthread_bind(k, kthread->cpu, TASK_PARKED); + wake_up_state(k, TASK_PARKED); + } +} + /** * kthread_unpark - unpark a thread created by kthread_create(). * @k: thread created by kthread_create(). @@ -336,20 +352,8 @@ void kthread_unpark(struct task_struct *k) { struct kthread *kthread = task_get_live_kthread(k); - if (kthread) { - clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); - /* - * We clear the IS_PARKED bit here as we don't wait - * until the task has left the park code. So if we'd - * park before that happens we'd see the IS_PARKED bit - * which might be about to be cleared. - */ - if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { - if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) - __kthread_bind(k, kthread->cpu); - wake_up_process(k); - } - } + if (kthread) + __kthread_unpark(k, kthread); put_task_struct(k); } @@ -407,7 +411,7 @@ int kthread_stop(struct task_struct *k) trace_sched_kthread_stop(k); if (kthread) { set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); - clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + __kthread_unpark(k, kthread); wake_up_process(k); wait_for_completion(&kthread->exited); } diff --git a/kernel/signal.c b/kernel/signal.c index dd72567767d9..598dc06be421 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2948,7 +2948,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) static int do_tkill(pid_t tgid, pid_t pid, int sig) { - struct siginfo info; + struct siginfo info = {}; info.si_signo = sig; info.si_errno = 0; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 8eaed9aa9cf0..02fc5c933673 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -185,8 +185,18 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) } get_task_struct(tsk); *per_cpu_ptr(ht->store, cpu) = tsk; - if (ht->create) - ht->create(cpu); + if (ht->create) { + /* + * Make sure that the task has actually scheduled out + * into park position, before calling the create + * callback. At least the migration thread callback + * requires that the task is off the runqueue. + */ + if (!wait_task_inactive(tsk, TASK_PARKED)) + WARN_ON(1); + else + ht->create(cpu); + } return 0; } diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 9e5b8c272eec..5a0f781cd729 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -739,12 +739,6 @@ static void blk_add_trace_rq_complete(void *ignore, struct request_queue *q, struct request *rq) { - struct blk_trace *bt = q->blk_trace; - - /* if control ever passes through here, it's a request based driver */ - if (unlikely(bt && !bt->rq_based)) - bt->rq_based = true; - blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); } @@ -780,24 +774,10 @@ static void blk_add_trace_bio_bounce(void *ignore, blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); } -static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error) +static void blk_add_trace_bio_complete(void *ignore, + struct request_queue *q, struct bio *bio, + int error) { - struct request_queue *q; - struct blk_trace *bt; - - if (!bio->bi_bdev) - return; - - q = bdev_get_queue(bio->bi_bdev); - bt = q->blk_trace; - - /* - * Request based drivers will generate both rq and bio completions. - * Ignore bio ones. - */ - if (likely(!bt) || bt->rq_based) - return; - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); } diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index a54f26f82eb2..e134d8f365dd 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -25,7 +25,8 @@ static struct kmem_cache *user_ns_cachep __read_mostly; -static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, +static bool new_idmap_permitted(const struct file *file, + struct user_namespace *ns, int cap_setid, struct uid_gid_map *map); static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) @@ -612,10 +613,10 @@ static ssize_t map_write(struct file *file, const char __user *buf, if (map->nr_extents != 0) goto out; - /* Require the appropriate privilege CAP_SETUID or CAP_SETGID - * over the user namespace in order to set the id mapping. + /* + * Adjusting namespace settings requires capabilities on the target. */ - if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid)) + if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) goto out; /* Get a buffer */ @@ -700,7 +701,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ret = -EPERM; /* Validate the user is allowed to use user id's mapped to. */ - if (!new_idmap_permitted(ns, cap_setid, &new_map)) + if (!new_idmap_permitted(file, ns, cap_setid, &new_map)) goto out; /* Map the lower ids from the parent user namespace to the @@ -787,7 +788,8 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t &ns->projid_map, &ns->parent->projid_map); } -static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, +static bool new_idmap_permitted(const struct file *file, + struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { /* Allow mapping to your own filesystem ids */ @@ -795,12 +797,12 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, current_fsuid())) + if (uid_eq(uid, file->f_cred->fsuid)) return true; } else if (cap_setid == CAP_SETGID) { kgid_t gid = make_kgid(ns->parent, id); - if (gid_eq(gid, current_fsgid())) + if (gid_eq(gid, file->f_cred->fsgid)) return true; } } @@ -811,8 +813,10 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, /* Allow the specified ids if we have the appropriate capability * (CAP_SETUID or CAP_SETGID) over the parent user namespace. + * And the opener of the id file also had the approprpiate capability. */ - if (ns_capable(ns->parent, cap_setid)) + if (ns_capable(ns->parent, cap_setid) && + file_ns_capable(file, ns->parent, cap_setid)) return true; return false; |