diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-12-12 13:48:57 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-12-12 13:48:57 +0100 |
commit | 45ab6b0c76d0e4cce5bd608ccf97b0f6b20f18df (patch) | |
tree | 4d51c73533c386aee16fde1e74b5e3bc22eedc53 /kernel | |
parent | 81444a799550214f549caf579cf65a0ca55e70b7 (diff) | |
parent | d65bd5ecb2bd166cea4952a59b7e16cc3ad6ef6c (diff) |
Merge branch 'sched/core' into cpus4096
Conflicts:
include/linux/ftrace.h
kernel/sched.c
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 32 | ||||
-rw-r--r-- | kernel/auditsc.c | 24 | ||||
-rw-r--r-- | kernel/fork.c | 16 | ||||
-rw-r--r-- | kernel/latencytop.c | 2 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 2 | ||||
-rw-r--r-- | kernel/power/swap.c | 2 | ||||
-rw-r--r-- | kernel/relay.c | 7 | ||||
-rw-r--r-- | kernel/sched.c | 99 | ||||
-rw-r--r-- | kernel/sched_debug.c | 6 | ||||
-rw-r--r-- | kernel/sched_rt.c | 4 | ||||
-rw-r--r-- | kernel/softlockup.c | 2 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 22 | ||||
-rw-r--r-- | kernel/user.c | 2 |
13 files changed, 149 insertions, 71 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 4414e93d8750..ce6d8ea3131e 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -61,8 +61,11 @@ #include "audit.h" -/* No auditing will take place until audit_initialized != 0. +/* No auditing will take place until audit_initialized == AUDIT_INITIALIZED. * (Initialization happens after skb_init is called.) */ +#define AUDIT_DISABLED -1 +#define AUDIT_UNINITIALIZED 0 +#define AUDIT_INITIALIZED 1 static int audit_initialized; #define AUDIT_OFF 0 @@ -965,6 +968,9 @@ static int __init audit_init(void) { int i; + if (audit_initialized == AUDIT_DISABLED) + return 0; + printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0, @@ -976,7 +982,7 @@ static int __init audit_init(void) skb_queue_head_init(&audit_skb_queue); skb_queue_head_init(&audit_skb_hold_queue); - audit_initialized = 1; + audit_initialized = AUDIT_INITIALIZED; audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; @@ -999,13 +1005,21 @@ __initcall(audit_init); static int __init audit_enable(char *str) { audit_default = !!simple_strtol(str, NULL, 0); - printk(KERN_INFO "audit: %s%s\n", - audit_default ? "enabled" : "disabled", - audit_initialized ? "" : " (after initialization)"); - if (audit_initialized) { + if (!audit_default) + audit_initialized = AUDIT_DISABLED; + + printk(KERN_INFO "audit: %s", audit_default ? "enabled" : "disabled"); + + if (audit_initialized == AUDIT_INITIALIZED) { audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; + } else if (audit_initialized == AUDIT_UNINITIALIZED) { + printk(" (after initialization)"); + } else { + printk(" (until reboot)"); } + printk("\n"); + return 1; } @@ -1107,9 +1121,7 @@ unsigned int audit_serial(void) static inline void audit_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial) { - if (ctx) - auditsc_get_stamp(ctx, t, serial); - else { + if (!ctx || !auditsc_get_stamp(ctx, t, serial)) { *t = CURRENT_TIME; *serial = audit_serial(); } @@ -1146,7 +1158,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int reserve; unsigned long timeout_start = jiffies; - if (!audit_initialized) + if (audit_initialized != AUDIT_INITIALIZED) return NULL; if (unlikely(audit_filter_type(type))) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index cf5bc2f5f9c3..2a3f0afc4d2a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1459,7 +1459,6 @@ void audit_free(struct task_struct *tsk) /** * audit_syscall_entry - fill in an audit record at syscall entry - * @tsk: task being audited * @arch: architecture type * @major: major syscall type (function) * @a1: additional syscall register 1 @@ -1548,9 +1547,25 @@ void audit_syscall_entry(int arch, int major, context->ppid = 0; } +void audit_finish_fork(struct task_struct *child) +{ + struct audit_context *ctx = current->audit_context; + struct audit_context *p = child->audit_context; + if (!p || !ctx || !ctx->auditable) + return; + p->arch = ctx->arch; + p->major = ctx->major; + memcpy(p->argv, ctx->argv, sizeof(ctx->argv)); + p->ctime = ctx->ctime; + p->dummy = ctx->dummy; + p->auditable = ctx->auditable; + p->in_syscall = ctx->in_syscall; + p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL); + p->ppid = current->pid; +} + /** * audit_syscall_exit - deallocate audit context after a system call - * @tsk: task being audited * @valid: success/failure flag * @return_code: syscall return value * @@ -1942,15 +1957,18 @@ EXPORT_SYMBOL_GPL(__audit_inode_child); * * Also sets the context as auditable. */ -void auditsc_get_stamp(struct audit_context *ctx, +int auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial) { + if (!ctx->in_syscall) + return 0; if (!ctx->serial) ctx->serial = audit_serial(); t->tv_sec = ctx->ctime.tv_sec; t->tv_nsec = ctx->ctime.tv_nsec; *serial = ctx->serial; ctx->auditable = 1; + return 1; } /* global counter which is incremented every time something logs in */ diff --git a/kernel/fork.c b/kernel/fork.c index 7407ab319875..7b93da72d4a2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -319,17 +319,20 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) file = tmp->vm_file; if (file) { struct inode *inode = file->f_path.dentry->d_inode; + struct address_space *mapping = file->f_mapping; + get_file(file); if (tmp->vm_flags & VM_DENYWRITE) atomic_dec(&inode->i_writecount); - - /* insert tmp into the share list, just after mpnt */ - spin_lock(&file->f_mapping->i_mmap_lock); + spin_lock(&mapping->i_mmap_lock); + if (tmp->vm_flags & VM_SHARED) + mapping->i_mmap_writable++; tmp->vm_truncate_count = mpnt->vm_truncate_count; - flush_dcache_mmap_lock(file->f_mapping); + flush_dcache_mmap_lock(mapping); + /* insert tmp into the share list, just after mpnt */ vma_prio_tree_add(tmp, mpnt); - flush_dcache_mmap_unlock(file->f_mapping); - spin_unlock(&file->f_mapping->i_mmap_lock); + flush_dcache_mmap_unlock(mapping); + spin_unlock(&mapping->i_mmap_lock); } /* @@ -1406,6 +1409,7 @@ long do_fork(unsigned long clone_flags, init_completion(&vfork); } + audit_finish_fork(p); tracehook_report_clone(trace, regs, clone_flags, nr, p); /* diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 5e7b45c56923..449db466bdbc 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -191,7 +191,7 @@ static int lstats_show(struct seq_file *m, void *v) latency_record[i].time, latency_record[i].max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; + char sym[KSYM_SYMBOL_LEN]; char *c; if (!latency_record[i].backtrace[q]) break; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 3f4377e0aa04..157de3a47832 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -311,7 +311,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock, struct task_cputime cputime; thread_group_cputime(p, &cputime); - switch (which_clock) { + switch (CPUCLOCK_WHICH(which_clock)) { default: return -EINVAL; case CPUCLOCK_PROF: diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b7713b53d07a..6da14358537c 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -633,7 +633,7 @@ void swsusp_close(fmode_t mode) return; } - blkdev_put(resume_bdev, mode); /* move up */ + blkdev_put(resume_bdev, mode); } static int swsusp_header_init(void) diff --git a/kernel/relay.c b/kernel/relay.c index 32b0befdcb6a..09ac2008f77b 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1317,12 +1317,9 @@ static ssize_t relay_file_splice_read(struct file *in, if (ret < 0) break; else if (!ret) { - if (spliced) - break; - if (flags & SPLICE_F_NONBLOCK) { + if (flags & SPLICE_F_NONBLOCK) ret = -EAGAIN; - break; - } + break; } *ppos += ret; diff --git a/kernel/sched.c b/kernel/sched.c index 4ed9f588faa6..e00c92d22655 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -267,6 +267,10 @@ struct task_group { struct cgroup_subsys_state css; #endif +#ifdef CONFIG_USER_SCHED + uid_t uid; +#endif + #ifdef CONFIG_FAIR_GROUP_SCHED /* schedulable entities of this group on each cpu */ struct sched_entity **se; @@ -292,6 +296,12 @@ struct task_group { #ifdef CONFIG_USER_SCHED +/* Helper function to pass uid information to create_sched_user() */ +void set_tg_uid(struct user_struct *user) +{ + user->tg->uid = user->uid; +} + /* * Root task group. * Every UID task group (including init_task_group aka UID-0) will @@ -1587,6 +1597,39 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) #endif +/* + * double_lock_balance - lock the busiest runqueue, this_rq is locked already. + */ +static int double_lock_balance(struct rq *this_rq, struct rq *busiest) + __releases(this_rq->lock) + __acquires(busiest->lock) + __acquires(this_rq->lock) +{ + int ret = 0; + + if (unlikely(!irqs_disabled())) { + /* printk() doesn't work good under rq->lock */ + spin_unlock(&this_rq->lock); + BUG_ON(1); + } + if (unlikely(!spin_trylock(&busiest->lock))) { + if (busiest < this_rq) { + spin_unlock(&this_rq->lock); + spin_lock(&busiest->lock); + spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); + ret = 1; + } else + spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); + } + return ret; +} + +static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) + __releases(busiest->lock) +{ + spin_unlock(&busiest->lock); + lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); +} #endif #ifdef CONFIG_FAIR_GROUP_SCHED @@ -2784,40 +2827,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) } /* - * double_lock_balance - lock the busiest runqueue, this_rq is locked already. - */ -static int double_lock_balance(struct rq *this_rq, struct rq *busiest) - __releases(this_rq->lock) - __acquires(busiest->lock) - __acquires(this_rq->lock) -{ - int ret = 0; - - if (unlikely(!irqs_disabled())) { - /* printk() doesn't work good under rq->lock */ - spin_unlock(&this_rq->lock); - BUG_ON(1); - } - if (unlikely(!spin_trylock(&busiest->lock))) { - if (busiest < this_rq) { - spin_unlock(&this_rq->lock); - spin_lock(&busiest->lock); - spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); - ret = 1; - } else - spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); - } - return ret; -} - -static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) - __releases(busiest->lock) -{ - spin_unlock(&busiest->lock); - lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); -} - -/* * If dest_cpu is allowed for this process, migrate the task to it. * This is accomplished by forcing the cpu_allowed mask to only * allow dest_cpu, which will force the cpu onto dest_cpu. Then @@ -3676,7 +3685,7 @@ out_balanced: static void idle_balance(int this_cpu, struct rq *this_rq) { struct sched_domain *sd; - int pulled_task = -1; + int pulled_task = 0; unsigned long next_balance = jiffies + HZ; cpumask_var_t tmpmask; @@ -6577,7 +6586,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) req = list_entry(rq->migration_queue.next, struct migration_req, list); list_del_init(&req->list); + spin_unlock_irq(&rq->lock); complete(&req->done); + spin_lock_irq(&rq->lock); } spin_unlock_irq(&rq->lock); break; @@ -6781,6 +6792,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) SD_BALANCE_EXEC | SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES); + if (nr_node_ids == 1) + pflags &= ~SD_SERIALIZE; } if (~cflags & pflags) return 0; @@ -7716,8 +7729,14 @@ static struct sched_domain_attr *dattr_cur; */ static cpumask_var_t fallback_doms; -void __attribute__((weak)) arch_update_cpu_topology(void) +/* + * arch_update_cpu_topology lets virtualized architectures update the + * cpu core maps. It is supposed to return 1 if the topology changed + * or 0 if it stayed the same. + */ +int __attribute__((weak)) arch_update_cpu_topology(void) { + return 0; } /* @@ -7811,17 +7830,21 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, struct sched_domain_attr *dattr_new) { int i, j, n; + int new_topology; mutex_lock(&sched_domains_mutex); /* always unregister in case we don't destroy any domains */ unregister_sched_domain_sysctl(); + /* Let architecture update cpu core mappings. */ + new_topology = arch_update_cpu_topology(); + n = doms_new ? ndoms_new : 0; /* Destroy deleted domains */ for (i = 0; i < ndoms_cur; i++) { - for (j = 0; j < n; j++) { + for (j = 0; j < n && !new_topology; j++) { if (cpumask_equal(&doms_cur[i], &doms_new[j]) && dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; @@ -7841,7 +7864,7 @@ match1: /* Build new domains */ for (i = 0; i < ndoms_new; i++) { - for (j = 0; j < ndoms_cur; j++) { + for (j = 0; j < ndoms_cur && !new_topology; j++) { if (cpumask_equal(&doms_new[i], &doms_cur[j]) && dattrs_equal(dattr_new, i, dattr_cur, j)) goto match2; diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index baf2f17af462..4293cfa9681d 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -160,10 +160,14 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cgroup_path(tg->css.cgroup, path, sizeof(path)); SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); +#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) + { + uid_t uid = cfs_rq->tg->uid; + SEQ_printf(m, "\ncfs_rq[%d] for UID: %u\n", cpu, uid); + } #else SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); #endif - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 94aab72f6a02..1bbd99014011 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -914,10 +914,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) /* Only try algorithms three times */ #define RT_MAX_TRIES 3 -static int double_lock_balance(struct rq *this_rq, struct rq *busiest); -static inline void double_unlock_balance(struct rq *this_rq, - struct rq *busiest); - static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 884e6cd2769c..1ab790c67b17 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -188,7 +188,7 @@ static void check_hung_task(struct task_struct *t, unsigned long now) if ((long)(now - t->last_switch_timestamp) < sysctl_hung_task_timeout_secs) return; - if (sysctl_hung_task_warnings < 0) + if (!sysctl_hung_task_warnings) return; sysctl_hung_task_warnings--; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e7acfb482a68..fa05e88aa76f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -518,6 +518,28 @@ void update_wall_time(void) /* correct the clock when NTP error is too big */ clocksource_adjust(offset); + /* + * Since in the loop above, we accumulate any amount of time + * in xtime_nsec over a second into xtime.tv_sec, its possible for + * xtime_nsec to be fairly small after the loop. Further, if we're + * slightly speeding the clocksource up in clocksource_adjust(), + * its possible the required corrective factor to xtime_nsec could + * cause it to underflow. + * + * Now, we cannot simply roll the accumulated second back, since + * the NTP subsystem has been notified via second_overflow. So + * instead we push xtime_nsec forward by the amount we underflowed, + * and add that amount into the error. + * + * We'll correct this error next time through this function, when + * xtime_nsec is not as small. + */ + if (unlikely((s64)clock->xtime_nsec < 0)) { + s64 neg = -(s64)clock->xtime_nsec; + clock->xtime_nsec = 0; + clock->error += neg << (NTP_SCALE_SHIFT - clock->shift); + } + /* store full nanoseconds into xtime after rounding it up and * add the remainder to the error difference. */ diff --git a/kernel/user.c b/kernel/user.c index 39d6159fae43..cec2224bc9f5 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -101,6 +101,8 @@ static int sched_create_user(struct user_struct *up) if (IS_ERR(up->tg)) rc = -ENOMEM; + set_tg_uid(up); + return rc; } |