summaryrefslogtreecommitdiff
path: root/kernel/sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c107
1 files changed, 70 insertions, 37 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index deb5ac8c12f3..8e2558c2ba67 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -125,6 +125,9 @@ DEFINE_TRACE(sched_switch);
DEFINE_TRACE(sched_migrate_task);
#ifdef CONFIG_SMP
+
+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
/*
* Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
* Since cpu_power is a 'constant', we can use a reciprocal divide.
@@ -220,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
ktime_t now;
- if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
+ if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
return;
if (hrtimer_active(&rt_b->rt_period_timer))
@@ -1320,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
* slice expiry etc.
*/
-#define WEIGHT_IDLEPRIO 2
-#define WMULT_IDLEPRIO (1 << 31)
+#define WEIGHT_IDLEPRIO 3
+#define WMULT_IDLEPRIO 1431655765
/*
* Nice levels are multiplicative, with a gentle 10% change for every
@@ -3877,19 +3880,24 @@ int select_nohz_load_balancer(int stop_tick)
int cpu = smp_processor_id();
if (stop_tick) {
- cpumask_set_cpu(cpu, nohz.cpu_mask);
cpu_rq(cpu)->in_nohz_recently = 1;
- /*
- * If we are going offline and still the leader, give up!
- */
- if (!cpu_active(cpu) &&
- atomic_read(&nohz.load_balancer) == cpu) {
+ if (!cpu_active(cpu)) {
+ if (atomic_read(&nohz.load_balancer) != cpu)
+ return 0;
+
+ /*
+ * If we are going offline and still the leader,
+ * give up!
+ */
if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
BUG();
+
return 0;
}
+ cpumask_set_cpu(cpu, nohz.cpu_mask);
+
/* time for ilb owner also to sleep */
if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4437,7 +4445,7 @@ void __kprobes sub_preempt_count(int val)
/*
* Underflow?
*/
- if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
return;
/*
* Is the spinlock portion underflowing?
@@ -4684,8 +4692,8 @@ EXPORT_SYMBOL(default_wake_function);
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*/
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, int sync, void *key)
+void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+ int nr_exclusive, int sync, void *key)
{
wait_queue_t *curr, *next;
@@ -5123,7 +5131,7 @@ int can_nice(const struct task_struct *p, const int nice)
* sys_setpriority is a more generic, but much slower function that
* does similar things.
*/
-asmlinkage long sys_nice(int increment)
+SYSCALL_DEFINE1(nice, int, increment)
{
long nice, retval;
@@ -5430,8 +5438,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
* @policy: new policy.
* @param: structure containing the new RT priority.
*/
-asmlinkage long
-sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
+ struct sched_param __user *, param)
{
/* negative values for policy are not valid */
if (policy < 0)
@@ -5445,7 +5453,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
* @pid: the pid in question.
* @param: structure containing the new RT priority.
*/
-asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
{
return do_sched_setscheduler(pid, -1, param);
}
@@ -5454,7 +5462,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
* sys_sched_getscheduler - get the policy (scheduling class) of a thread
* @pid: the pid in question.
*/
-asmlinkage long sys_sched_getscheduler(pid_t pid)
+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
{
struct task_struct *p;
int retval;
@@ -5479,7 +5487,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
* @pid: the pid in question.
* @param: structure containing the RT priority.
*/
-asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
{
struct sched_param lp;
struct task_struct *p;
@@ -5597,8 +5605,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to the new cpu mask
*/
-asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
- unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+ unsigned long __user *, user_mask_ptr)
{
cpumask_var_t new_mask;
int retval;
@@ -5645,8 +5653,8 @@ out_unlock:
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to hold the current cpu mask
*/
-asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
- unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+ unsigned long __user *, user_mask_ptr)
{
int ret;
cpumask_var_t mask;
@@ -5675,7 +5683,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
* This function yields the current CPU to other tasks. If there are no
* other threads running on this CPU then this function will return.
*/
-asmlinkage long sys_sched_yield(void)
+SYSCALL_DEFINE0(sched_yield)
{
struct rq *rq = this_rq_lock();
@@ -5816,7 +5824,7 @@ long __sched io_schedule_timeout(long timeout)
* this syscall returns the maximum rt_priority that can be used
* by a given scheduling class.
*/
-asmlinkage long sys_sched_get_priority_max(int policy)
+SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
{
int ret = -EINVAL;
@@ -5841,7 +5849,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
* this syscall returns the minimum rt_priority that can be used
* by a given scheduling class.
*/
-asmlinkage long sys_sched_get_priority_min(int policy)
+SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
{
int ret = -EINVAL;
@@ -5866,8 +5874,8 @@ asmlinkage long sys_sched_get_priority_min(int policy)
* this syscall writes the default timeslice value of a given process
* into the user-space timespec buffer. A value of '0' means infinity.
*/
-asmlinkage
-long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ struct timespec __user *, interval)
{
struct task_struct *p;
unsigned int time_slice;
@@ -6936,20 +6944,26 @@ static void free_rootdomain(struct root_domain *rd)
static void rq_attach_root(struct rq *rq, struct root_domain *rd)
{
+ struct root_domain *old_rd = NULL;
unsigned long flags;
spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
- struct root_domain *old_rd = rq->rd;
+ old_rd = rq->rd;
if (cpumask_test_cpu(rq->cpu, old_rd->online))
set_rq_offline(rq);
cpumask_clear_cpu(rq->cpu, old_rd->span);
- if (atomic_dec_and_test(&old_rd->refcount))
- free_rootdomain(old_rd);
+ /*
+ * If we dont want to free the old_rt yet then
+ * set old_rd to NULL to skip the freeing later
+ * in this function:
+ */
+ if (!atomic_dec_and_test(&old_rd->refcount))
+ old_rd = NULL;
}
atomic_inc(&rd->refcount);
@@ -6960,6 +6974,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
set_rq_online(rq);
spin_unlock_irqrestore(&rq->lock, flags);
+
+ if (old_rd)
+ free_rootdomain(old_rd);
}
static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
@@ -7282,10 +7299,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
* groups, so roll our own. Now each node has its own list of groups which
* gets dynamically allocated.
*/
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
static struct sched_group ***sched_group_nodes_bycpu;
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
@@ -7560,7 +7577,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
#ifdef CONFIG_NUMA
if (cpumask_weight(cpu_map) >
SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
- sd = &per_cpu(allnodes_domains, i);
+ sd = &per_cpu(allnodes_domains, i).sd;
SD_INIT(sd, ALLNODES);
set_domain_attribute(sd, attr);
cpumask_copy(sched_domain_span(sd), cpu_map);
@@ -7570,7 +7587,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
} else
p = NULL;
- sd = &per_cpu(node_domains, i);
+ sd = &per_cpu(node_domains, i).sd;
SD_INIT(sd, NODE);
set_domain_attribute(sd, attr);
sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
@@ -7688,7 +7705,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
for_each_cpu(j, nodemask) {
struct sched_domain *sd;
- sd = &per_cpu(node_domains, j);
+ sd = &per_cpu(node_domains, j).sd;
sd->groups = sg;
}
sg->__cpu_power = 0;
@@ -9047,6 +9064,13 @@ static int tg_schedulable(struct task_group *tg, void *data)
runtime = d->rt_runtime;
}
+#ifdef CONFIG_USER_SCHED
+ if (tg == &root_task_group) {
+ period = global_rt_period();
+ runtime = global_rt_runtime();
+ }
+#endif
+
/*
* Cannot have more runtime than the period.
*/
@@ -9200,6 +9224,16 @@ static int sched_rt_global_constraints(void)
return ret;
}
+
+int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
+{
+ /* Don't accept realtime tasks when there is no way for them to run */
+ if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+ return 0;
+
+ return 1;
+}
+
#else /* !CONFIG_RT_GROUP_SCHED */
static int sched_rt_global_constraints(void)
{
@@ -9293,8 +9327,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
struct task_struct *tsk)
{
#ifdef CONFIG_RT_GROUP_SCHED
- /* Don't accept realtime tasks when there is no way for them to run */
- if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
+ if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
return -EINVAL;
#else
/* We don't support RT-tasks being in separate groups */