diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched/cpupri.c | 157 |
1 files changed, 101 insertions, 56 deletions
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 1a2719e1350a..1bcfa1995550 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -41,6 +41,59 @@ static int convert_prio(int prio) return cpupri; } +static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p, + struct cpumask *lowest_mask, int idx) +{ + struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; + int skip = 0; + + if (!atomic_read(&(vec)->count)) + skip = 1; + /* + * When looking at the vector, we need to read the counter, + * do a memory barrier, then read the mask. + * + * Note: This is still all racey, but we can deal with it. + * Ideally, we only want to look at masks that are set. + * + * If a mask is not set, then the only thing wrong is that we + * did a little more work than necessary. + * + * If we read a zero count but the mask is set, because of the + * memory barriers, that can only happen when the highest prio + * task for a run queue has left the run queue, in which case, + * it will be followed by a pull. If the task we are processing + * fails to find a proper place to go, that pull request will + * pull this task if the run queue is running at a lower + * priority. + */ + smp_rmb(); + + /* Need to do the rmb for every iteration */ + if (skip) + return 0; + + if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids) + return 0; + + if (lowest_mask) { + cpumask_and(lowest_mask, p->cpus_ptr, vec->mask); + + /* + * We have to ensure that we have at least one bit + * still set in the array, since the map could have + * been concurrently emptied between the first and + * second reads of vec->mask. If we hit this + * condition, simply act as though we never hit this + * priority level and continue on. + */ + if (cpumask_empty(lowest_mask)) + return 0; + } + + return 1; +} + /** * cpupri_find - find the best (lowest-pri) CPU in the system * @cp: The cpupri context @@ -62,80 +115,72 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, struct cpumask *lowest_mask, bool (*fitness_fn)(struct task_struct *p, int cpu)) { - int idx = 0; int task_pri = convert_prio(p->prio); + int best_unfit_idx = -1; + int idx = 0, cpu; BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES); for (idx = 0; idx < task_pri; idx++) { - struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; - int skip = 0; - if (!atomic_read(&(vec)->count)) - skip = 1; - /* - * When looking at the vector, we need to read the counter, - * do a memory barrier, then read the mask. - * - * Note: This is still all racey, but we can deal with it. - * Ideally, we only want to look at masks that are set. - * - * If a mask is not set, then the only thing wrong is that we - * did a little more work than necessary. - * - * If we read a zero count but the mask is set, because of the - * memory barriers, that can only happen when the highest prio - * task for a run queue has left the run queue, in which case, - * it will be followed by a pull. If the task we are processing - * fails to find a proper place to go, that pull request will - * pull this task if the run queue is running at a lower - * priority. - */ - smp_rmb(); - - /* Need to do the rmb for every iteration */ - if (skip) - continue; - - if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids) + if (!__cpupri_find(cp, p, lowest_mask, idx)) continue; - if (lowest_mask) { - int cpu; + if (!lowest_mask || !fitness_fn) + return 1; - cpumask_and(lowest_mask, p->cpus_ptr, vec->mask); + /* Ensure the capacity of the CPUs fit the task */ + for_each_cpu(cpu, lowest_mask) { + if (!fitness_fn(p, cpu)) + cpumask_clear_cpu(cpu, lowest_mask); + } + /* + * If no CPU at the current priority can fit the task + * continue looking + */ + if (cpumask_empty(lowest_mask)) { /* - * We have to ensure that we have at least one bit - * still set in the array, since the map could have - * been concurrently emptied between the first and - * second reads of vec->mask. If we hit this - * condition, simply act as though we never hit this - * priority level and continue on. + * Store our fallback priority in case we + * didn't find a fitting CPU */ - if (cpumask_empty(lowest_mask)) - continue; + if (best_unfit_idx == -1) + best_unfit_idx = idx; - if (!fitness_fn) - return 1; - - /* Ensure the capacity of the CPUs fit the task */ - for_each_cpu(cpu, lowest_mask) { - if (!fitness_fn(p, cpu)) - cpumask_clear_cpu(cpu, lowest_mask); - } - - /* - * If no CPU at the current priority can fit the task - * continue looking - */ - if (cpumask_empty(lowest_mask)) - continue; + continue; } return 1; } + /* + * If we failed to find a fitting lowest_mask, make sure we fall back + * to the last known unfitting lowest_mask. + * + * Note that the map of the recorded idx might have changed since then, + * so we must ensure to do the full dance to make sure that level still + * holds a valid lowest_mask. + * + * As per above, the map could have been concurrently emptied while we + * were busy searching for a fitting lowest_mask at the other priority + * levels. + * + * This rule favours honouring priority over fitting the task in the + * correct CPU (Capacity Awareness being the only user now). + * The idea is that if a higher priority task can run, then it should + * run even if this ends up being on unfitting CPU. + * + * The cost of this trade-off is not entirely clear and will probably + * be good for some workloads and bad for others. + * + * The main idea here is that if some CPUs were overcommitted, we try + * to spread which is what the scheduler traditionally did. Sys admins + * must do proper RT planning to avoid overloading the system if they + * really care. + */ + if (best_unfit_idx != -1) + return __cpupri_find(cp, p, lowest_mask, best_unfit_idx); + return 0; } |