概述
當一個task被fork或者被wakeup的時候,都需要爲它選擇一個目標CPU,然後進行入隊操作.完成這個選擇的函數就是core.c裏面的select_task_rq,它會根據task所屬的調度類(sched_class)進入不同的分支.下面分析rt調度類對應的函數:select_task_rq_rt
select_task_rq_rt
其源碼如下:
/*
 * select_task_rq_rt - choose the CPU on which RT task @p should be enqueued.
 *
 * @p:       the task being woken up or forked
 * @cpu:     the CPU proposed by the caller; returned unchanged unless a
 *           better target is found
 * @sd_flag: balancing reason; only SD_BALANCE_WAKE and SD_BALANCE_FORK
 *           trigger a search
 * @flags:   not used by this function
 *
 * Returns @cpu, or the CPU reported by find_lowest_rq() when that CPU's
 * highest RT priority is lower than @p's.
 */
static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
	struct task_struct *running;
	struct rq *src_rq;
	bool displace = false;
	int candidate;

	/*
	 * sd_flag is set to one of these two values on the
	 * try_to_wake_up()/wake_up_new_task() paths. It decides whether any
	 * balancing is attempted while picking a CPU: for anything else the
	 * task simply stays on the CPU that was passed in.
	 */
	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
		return cpu;

	src_rq = cpu_rq(cpu);

	rcu_read_lock();

	/* Lockless snapshot of the task currently running on @cpu. */
	running = READ_ONCE(src_rq->curr);

	/*
	 * If @cpu is currently running an RT task, consider waking @p on
	 * another runqueue; otherwise just start @p where it is.
	 *
	 * The goal is to avoid overloading runqueues. When the woken task has
	 * the higher priority it stays on this CPU and the lower priority
	 * task is the one that should move, even though that probably costs
	 * it its cache: a high priority task should not be bounced around
	 * merely because it gave up its CPU (perhaps for a lock).
	 *
	 * Equal priorities are left for the scheduler to sort out. In the
	 * remaining case @p rides on the affined runqueue and the
	 * post-schedule router pushes the preempted task away.
	 *
	 * This check is optimistic; if it is wrong the load balancer has to
	 * fix things up afterwards.
	 *
	 * "displace" ends up true when @cpu runs an RT task that either is
	 * pinned to a single CPU or has a priority at least as high as @p's
	 * (a numerically lower prio value means a higher priority).
	 */
	if (running && unlikely(rt_task(running)))
		displace = running->nr_cpus_allowed < 2 ||
			   running->prio <= p->prio;

	/*
	 * Look for another CPU when either condition holds:
	 *  1. displace: @p would not get to run promptly here, so try a CPU
	 *     that is running lower priority work;
	 *  2. @cpu does not fit @p's capacity requirement, which only matters
	 *     on heterogeneous systems such as big.LITTLE.
	 */
	if (displace || !rt_task_fits_capacity(p, cpu)) {
		/* Search driven by the per-CPU priority (cpupri) tracking. */
		candidate = find_lowest_rq(p);

		/*
		 * Do not bother moving @p unless the destination is running
		 * strictly lower priority work than @p.
		 */
		if (candidate != -1 &&
		    p->prio < cpu_rq(candidate)->rt.highest_prio.curr)
			cpu = candidate;
	}

	rcu_read_unlock();

	return cpu;
}
/*
 * uclamp is conceptually similar to the earlier ARM schedtune feature (the
 * article covers it in a separate chapter): it restricts or boosts the
 * utilization of a task / task group / runqueue.
 */
#ifdef CONFIG_UCLAMP_TASK
/*
 * rt_task_fits_capacity - does @cpu have enough capacity for task @p?
 *
 * The decision takes the task's uclamp settings into account and only
 * matters on heterogeneous systems, where a task's uclamp_min may exceed the
 * capacity of a given CPU. On non-heterogeneous systems the answer is always
 * true.
 *
 * Returns true when the capacity of @cpu is >= the task's effective
 * uclamp_min, false otherwise. If uclamp_min > uclamp_max, uclamp_max is
 * used as the requirement instead.
 */
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	unsigned int clamp_floor, clamp_ceiling;
	unsigned int required;
	unsigned int available;

	/* Only asymmetric-capacity systems can benefit from this check. */
	if (!static_branch_unlikely(&sched_asym_cpucapacity))
		return true;

	/* Effective uclamp min/max values of @p. */
	clamp_floor = uclamp_eff_value(p, UCLAMP_MIN);
	clamp_ceiling = uclamp_eff_value(p, UCLAMP_MAX);

	/* Original capacity of @cpu. */
	available = capacity_orig_of(cpu);

	/* The requirement is uclamp_min, capped at uclamp_max. */
	required = min(clamp_floor, clamp_ceiling);

	return available >= required;
}
#else
/* Without CONFIG_UCLAMP_TASK every CPU is treated as fitting every task. */
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	return true;
}
#endif
find_lowest_rq
接下來分析find_lowest_rq原理:
/*
 * find_lowest_rq - pick a CPU running low priority work that @task can use.
 *
 * @task: the RT task looking for a CPU
 *
 * Builds a mask of candidate CPUs via cpupri_find()/cpupri_find_fitness()
 * and then selects one of them, preferring in order:
 *   1. the CPU @task last ran on (likely cache-hot),
 *   2. the CPU executing this function, if it shares an SD_WAKE_AFFINE
 *      domain with that CPU,
 *   3. the first candidate found inside such a domain,
 *   4. the executing CPU if it is a candidate, else any candidate.
 *
 * Returns the chosen CPU id, or -1 when no suitable CPU exists.
 */
static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);
	int ret;	/* result of the cpupri lookup; zero means nothing found */

	/* Make sure the mask is initialized first */
	if (unlikely(!lowest_mask))
		return -1;

	/* Affinity pins the task to a single CPU: no other targets possible. */
	if (task->nr_cpus_allowed == 1)
		return -1;

	/*
	 * Fill lowest_mask with the candidate CPUs for this task's priority.
	 * NOTE(review): per the article, this is the first non-empty cpumask
	 * found among CPU priority levels below the one derived from
	 * task->prio; confirm against the cpupri implementation.
	 *
	 * If we're on asym system ensure we consider the different capacities
	 * of the CPUs when searching for the lowest_mask.
	 */
	if (static_branch_unlikely(&sched_asym_cpucapacity)) {
		/* Asymmetric system: take CPU capacity into account. */
		ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
					  task, lowest_mask,
					  rt_task_fits_capacity);
	} else {
		/* Capacity is not considered on this path. */
		ret = cpupri_find(&task_rq(task)->rd->cpupri,
				  task, lowest_mask);
	}

	if (!ret)
		return -1; /* No targets found */

	/*
	 * At this point we have built a mask of CPUs representing the
	 * lowest priority tasks in the system. Now we want to elect
	 * the best one based on our affinity and topology.
	 *
	 * We prioritize the last CPU that the task executed on since
	 * it is most likely cache-hot in that location.
	 */
	if (cpumask_test_cpu(cpu, lowest_mask))
		return cpu;

	/*
	 * Otherwise, we consult the sched_domains span maps to figure
	 * out which CPU is logically closest to our hot cache data.
	 */
	if (!cpumask_test_cpu(this_cpu, lowest_mask))
		this_cpu = -1; /* Skip this_cpu opt if not among lowest */

	rcu_read_lock();
	/*
	 * Walk the domains of the task's last CPU:
	 *  1. if the executing CPU is in lowest_mask and inside this domain's
	 *     span, use it as the target;
	 *  2. otherwise take the first CPU in lowest_mask & span, if any.
	 */
	for_each_domain(cpu, sd) {
		/*
		 * Only domains flagged SD_WAKE_AFFINE are considered.
		 * NOTE(review): per the article this flag is normally set.
		 */
		if (sd->flags & SD_WAKE_AFFINE) {
			int best_cpu;

			/*
			 * "this_cpu" is cheaper to preempt than a
			 * remote processor.
			 */
			if (this_cpu != -1 &&
			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				rcu_read_unlock();
				return this_cpu;
			}

			best_cpu = cpumask_first_and(lowest_mask,
						     sched_domain_span(sd));
			if (best_cpu < nr_cpu_ids) {
				rcu_read_unlock();
				return best_cpu;
			}
		}
	}
	rcu_read_unlock();

	/*
	 * And finally, if there were no matches within the domains
	 * just give the caller *something* to work with from the compatible
	 * locations.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any(lowest_mask);
	if (cpu < nr_cpu_ids)
		return cpu;

	return -1;
}
整體邏輯比較簡單,核心函數是cpupri_find_fitness(非ASYM系統上則是cpupri_find),已經在這個章節分析完畢: [scheduler]二. CPU priority概念以及原理
總之是層層推進,選擇符合要求的cpu id.