- At sugov initialization, a callback is registered for each CPU and hooked into its update_util_data:
```c
797 static int sugov_start(struct cpufreq_policy *policy) {
823 	for_each_cpu(cpu, policy->cpus) {
824 		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
825
826 		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
827 					     policy_is_shared(policy) ?
828 					     sugov_update_shared :
829 					     sugov_update_single);
830 	}
832 }

 34 void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
 35 			void (*func)(struct update_util_data *data, u64 time,
 36 				     unsigned int flags))
 44 	data->func = func;
 46 }
```
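The excerpt elides most of the body of cpufreq_add_update_util_hook(). For reference, the upstream helper (kernel/sched/cpufreq.c, ~v4.14; this tree may differ slightly) also publishes the per-CPU hook pointer under RCU, which is what cpufreq_update_util() below dereferences:

```c
/* Upstream ~v4.14 body, shown for context; this tree may differ slightly. */
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
			void (*func)(struct update_util_data *data, u64 time,
				     unsigned int flags))
{
	if (WARN_ON(!data || !func))
		return;

	if (WARN_ON(per_cpu(cpufreq_update_util_data, cpu)))
		return;

	data->func = func;
	/* Publish the hook so cpufreq_update_util() can find it under RCU. */
	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
}
```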
On a quad-core CPU with three little cores and one big core, the three little cores share one policy (shared) while the big core has a policy of its own (single).
- Whenever a CPU's util is updated, every scheduling class calls cpufreq_update_util(), which invokes the callback registered above to drive the frequency change:
```c
2225 static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
2226 {
2227 	struct update_util_data *data;
2228
2229 	data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
2230 						   cpu_of(rq)));
2231 	if (data)
2232 		data->func(data, rq_clock(rq), flags);
2233 }
```
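For context, the callers sit in the scheduling classes themselves. A condensed, hypothetical sketch modeled on the ~v4.14 fair/deadline classes (the _sketch names are not real kernel functions, and exact call sites vary by version):

```c
/* Hypothetical, condensed call sites modeled on ~v4.14 sources. */
static void enqueue_task_fair_sketch(struct rq *rq, struct task_struct *p)
{
	/* A task waking from iowait requests an iowait boost. */
	if (p->in_iowait)
		cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
}

static void update_curr_dl_sketch(struct rq *rq)
{
	/* The deadline class asks sugov for the maximum frequency. */
	cpufreq_update_util(rq, SCHED_CPUFREQ_DL);
}
```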
- For cpu0~2 this invokes sugov_update_shared(); for cpu3 it invokes sugov_update_single().
```c
446 static void sugov_update_shared(struct update_util_data *hook, u64 time,
447 				unsigned int flags)
448 {
455 	util = sugov_get_util(&max, sg_cpu->cpu);
459 	sg_cpu->util = util;
460 	sg_cpu->max = max;
461 	sg_cpu->flags = flags;
464 	sugov_set_iowait_boost(sg_cpu, time, flags);
465 	sg_cpu->last_update = time;
466
467 	if (sugov_should_update_freq(sg_policy, time)) {
469 		if (flags & SCHED_CPUFREQ_DL) {
470 			next_f = sg_policy->policy->cpuinfo.max_freq;
472 		}
473 		else {
474 			next_f = sugov_next_freq_shared(sg_cpu, time);
476 		}
477 		sugov_update_commit(sg_policy, time, next_f);
478 	}
479
480 	raw_spin_unlock(&sg_policy->update_lock);
481 }
```
(a) sugov_get_util() obtains the CPU's util, combining the CFS, RT, and IRQ contributions; see the formula at lines 265~270 below. The boost step then takes boost_max, the largest boost-margin value across the boost groups, and adjusts util with it.
```c
218 static unsigned long sugov_get_util(unsigned long *max, int cpu)
219 {
220 	struct rq *rq = cpu_rq(cpu);
221 	unsigned long max_cap, irq, util;
222
223 	*max = max_cap = arch_scale_cpu_capacity(NULL, cpu);
233 	/*
234 	 * Early check to see if IRQ/steal time saturates the CPU, can be
235 	 * because of inaccuracies in how we track these -- see
236 	 * update_irq_load_avg().
237 	 */
238 	irq = cpu_util_irq(rq);
240 	if (unlikely(irq >= max_cap))
241 		return max_cap;
242
243 	/*
244 	 * Because the time spend on RT/DL tasks is visible as 'lost' time to
245 	 * CFS tasks and we use the same metric to track the effective
246 	 * utilization (PELT windows are synchronized) we can directly add them
247 	 * to obtain the CPU's actual utilization.
248 	 *
249 	 * Check if the CFS+RT sum is saturated (ie. no idle time) such that
250 	 * we select f_max when there is no idle time.
251 	 */
252 	util = cpu_util_freq(cpu);
254 	util += sched_get_rt_rq_util(cpu);
256 	if (util >= max_cap)
257 		return max_cap;
258
259 	/*
260 	 * There is still idle time; further improve the number by using the
261 	 * irq metric. Because IRQ/steal time is hidden from the task clock we
262 	 * need to scale the task numbers:
263 	 *
264 	 *                    irq
265 	 *   U' = irq + (1 - -----) * U
266 	 *                    max
267 	 */
268 	util = scale_irq_capacity(util, irq, max_cap);
270 	util += irq;
271
272 	util = boosted_cpu_util(cpu, util);
273
275 	return min(util, max_cap);
276 }
```
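A standalone sketch of the arithmetic above, using made-up sample numbers: CFS and RT utils add up directly, scale_irq_capacity() applies U' = irq + (1 - irq/max) * U, and the final boost mimics a schedtune-style margin (boost_max% of the remaining headroom — that margin formula is an assumption here, since boosted_cpu_util() itself is not quoted in this tree):

```c
#include <stdio.h>

/* (1 - irq/max) * U, as in lines 259-268 above */
static unsigned long scale_irq_capacity(unsigned long util,
					unsigned long irq,
					unsigned long max)
{
	return util * (max - irq) / max;
}

int main(void)
{
	unsigned long max_cap = 1024;	/* arch_scale_cpu_capacity() */
	unsigned long cfs = 300, rt = 100, irq = 64;	/* sample values */
	unsigned long boost_max = 10;	/* largest boost-group margin, in % */

	unsigned long util = cfs + rt;			/* 400 */
	if (irq >= max_cap || util >= max_cap) {
		util = max_cap;			/* saturated: pick f_max */
	} else {
		util = scale_irq_capacity(util, irq, max_cap);	/* 375 */
		util += irq;					/* 439 */
		/* schedtune-style boost: boost_max% of the headroom */
		util += (max_cap - util) * boost_max / 100;	/* 497 */
	}

	printf("final util = %lu / %lu\n",
	       util < max_cap ? util : max_cap, max_cap);
	return 0;
}
```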
(b) sugov_set_iowait_boost() only takes effect for a handful of tasks, and line 292 is never reached in practice:
```c
278 static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
279 				unsigned int flags)
280 {
281 	if (flags & SCHED_CPUFREQ_IOWAIT) {
282 		trace_printk("----->%s;%d\n", __func__, __LINE__);
283 		if (sg_cpu->iowait_boost_pending)
284 			return;
285 		trace_printk("----->%s;%d\n", __func__, __LINE__);
286
287 		sg_cpu->iowait_boost_pending = true;
288
289 		if (sg_cpu->iowait_boost) {
290 			sg_cpu->iowait_boost <<= 1;
291 			if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max) {
292 				trace_printk("----->%s;%d;ioboost:%u;iobmax:%u\n", __func__, __LINE__, sg_cpu->iowait_boost, sg_cpu->iowait_boost_max);
293 				sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
294 			}
295 		} else {
296 			sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
297 			trace_printk("----->%s;%d;ioboost:%u\n", __func__, __LINE__, sg_cpu->iowait_boost);
298 		}
299 	} else if (sg_cpu->iowait_boost) {
300 		trace_printk("----->%s;%d\n", __func__, __LINE__);
301 		s64 delta_ns = time - sg_cpu->last_update;
302
303 		/* Clear iowait_boost if the CPU apprears to have been idle. */
304 		if (delta_ns > TICK_NSEC) {
305 			trace_printk("----->%s;%d\n", __func__, __LINE__);
306 			sg_cpu->iowait_boost = 0;
307 			sg_cpu->iowait_boost_pending = false;
308 		}
309 	}
310 }
```
```
287644   mmcqd/0-154   [000] d..4   142.425328: sugov_set_iowait_boost: ----->sugov_set_iowait_boost;282
287645   mmcqd/0-154   [000] d..4   142.425330: sugov_set_iowait_boost.part.3: ----->sugov_set_iowait_boost;285
287646   mmcqd/0-154   [000] d..4   142.425332: sugov_set_iowait_boost.part.3: ----->sugov_set_iowait_boost;297;ioboost:614400
```
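A minimal standalone sketch of the doubling ramp at lines 289-298: the boost starts at policy->min (614400 in the trace above) and doubles on each further iowait-flagged update until it is capped at iowait_boost_max (the cap value below is made up):

```c
#include <stdio.h>

int main(void)
{
	unsigned int iowait_boost = 0;
	unsigned int min_freq = 614400;		 /* policy->min, from the trace */
	unsigned int iowait_boost_max = 2457600; /* hypothetical cap */

	for (int wakeup = 1; wakeup <= 4; wakeup++) {
		if (iowait_boost) {
			iowait_boost <<= 1;		/* line 290 */
			if (iowait_boost > iowait_boost_max)
				iowait_boost = iowait_boost_max; /* 291-293 */
		} else {
			iowait_boost = min_freq;	/* line 296 */
		}
		printf("iowait wakeup %d: boost = %u\n", wakeup, iowait_boost);
	}
	return 0;
}
```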
(c) sugov_next_freq_shared()
It takes the maximum util across the CPUs in the shared policy and computes the iowait boost value. With these two values it calls get_next_freq(sg_policy, util, max) to compute the target frequency: next_freq = C * max_freq * util / max, where C = 1.25. In other words, once a CPU's util reaches 1/1.25 = 0.8 = 80% of its maximum capacity, the CPU is scaled to its maximum frequency.
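A standalone sketch of that formula; the kernel implements C = 1.25 as freq + (freq >> 2), and the sample frequency below is made up:

```c
#include <stdio.h>

/* next_freq = C * max_freq * util / max, with C = 1.25 = 1 + 1/4 */
static unsigned int get_next_freq_sketch(unsigned int max_freq,
					 unsigned long util,
					 unsigned long max)
{
	unsigned int freq = max_freq + (max_freq >> 2);	/* C * max_freq */

	return (unsigned int)(freq * util / max);
}

int main(void)
{
	unsigned int max_freq = 1228800;	/* kHz, sample value */
	unsigned long max = 1024;

	/* at util = 80% of max, next_freq already reaches max_freq */
	printf("util=512 -> %u kHz\n", get_next_freq_sketch(max_freq, 512, max));
	printf("util=819 -> %u kHz\n", get_next_freq_sketch(max_freq, 819, max));
	return 0;
}
```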
- sugov_update_commit(sg_policy, time, next_f) hands the selected frequency to the driver. This is done by queuing an irq work, irq_work_queue(&sg_policy->irq_work); the irq handler then queues sg_policy->work onto sg_policy->worker's work list. The sugov kthread runs at priority 49, so it preempts normal scheduling; it is woken up and executes sugov_work from the list, and at line 488 the driver function is called to perform the actual frequency change.
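The commit helper itself is not quoted here; modeled on the upstream ~v4.14 function (this tree may differ), it records next_freq and either fast-switches in scheduler context or falls back to the irq_work + kthread path shown below:

```c
/* Sketch of sugov_update_commit(), modeled on upstream ~v4.14. */
static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
				unsigned int next_freq)
{
	struct cpufreq_policy *policy = sg_policy->policy;

	if (sg_policy->next_freq == next_freq)
		return;

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;

	if (policy->fast_switch_enabled) {
		/* Drivers that can switch in scheduler context do it here. */
		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
		if (!next_freq)
			return;

		policy->cur = next_freq;
		trace_cpu_frequency(next_freq, smp_processor_id());
	} else {
		/* Otherwise defer to the sugov kthread via an irq work. */
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}
```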
```c
627 static int sugov_kthread_create(struct sugov_policy *sg_policy) {
638 	kthread_init_work(&sg_policy->work, sugov_work);
639 	kthread_init_worker(&sg_policy->worker);
640 	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
641 				"sugov:%d",
642 				cpumask_first(policy->related_cpus));
661 	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
664 	wake_up_process(thread);
667 }

495 static void sugov_irq_work(struct irq_work *irq_work)
496 {
514 	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
515 }

483 static void sugov_work(struct kthread_work *work)
484 {
485 	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
486
487 	mutex_lock(&sg_policy->work_lock);
488 	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
489 				CPUFREQ_RELATION_L);
490 	mutex_unlock(&sg_policy->work_lock);
491
492 	sg_policy->work_in_progress = false;
493 }
```
- Driver-side frequency change:
```c
2035 int __cpufreq_driver_target(struct cpufreq_policy *policy,
2036 			    unsigned int target_freq,
2037 			    unsigned int relation)
2038 {
2039 	unsigned int old_target_freq = target_freq;
2040 	int index;
2041 	unsigned int qos_max_freq = PM_QOS_FREQ_MAX_DEFAULT_VALUE;
2042 	unsigned int qos_min_freq = PM_QOS_FREQ_MIN_DEFAULT_VALUE;
2043 	unsigned int cluster_id;
2044
2045 	policy->target_freq = target_freq;
2046
2047 	if (cpufreq_disabled())
2048 		return -ENODEV;
2049
2050 	/* Make sure that target freq is within qos request range */
2051 	cluster_id = topology_physical_package_id(policy->cpu);
2052 	if (CPU_CLUSTER0 == cluster_id) {
2053 		qos_max_freq = pm_qos_request(PM_QOS_CLUSTER0_FREQ_MAX);
2054 		qos_min_freq = pm_qos_request(PM_QOS_CLUSTER0_FREQ_MIN);
2055 	} else if (CPU_CLUSTER1 == cluster_id) {
2056 		qos_max_freq = pm_qos_request(PM_QOS_CLUSTER1_FREQ_MAX);
2057 		qos_min_freq = pm_qos_request(PM_QOS_CLUSTER1_FREQ_MIN);
2058 	} else
2059 		pr_warn("more cluster id is not support yet!\n");
2060 	target_freq = clamp_val(target_freq, qos_min_freq, qos_max_freq);
2061
2062 	/* Make sure that target_freq is within supported range */
2063 	target_freq = clamp_val(target_freq, policy->min, policy->max);
2064
2065 	pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
2066 		 policy->cpu, target_freq, relation, old_target_freq);
2067
2068 	/*
2069 	 * This might look like a redundant call as we are checking it again
2070 	 * after finding index. But it is left intentionally for cases where
2071 	 * exactly same freq is called again and so we can save on few function
2072 	 * calls.
2073 	 */
2074 	if (target_freq == policy->cur)
2075 		return 0;
2076
2077 	/* Save last value to restore later on errors */
2078 	policy->restore_freq = policy->cur;
2079
2080 	if (cpufreq_driver->target)
2081 		return cpufreq_driver->target(policy, target_freq, relation);
2082
2083 	if (!cpufreq_driver->target_index)
2084 		return -EINVAL;
2085
2086 	index = cpufreq_frequency_table_target(policy, target_freq, relation);
2087
2088 	return __target_index(policy, index);
2089 }
```
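A standalone sketch of the two clamp stages at lines 2060 and 2063 (QoS range first, then the policy's own limits); the sample values are made up. Because the policy clamp runs last, policy->min/max always bound the final request:

```c
#include <stdio.h>

#define clamp_val(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

int main(void)
{
	unsigned int target = 1800000;			  /* requested, kHz */
	unsigned int qos_min = 300000, qos_max = 1500000; /* PM QoS range */
	unsigned int pol_min = 614400, pol_max = 1228800; /* policy range */

	target = clamp_val(target, qos_min, qos_max);	/* -> 1500000 */
	target = clamp_val(target, pol_min, pol_max);	/* -> 1228800 */

	printf("final target = %u kHz\n", target);
	return 0;
}
```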