一、应用态参数介绍
/sys/devices/system/cpu/cpu0/cpufreq/
affected_cpus //当前策略作用于哪些online core
cpuinfo_cur_freq //当前CPU硬件频率
cpuinfo_max_freq //CPU硬件支持的最高频率
cpuinfo_min_freq //CPU硬件支持的最低频率
cpuinfo_transition_latency //硬件完成一次频率切换所需的时间(单位:纳秒)
related_cpus //属于同一策略(需要协同调频)的所有core,包括online和offline core
scaling_available_frequencies //软件支持的频率列表
scaling_available_governors //支持的策略列表
scaling_cur_freq //软件设置的当前频率,通常与cpuinfo_cur_freq相同,如果出现硬件问题可能导致不一致
scaling_driver //当前使用的driver
scaling_governor //当前使用的governor
scaling_max_freq //policy允许的最高频率,governor选频的上限
scaling_min_freq //policy允许的最低频率,governor选频的下限
scaling_setspeed //需将governor类型切换为userspace,才会出现,通过echo修改数值,会切换主频
二、scaling_available_frequencies 频率列表
governor支持的频率列表是哪里来的呢?因为频率不是随意设置的,governor计算使用的频率,需要在频率支持列表中,在嵌入式中这个列表是在设备树中定义的。那么在x86 桌面系统中,这个列表是在哪里定义的呢?
1、首先在drivers/cpufreq 中搜索关键字scaling_available_frequencies
231 static ssize_t show_available_freqs(struct cpufreq_policy *policy, char *buf,
232 bool show_boost)
233 {
234 ssize_t count = 0;
235 struct cpufreq_frequency_table *pos, *table = policy->freq_table;
236
237 if (!table)
238 return -ENODEV;
239
240 cpufreq_for_each_valid_entry(pos, table) {
241 /*
242 * show_boost = true and driver_data = BOOST freq
243 * display BOOST freqs
244 *
245 * show_boost = false and driver_data = BOOST freq
246 * show_boost = true and driver_data != BOOST freq
247 * continue - do not display anything
248 *
249 * show_boost = false and driver_data != BOOST freq
250 * display NON BOOST freqs
251 */
252 if (show_boost ^ (pos->flags & CPUFREQ_BOOST_FREQ))
253 continue;
254
255 count += sprintf(&buf[count], "%d ", pos->frequency);
256 }
257 count += sprintf(&buf[count], "\n");
258
259 return count;
260
261 }
可以看到这个参数是从 policy->freq_table 中获取到的。
2、继续搜索关键字freq_table,找到关键函数acpi_cpufreq_cpu_init
650 static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
...
690 result = acpi_processor_register_performance(perf, cpu);
691 if (result)
692 goto err_free_mask;
...
771 freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
772 GFP_KERNEL);
773 if (!freq_table) {
774 result = -ENOMEM;
775 goto err_unreg;
776 }
...
794 /* table init */
795 for (i = 0; i < perf->state_count; i++) {
796 if (i > 0 && perf->states[i].core_frequency >=
797 freq_table[valid_states-1].frequency / 1000)
798 continue;
799
800 freq_table[valid_states].driver_data = i;
801 freq_table[valid_states].frequency =
802 perf->states[i].core_frequency * 1000;
803 valid_states++;
804 }
...
}
...
从上面找到关键函数acpi_processor_register_performance,信息都是从这个函数中获取到的,最后拷贝到freq_table中。
分析acpi_processor_register_performance,调用关系为:acpi_processor_register_performance->acpi_processor_get_performance_info->acpi_processor_get_performance_states
3、分析acpi_processor_get_performance_states函数
316 static int acpi_processor_get_performance_states(struct acpi_processor *pr)
{
...
328 status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
329 if (ACPI_FAILURE(status)) {
330 ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PSS"));
331 return -ENODEV;
332 }
...
354 for (i = 0; i < pr->performance->state_count; i++) {
355
356 struct acpi_processor_px *px = &(pr->performance->states[i]);
357
358 state.length = sizeof(struct acpi_processor_px);
359 state.pointer = px;
...
383 /*
384 * Check that ACPI's u64 MHz will be valid as u32 KHz in cpufreq
385 */
386 if (!px->core_frequency ||
387 ((u32)(px->core_frequency * 1000) !=
388 (px->core_frequency * 1000))) {
389 printk(KERN_ERR FW_BUG PREFIX
390 "Invalid BIOS _PSS frequency found for processor %d: 0x%llx MHz\n",
391 pr->id, px->core_frequency);
392 if (last_invalid == -1)
393 last_invalid = i;
394 } else {
395 if (last_invalid != -1) {
396 /*
397 * Copy this valid entry over last_invalid entry
398 */
399 memcpy(&(pr->performance->states[last_invalid]),
400 px, sizeof(struct acpi_processor_px));
401 ++last_invalid;
402 }
403 }
...
}
我们查看px 的定义
114 struct acpi_processor_px {
115 u64 core_frequency; /* megahertz */
116 u64 power; /* milliWatts */
117 u64 transition_latency; /* microseconds */
118 u64 bus_master_latency; /* microseconds */
119 u64 control; /* control value */
120 u64 status; /* success indicator */
121 };
4、结合acpi 手册查看
5、总结:scaling_available_frequencies是通过acpi(_PSS对象)从bios中读取到的
三、频率设置方法
我们在sysfs下还看到诸如scaling_available_governors、scaling_driver等interface,在cpufreq子系统中(以下简称cpufreq),policy、governor、driver是几个比较核心的概念。cpufreq的最终目的是动态调节频率,在前面提到,软件调节是有一定范围的,并且该范围(可以称为频率表)是硬件支持的一个子集,这个频率表的最大值和最小值是由policy决定的,当然不同的policy采用的频率表的范围不同。OK,现在我们用policy制定了频率表的范围,对应sysfs下的scaling_min_freq和scaling_max_freq,governor会从频率表中选择一个值作为CPU的最终工作频率。governor可以理解为“调节器”,查看scaling_available_governors可以知道设备支持哪些governor,比如我的设备支持ondemand userspace interactive performance等,每种governor有着不同的选频标准,比如ondemand会根据系统的workload来选频,如果当前workload超过某一阈值,则选择频率表中的最大频率,其他的几种governor后面有机会再介绍。假设某个governor根据一定的机制从频率表中选择了某个频率,接下来driver负责将governor选择的频率设置为CPU的工作频率,设置频率的入口函数是__cpufreq_driver_target,下面是修改scaling_max_freq时抓到的调用栈:
__cpufreq_driver_target+1
cpufreq_set_policy+679
store_scaling_max_freq+124
store+118
kernfs_fop_write+278
vfs_write+165
ksys_write+87
do_syscall_64+90
entry_SYSCALL_64_after_hwframe+68
1、研究关键函数__cpufreq_driver_target
1956 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1957 unsigned int target_freq,
1958 unsigned int relation)
1959 {
...
1984 if (cpufreq_driver->target)
1985 return cpufreq_driver->target(policy, target_freq, relation);
1986
1987 if (!cpufreq_driver->target_index)
1988 return -EINVAL;
1989
1990 index = cpufreq_frequency_table_target(policy, target_freq, relation);
1991
1992 return __target_index(policy, index)
...
}
其中的__target_index
1899 static int __target_index(struct cpufreq_policy *policy, int index)
1900 {
...
1931 retval = cpufreq_driver->target_index(policy, index);
1932 if (retval)
1933 pr_err("%s: Failed to change cpu frequency: %d\n", __func__,
1934 retval);
...
}
2、查找target_index函数实现
grep -r "\.target_index" ./
我的电脑是在acpi-cpufreq.c实现的
904 static struct cpufreq_driver acpi_cpufreq_driver = {
905 .verify = cpufreq_generic_frequency_table_verify,
906 .target_index = acpi_cpufreq_target,
907 .fast_switch = acpi_cpufreq_fast_switch,
908 .bios_limit = acpi_processor_get_bios_limit,
909 .init = acpi_cpufreq_cpu_init,
910 .exit = acpi_cpufreq_cpu_exit,
911 .ready = acpi_cpufreq_cpu_ready,
912 .resume = acpi_cpufreq_resume,
913 .name = "acpi-cpufreq",
914 .attr = acpi_cpufreq_attr,
915 };
426 static int acpi_cpufreq_target(struct cpufreq_policy *policy,
427 unsigned int index)
428 {
...
...
}
3、研究drv_write实现(acpi_cpufreq_target 内部通过调用 drv_write,把目标 P-state 对应的 control 值写入控制寄存器)
337 /* Called via smp_call_function_many(), on the target CPUs */
338 static void do_drv_write(void *_cmd)
339 {
340 struct drv_cmd *cmd = _cmd;
341
342 cmd->func.write(cmd->reg, cmd->val);
343 }
344
345 static void drv_write(struct acpi_cpufreq_data *data,
346 const struct cpumask *mask, u32 val)
347 {
348 struct acpi_processor_performance *perf = to_perf_data(data);
349 struct drv_cmd cmd = {
350 .reg = &perf->control_register,
351 .val = val,
352 .func.write = data->cpu_freq_write,
353 };
354 int this_cpu;
355
356 this_cpu = get_cpu();
357 if (cpumask_test_cpu(this_cpu, mask))
358 do_drv_write(&cmd);
359
360 smp_call_function_many(mask, do_drv_write, &cmd, 1);
361 put_cpu();
362 }
很明显 关键在于func.write
仍然在函数 acpi_cpufreq_cpu_init 中
650 static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
651 {
...
748 case ACPI_ADR_SPACE_FIXED_HARDWARE:
749 pr_debug("HARDWARE addr space\n");
750 if (check_est_cpu(cpu)) {
751 data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
752 data->cpu_freq_read = cpu_freq_read_intel;
753 data->cpu_freq_write = cpu_freq_write_intel;
754 break;
755 }
756 if (check_amd_hwpstate_cpu(cpu)) {
757 data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
758 data->cpu_freq_read = cpu_freq_read_amd;
759 data->cpu_freq_write = cpu_freq_write_amd;
760 break;
761 }
...
4、cpu 频率读写函数
263 static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
264 {
265 u32 val, dummy;
266
267 rdmsr(MSR_IA32_PERF_CTL, val, dummy);
268 return val;
269 }
270
271 static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
272 {
273 u32 lo, hi;
274
275 rdmsr(MSR_IA32_PERF_CTL, lo, hi);
276 lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE);
277 wrmsr(MSR_IA32_PERF_CTL, lo, hi);
278 }
总结,最终通过cpu 的msr 寄存器,进行cpu频率读写的操作
四、 cpuinfo_max_freq 和cpuinfo_min_freq 获取原理
看一下代码,应该就非常明显了吧,最大值和最小值,是通过遍历频率列表进行比较获取的,并没有直接的数值,告诉你最大频率和最小频率。
36 int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
37 struct cpufreq_frequency_table *table)
38 {
39 struct cpufreq_frequency_table *pos;
40 unsigned int min_freq = ~0;
41 unsigned int max_freq = 0;
42 unsigned int freq;
43
44 cpufreq_for_each_valid_entry(pos, table) {
45 freq = pos->frequency;
46
47 if (!cpufreq_boost_enabled()
48 && (pos->flags & CPUFREQ_BOOST_FREQ))
49 continue;
50
51 pr_debug("table entry %u: %u kHz\n", (int)(pos - table), freq);
52 if (freq < min_freq)
53 min_freq = freq;
54 if (freq > max_freq)
55 max_freq = freq;
56 }
57
58 policy->min = policy->cpuinfo.min_freq = min_freq;
59 policy->max = policy->cpuinfo.max_freq = max_freq;
60
61 if (policy->min == ~0)
62 return -EINVAL;
63 else
64 return 0;
65 }