一、应用态参数介绍

/sys/devices/system/cpu/cpu0/cpufreq/
affected_cpus //当前策略作用于哪些online core
cpuinfo_cur_freq //当前CPU硬件频率
cpuinfo_max_freq //CPU硬件支持的最高频率
cpuinfo_min_freq //CPU硬件支持的最低频率
cpuinfo_transition_latency //硬件支持的切换频率最小间隔
related_cpus //属于当前策略的所有core(包括online和offline)
scaling_available_frequencies //软件支持的频率列表
scaling_available_governors //支持的策略列表
scaling_cur_freq //软件设置的当前频率,通常与cpuinfo_cur_freq相同,如果出现硬件问题可能导致不一致
scaling_driver //当前使用的driver
scaling_governor //当前使用的governor
scaling_max_freq //软件governor设置的最高频率
scaling_min_freq //软件governor设置的最低频率
scaling_setspeed //需将governor类型切换为userspace才可用(其他governor下读取会返回<unsupported>),通过echo修改数值,会切换主频

二、scaling_available_frequencies 频率列表

governor支持的频率列表是哪里来的呢?因为频率不是随意设置的,governor计算使用的频率,需要在频率支持列表中。在嵌入式系统中这个列表是在设备树中定义的,那么在x86 桌面系统中,它是在哪里定义的呢?

1、首先在drivers/cpufreq 中搜索关键字scaling_available_frequencies
231 static ssize_t show_available_freqs(struct cpufreq_policy *policy, char *buf,
232                     bool show_boost)
233 {   
234     ssize_t count = 0;
235     struct cpufreq_frequency_table *pos, *table = policy->freq_table;
236     
237     if (!table)
238         return -ENODEV;
239     
240     cpufreq_for_each_valid_entry(pos, table) {
241         /* 
242          * show_boost = true and driver_data = BOOST freq
243          * display BOOST freqs
244          * 
245          * show_boost = false and driver_data = BOOST freq
246          * show_boost = true and driver_data != BOOST freq
247          * continue - do not display anything
248          * 
249          * show_boost = false and driver_data != BOOST freq
250          * display NON BOOST freqs
251          */
252         if (show_boost ^ (pos->flags & CPUFREQ_BOOST_FREQ))
253             continue;
254         
255         count += sprintf(&buf[count], "%d ", pos->frequency);
256     }
257     count += sprintf(&buf[count], "\n");
258     
259     return count;
260 
261 }

可以看到这个参数是从policy->freq_table 获取到的。

2、继续搜索关键字freq_table,找到关键函数acpi_cpufreq_cpu_init
 650 static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
...
 690     result = acpi_processor_register_performance(perf, cpu);
 691     if (result)
 692         goto err_free_mask;
...
 771     freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
 772                  GFP_KERNEL);
 773     if (!freq_table) {
 774         result = -ENOMEM;
 775         goto err_unreg;
 776     }
...
 794     /* table init */
 795     for (i = 0; i < perf->state_count; i++) {
 796         if (i > 0 && perf->states[i].core_frequency >=
 797             freq_table[valid_states-1].frequency / 1000)
 798             continue;
 799 
 800         freq_table[valid_states].driver_data = i;
 801         freq_table[valid_states].frequency =
 802             perf->states[i].core_frequency * 1000;
 803         valid_states++;
 804     }

...
}
...

从上面找到关键函数acpi_processor_register_performance,信息都是从这个函数中获取到的,最后拷贝到freq_table中。
分析acpi_processor_register_performance,调用关系为:acpi_processor_register_performance->acpi_processor_get_performance_info->acpi_processor_get_performance_states

3、分析acpi_processor_get_performance_states函数
316 static int acpi_processor_get_performance_states(struct acpi_processor *pr)
{
...
328     status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
329     if (ACPI_FAILURE(status)) {
330         ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PSS"));
331         return -ENODEV;
332     }
...
354     for (i = 0; i < pr->performance->state_count; i++) {
355 
356         struct acpi_processor_px *px = &(pr->performance->states[i]);
357 
358         state.length = sizeof(struct acpi_processor_px);
359         state.pointer = px;

...
383         /*
384          * Check that ACPI's u64 MHz will be valid as u32 KHz in cpufreq
385          */
386         if (!px->core_frequency ||
387             ((u32)(px->core_frequency * 1000) !=
388              (px->core_frequency * 1000))) {
389             printk(KERN_ERR FW_BUG PREFIX
390                    "Invalid BIOS _PSS frequency found for processor %d: 0x%llx MHz\n",
391                    pr->id, px->core_frequency);
392             if (last_invalid == -1)
393                 last_invalid = i;
394         } else {
395             if (last_invalid != -1) {
396                 /*
397                  * Copy this valid entry over last_invalid entry
398                  */
399                 memcpy(&(pr->performance->states[last_invalid]),
400                        px, sizeof(struct acpi_processor_px));
401                 ++last_invalid;
402             }
403         }
...
}

我们查看px 的定义

114 struct acpi_processor_px {
115     u64 core_frequency; /* megahertz */
116     u64 power;  /* milliWatts */
117     u64 transition_latency; /* microseconds */
118     u64 bus_master_latency; /* microseconds */
119     u64 control;    /* control value */
120     u64 status; /* success indicator */
121 };  
4、结合acpi 手册查看

cpu 动态调频之(频率范围,频率设置)-LMLPHP
cpu 动态调频之(频率范围,频率设置)-LMLPHP
cpu 动态调频之(频率范围,频率设置)-LMLPHP

5、总结:scaling_available_frequencies是通过acpi 从bios中(_PSS对象)读取到的

三、频率设置方法

我们在sysfs下还看到诸如scaling_available_governors、scaling_driver等interface,在cpufreq子系统中(以下简称cpufreq),policy、governor、driver是几个比较核心的概念。cpufreq的最终目的是动态调节频率,在前面提到,软件调节是有一定范围的,并且该范围(可以称为频率表)是硬件支持的一个子集,这个频率表的最大值和最小值是由policy决定的,当然不同的policy采用的频率表的范围不同。OK,现在我们用policy制定了频率表的范围,对应sysfs下的scaling_min_freq和scaling_max_freq,governor会从频率表中选择一个值作为CPU的最终工作频率。governor可以理解为“调节器”,查看scaling_available_governors可以知道设备支持哪些governor,比如我的设备支持ondemand userspace interactive performance等,每种governor有着不同的选频标准,比如ondemand会根据系统的workload来选频,如果当前workload超过某一阈值,则选择频率表中的最大频率,其他的几种governor后面有机会再介绍。假设某个governor根据一定的机制从频率表中选择了某个频率,接下来driver负责将governor选择的频率设置为CPU的工作频率,driver 设置频率的入口函数是__cpufreq_driver_target,下面是修改scaling_max_freq时抓到的调用栈:

    __cpufreq_driver_target+1
    cpufreq_set_policy+679
    store_scaling_max_freq+124
    store+118
    kernfs_fop_write+278
    vfs_write+165
    ksys_write+87
    do_syscall_64+90
    entry_SYSCALL_64_after_hwframe+68
1、研究关键函数__cpufreq_driver_target
1956 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1957                 unsigned int target_freq,
1958                 unsigned int relation)
1959 {  
...
1984     if (cpufreq_driver->target)
1985         return cpufreq_driver->target(policy, target_freq, relation);
1986     
1987     if (!cpufreq_driver->target_index)
1988         return -EINVAL;
1989     
1990     index = cpufreq_frequency_table_target(policy, target_freq, relation);
1991     
1992     return __target_index(policy, index)
...
}

其中的__target_index

1899 static int __target_index(struct cpufreq_policy *policy, int index)
1900 {
...
1931     retval = cpufreq_driver->target_index(policy, index);
1932     if (retval)
1933         pr_err("%s: Failed to change cpu frequency: %d\n", __func__,
1934                retval);

...
}
2、查找target_index函数实现

grep -r ".target_index" ./
我的电脑是在acpi-cpufreq.c实现的

 904 static struct cpufreq_driver acpi_cpufreq_driver = {
 905     .verify     = cpufreq_generic_frequency_table_verify,
 906     .target_index   = acpi_cpufreq_target,
 907     .fast_switch    = acpi_cpufreq_fast_switch,
 908     .bios_limit = acpi_processor_get_bios_limit,
 909     .init       = acpi_cpufreq_cpu_init,
 910     .exit       = acpi_cpufreq_cpu_exit,
 911     .ready      = acpi_cpufreq_cpu_ready,
 912     .resume     = acpi_cpufreq_resume,
 913     .name       = "acpi-cpufreq",
 914     .attr       = acpi_cpufreq_attr,
 915 };


 426 static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 427                    unsigned int index)
 428 {
...

...
}

3、研究drv_write实现(acpi_cpufreq_target 中最终调用drv_write,把目标P-state 对应的control 值写入寄存器)
 337 /* Called via smp_call_function_many(), on the target CPUs */
 338 static void do_drv_write(void *_cmd)
 339 {
 340     struct drv_cmd *cmd = _cmd;
 341 
 342     cmd->func.write(cmd->reg, cmd->val);
 343 }
 344 
 345 static void drv_write(struct acpi_cpufreq_data *data,
 346               const struct cpumask *mask, u32 val)
 347 {
 348     struct acpi_processor_performance *perf = to_perf_data(data);
 349     struct drv_cmd cmd = {
 350         .reg = &perf->control_register,
 351         .val = val,
 352         .func.write = data->cpu_freq_write,
 353     };
 354     int this_cpu;
 355 
 356     this_cpu = get_cpu();
 357     if (cpumask_test_cpu(this_cpu, mask))
 358         do_drv_write(&cmd);
 359 
 360     smp_call_function_many(mask, do_drv_write, &cmd, 1);
 361     put_cpu();
 362 }

很明显 关键在于func.write

func.write 来自data->cpu_freq_write,它仍然是在函数 acpi_cpufreq_cpu_init 中赋值的

650 static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 651 {
...
 748     case ACPI_ADR_SPACE_FIXED_HARDWARE:
 749         pr_debug("HARDWARE addr space\n");
 750         if (check_est_cpu(cpu)) {
 751             data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
 752             data->cpu_freq_read = cpu_freq_read_intel;
 753             data->cpu_freq_write = cpu_freq_write_intel;
 754             break;
 755         }
 756         if (check_amd_hwpstate_cpu(cpu)) {
 757             data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
 758             data->cpu_freq_read = cpu_freq_read_amd;
 759             data->cpu_freq_write = cpu_freq_write_amd;
 760             break;
 761         }

...
4、cpu 频率读写函数
 263 static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
 264 {
 265     u32 val, dummy;
 266 
 267     rdmsr(MSR_IA32_PERF_CTL, val, dummy);
 268     return val;
 269 }
 270 
 271 static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
 272 {
 273     u32 lo, hi;
 274 
 275     rdmsr(MSR_IA32_PERF_CTL, lo, hi);
 276     lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE);
 277     wrmsr(MSR_IA32_PERF_CTL, lo, hi);
 278 }

总结,最终通过cpu 的msr 寄存器,进行cpu频率读写的操作

四、 cpuinfo_max_freq 和cpuinfo_min_freq 获取原理

看一下代码,应该就非常明显了吧,最大值和最小值,是通过遍历频率列表进行比较获取的,并没有直接的数值,告诉你最大频率和最小频率。

 36 int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
 37                     struct cpufreq_frequency_table *table)
 38 {
 39     struct cpufreq_frequency_table *pos;
 40     unsigned int min_freq = ~0;
 41     unsigned int max_freq = 0;
 42     unsigned int freq;
 43 
 44     cpufreq_for_each_valid_entry(pos, table) {
 45         freq = pos->frequency;
 46 
 47         if (!cpufreq_boost_enabled()
 48             && (pos->flags & CPUFREQ_BOOST_FREQ))
 49             continue;
 50 
 51         pr_debug("table entry %u: %u kHz\n", (int)(pos - table), freq);
 52         if (freq < min_freq)
 53             min_freq = freq;
 54         if (freq > max_freq)
 55             max_freq = freq;
 56     }
 57 
 58     policy->min = policy->cpuinfo.min_freq = min_freq;
 59     policy->max = policy->cpuinfo.max_freq = max_freq;
 60 
 61     if (policy->min == ~0)
 62         return -EINVAL;
 63     else
 64         return 0;
 65 }

05-09 17:49