diff options
-rw-r--r-- | drivers/cpufreq/cpufreq-dt.c | 3 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 206 | ||||
-rw-r--r-- | drivers/idle/intel_idle.c | 97 | ||||
-rw-r--r-- | drivers/mailbox/pcc.c | 4 |
4 files changed, 258 insertions, 52 deletions
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index f951f911786e..5f8dbe640a20 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -4,9 +4,6 @@ * Copyright (C) 2014 Linaro. * Viresh Kumar <viresh.kumar@linaro.org> * - * The OPP code in function set_target() is reused from - * drivers/cpufreq/omap-cpufreq.c - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 4b644526fd59..8b5a415ee14a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -64,6 +64,25 @@ static inline int ceiling_fp(int32_t x) return ret; } +/** + * struct sample - Store performance sample + * @core_pct_busy: Ratio of APERF/MPERF in percent, which is actual + * performance during last sample period + * @busy_scaled: Scaled busy value which is used to calculate next + * P state. This can be different than core_pct_busy + * to account for cpu idle period + * @aperf: Difference of actual performance frequency clock count + * read from APERF MSR between last and current sample + * @mperf: Difference of maximum performance frequency clock count + * read from MPERF MSR between last and current sample + * @tsc: Difference of time stamp counter between last and + * current sample + * @freq: Effective frequency calculated from APERF/MPERF + * @time: Current time from scheduler + * + * This structure is used in the cpudata structure to store performance sample + * data for choosing next P State. 
+ */ struct sample { int32_t core_pct_busy; int32_t busy_scaled; @@ -74,6 +93,20 @@ struct sample { u64 time; }; +/** + * struct pstate_data - Store P state data + * @current_pstate: Current requested P state + * @min_pstate: Min P state possible for this platform + * @max_pstate: Max P state possible for this platform + * @max_pstate_physical:This is physical Max P state for a processor + * This can be higher than the max_pstate which can + * be limited by platform thermal design power limits + * @scaling: Scaling factor to convert frequency to cpufreq + * frequency units + * @turbo_pstate: Max Turbo P state possible for this platform + * + * Stores the per cpu model P state limits and current P state. + */ struct pstate_data { int current_pstate; int min_pstate; @@ -83,6 +116,19 @@ struct pstate_data { int turbo_pstate; }; +/** + * struct vid_data - Stores voltage information data + * @min: VID data for this platform corresponding to + * the lowest P state + * @max: VID data corresponding to the highest P State. + * @turbo: VID data for turbo P state + * @ratio: Ratio of (vid max - vid min) / + * (max P state - Min P State) + * + * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling) + * This data is used in Atom platforms, where in addition to target P state, + * the voltage data needs to be specified to select next P State. + */ struct vid_data { int min; int max; @@ -90,6 +136,18 @@ struct vid_data { int32_t ratio; }; +/** + * struct _pid - Stores PID data + * @setpoint: Target set point for busyness or performance + * @integral: Storage for accumulated error values + * @p_gain: PID proportional gain + * @i_gain: PID integral gain + * @d_gain: PID derivative gain + * @deadband: PID deadband + * @last_err: Last error storage for integral part of PID calculation + * + * Stores PID coefficients and last error for PID controller. 
+ */ struct _pid { int setpoint; int32_t integral; @@ -100,6 +158,23 @@ struct _pid { int32_t last_err; }; +/** + * struct cpudata - Per CPU instance data storage + * @cpu: CPU number for this instance data + * @update_util: CPUFreq utility callback information + * @pstate: Stores P state limits for this CPU + * @vid: Stores VID limits for this CPU + * @pid: Stores PID parameters for this CPU + * @last_sample_time: Last Sample time + * @prev_aperf: Last APERF value read from APERF MSR + * @prev_mperf: Last MPERF value read from MPERF MSR + * @prev_tsc: Last timestamp counter (TSC) value + * @prev_cummulative_iowait: IO Wait time difference from last and + * current sample + * @sample: Storage for storing last Sample data + * + * This structure stores per CPU instance data for all CPUs. + */ struct cpudata { int cpu; @@ -118,6 +193,19 @@ struct cpudata { }; static struct cpudata **all_cpu_data; + +/** + * struct pstate_adjust_policy - Stores static PID configuration data + * @sample_rate_ms: PID calculation sample rate in ms + * @sample_rate_ns: Sample rate calculation in ns + * @deadband: PID deadband + * @setpoint: PID Setpoint + * @p_gain_pct: PID proportional gain + * @i_gain_pct: PID integral gain + * @d_gain_pct: PID derivative gain + * + * Stores per CPU model static PID configuration data. 
+ */ struct pstate_adjust_policy { int sample_rate_ms; s64 sample_rate_ns; @@ -128,6 +216,20 @@ struct pstate_adjust_policy { int i_gain_pct; }; +/** + * struct pstate_funcs - Per CPU model specific callbacks + * @get_max: Callback to get maximum non turbo effective P state + * @get_max_physical: Callback to get maximum non turbo physical P state + * @get_min: Callback to get minimum P state + * @get_turbo: Callback to get turbo P state + * @get_scaling: Callback to get frequency scaling factor + * @get_val: Callback to convert P state to actual MSR write value + * @get_vid: Callback to get VID data for Atom platforms + * @get_target_pstate: Callback to a function to calculate next P state to use + * + * Core and Atom CPU models have different way to get P State limits. This + * structure is used to store those callbacks. + */ struct pstate_funcs { int (*get_max)(void); int (*get_max_physical)(void); @@ -139,6 +241,11 @@ struct pstate_funcs { int32_t (*get_target_pstate)(struct cpudata *); }; +/** + * struct cpu_defaults- Per CPU model default config data + * @pid_policy: PID config data + * @funcs: Callback function data + */ struct cpu_defaults { struct pstate_adjust_policy pid_policy; struct pstate_funcs funcs; @@ -151,6 +258,34 @@ static struct pstate_adjust_policy pid_params; static struct pstate_funcs pstate_funcs; static int hwp_active; + +/** + * struct perf_limits - Store user and policy limits + * @no_turbo: User requested turbo state from intel_pstate sysfs + * @turbo_disabled: Platform turbo status either from msr + * MSR_IA32_MISC_ENABLE or when maximum available pstate + * matches the maximum turbo pstate + * @max_perf_pct: Effective maximum performance limit in percentage, this + * is minimum of either limits enforced by cpufreq policy + * or limits from user set limits via intel_pstate sysfs + * @min_perf_pct: Effective minimum performance limit in percentage, this + * is maximum of either limits enforced by cpufreq policy + * or limits from user 
set limits via intel_pstate sysfs + * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct + * This value is used to limit max pstate + * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct + * This value is used to limit min pstate + * @max_policy_pct: The maximum performance in percentage enforced by + * cpufreq setpolicy interface + * @max_sysfs_pct: The maximum performance in percentage enforced by + * intel pstate sysfs interface + * @min_policy_pct: The minimum performance in percentage enforced by + * cpufreq setpolicy interface + * @min_sysfs_pct: The minimum performance in percentage enforced by + * intel pstate sysfs interface + * + * Storage for user and policy defined limits. + */ struct perf_limits { int no_turbo; int turbo_disabled; @@ -910,7 +1045,14 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; cpu->prev_tsc = tsc; - return true; + /* + * First time this function is invoked in a given cycle, all of the + * previous sample data fields are equal to zero or stale and they must + * be populated with meaningful numbers for things to work, so assume + * that sample.time will always be reset before setting the utilization + * update hook and make the caller skip the sample then. + */ + return !!cpu->last_sample_time; } static inline int32_t get_avg_frequency(struct cpudata *cpu) @@ -984,8 +1126,7 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) * enough period of time to adjust our busyness. 
*/ duration_ns = cpu->sample.time - cpu->last_sample_time; - if ((s64)duration_ns > pid_params.sample_rate_ns * 3 - && cpu->last_sample_time > 0) { + if ((s64)duration_ns > pid_params.sample_rate_ns * 3) { sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns), int_tofp(duration_ns)); core_busy = mul_fp(core_busy, sample_ratio); @@ -1100,10 +1241,8 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_get_cpu_pstates(cpu); intel_pstate_busy_pid_reset(cpu); - intel_pstate_sample(cpu, 0); cpu->update_util.func = intel_pstate_update_util; - cpufreq_set_update_util_data(cpunum, &cpu->update_util); pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); @@ -1122,22 +1261,54 @@ static unsigned int intel_pstate_get(unsigned int cpu_num) return get_avg_frequency(cpu); } +static void intel_pstate_set_update_util_hook(unsigned int cpu_num) +{ + struct cpudata *cpu = all_cpu_data[cpu_num]; + + /* Prevent intel_pstate_update_util() from using stale data. */ + cpu->sample.time = 0; + cpufreq_set_update_util_data(cpu_num, &cpu->update_util); +} + +static void intel_pstate_clear_update_util_hook(unsigned int cpu) +{ + cpufreq_set_update_util_data(cpu, NULL); + synchronize_sched(); +} + +static void intel_pstate_set_performance_limits(struct perf_limits *limits) +{ + limits->no_turbo = 0; + limits->turbo_disabled = 0; + limits->max_perf_pct = 100; + limits->max_perf = int_tofp(1); + limits->min_perf_pct = 100; + limits->min_perf = int_tofp(1); + limits->max_policy_pct = 100; + limits->max_sysfs_pct = 100; + limits->min_policy_pct = 0; + limits->min_sysfs_pct = 0; +} + static int intel_pstate_set_policy(struct cpufreq_policy *policy) { if (!policy->cpuinfo.max_freq) return -ENODEV; - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE && - policy->max >= policy->cpuinfo.max_freq) { - pr_debug("intel_pstate: set performance\n"); + intel_pstate_clear_update_util_hook(policy->cpu); + + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { limits = &performance_limits; - 
if (hwp_active) - intel_pstate_hwp_set(policy->cpus); - return 0; + if (policy->max >= policy->cpuinfo.max_freq) { + pr_debug("intel_pstate: set performance\n"); + intel_pstate_set_performance_limits(limits); + goto out; + } + } else { + pr_debug("intel_pstate: set powersave\n"); + limits = &powersave_limits; } - pr_debug("intel_pstate: set powersave\n"); - limits = &powersave_limits; limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100); limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, @@ -1163,6 +1334,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), int_tofp(100)); + out: + intel_pstate_set_update_util_hook(policy->cpu); + if (hwp_active) intel_pstate_hwp_set(policy->cpus); @@ -1187,8 +1361,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); - cpufreq_set_update_util_data(cpu_num, NULL); - synchronize_sched(); + intel_pstate_clear_update_util_hook(cpu_num); if (hwp_active) return; @@ -1455,8 +1628,7 @@ out: get_online_cpus(); for_each_online_cpu(cpu) { if (all_cpu_data[cpu]) { - cpufreq_set_update_util_data(cpu, NULL); - synchronize_sched(); + intel_pstate_clear_update_util_hook(cpu); kfree(all_cpu_data[cpu]); } } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index ba947df5a8c7..c6935de425fa 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -660,6 +660,35 @@ static struct cpuidle_state skl_cstates[] = { .enter = NULL } }; +static struct cpuidle_state skx_cstates[] = { + { + .name = "C1-SKX", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, + { + .name = "C1E-SKX", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01), + .exit_latency = 10, + .target_residency = 20, + 
.enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, + { + .name = "C6-SKX", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 133, + .target_residency = 600, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, + { + .enter = NULL } +}; + static struct cpuidle_state atom_cstates[] = { { .name = "C1E-ATM", @@ -818,8 +847,11 @@ static int cpu_hotplug_notify(struct notifier_block *n, * driver in this case */ dev = per_cpu_ptr(intel_idle_cpuidle_devices, hotcpu); - if (!dev->registered) - intel_idle_cpu_init(hotcpu); + if (dev->registered) + break; + + if (intel_idle_cpu_init(hotcpu)) + return NOTIFY_BAD; break; } @@ -904,6 +936,10 @@ static const struct idle_cpu idle_cpu_skl = { .disable_promotion_to_c1e = true, }; +static const struct idle_cpu idle_cpu_skx = { + .state_table = skx_cstates, + .disable_promotion_to_c1e = true, +}; static const struct idle_cpu idle_cpu_avn = { .state_table = avn_cstates, @@ -945,6 +981,9 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { ICPU(0x56, idle_cpu_bdw), ICPU(0x4e, idle_cpu_skl), ICPU(0x5e, idle_cpu_skl), + ICPU(0x8e, idle_cpu_skl), + ICPU(0x9e, idle_cpu_skl), + ICPU(0x55, idle_cpu_skx), ICPU(0x57, idle_cpu_knl), {} }; @@ -987,22 +1026,15 @@ static int __init intel_idle_probe(void) icpu = (const struct idle_cpu *)id->driver_data; cpuidle_state_table = icpu->state_table; - if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ - lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; - else - on_each_cpu(__setup_broadcast_timer, (void *)true, 1); - pr_debug(PREFIX "v" INTEL_IDLE_VERSION " model 0x%X\n", boot_cpu_data.x86_model); - pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n", - lapic_timer_reliable_states); return 0; } /* * intel_idle_cpuidle_devices_uninit() - * unregister, free cpuidle_devices + * Unregisters the cpuidle devices. 
*/ static void intel_idle_cpuidle_devices_uninit(void) { @@ -1013,9 +1045,6 @@ static void intel_idle_cpuidle_devices_uninit(void) dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); cpuidle_unregister_device(dev); } - - free_percpu(intel_idle_cpuidle_devices); - return; } /* @@ -1111,7 +1140,7 @@ static void intel_idle_state_table_update(void) * intel_idle_cpuidle_driver_init() * allocate, initialize cpuidle_states */ -static int __init intel_idle_cpuidle_driver_init(void) +static void __init intel_idle_cpuidle_driver_init(void) { int cstate; struct cpuidle_driver *drv = &intel_idle_driver; @@ -1163,18 +1192,10 @@ static int __init intel_idle_cpuidle_driver_init(void) drv->state_count += 1; } - if (icpu->auto_demotion_disable_flags) - on_each_cpu(auto_demotion_disable, NULL, 1); - if (icpu->byt_auto_demotion_disable_flag) { wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); } - - if (icpu->disable_promotion_to_c1e) /* each-cpu is redundant */ - on_each_cpu(c1e_promotion_disable, NULL, 1); - - return 0; } @@ -1193,7 +1214,6 @@ static int intel_idle_cpu_init(int cpu) if (cpuidle_register_device(dev)) { pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu); - intel_idle_cpuidle_devices_uninit(); return -EIO; } @@ -1218,40 +1238,51 @@ static int __init intel_idle_init(void) if (retval) return retval; + intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); + if (intel_idle_cpuidle_devices == NULL) + return -ENOMEM; + intel_idle_cpuidle_driver_init(); retval = cpuidle_register_driver(&intel_idle_driver); if (retval) { struct cpuidle_driver *drv = cpuidle_get_driver(); printk(KERN_DEBUG PREFIX "intel_idle yielding to %s", drv ? 
drv->name : "none"); + free_percpu(intel_idle_cpuidle_devices); return retval; } - intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); - if (intel_idle_cpuidle_devices == NULL) - return -ENOMEM; - cpu_notifier_register_begin(); for_each_online_cpu(i) { retval = intel_idle_cpu_init(i); if (retval) { + intel_idle_cpuidle_devices_uninit(); cpu_notifier_register_done(); cpuidle_unregister_driver(&intel_idle_driver); + free_percpu(intel_idle_cpuidle_devices); return retval; } } __register_cpu_notifier(&cpu_hotplug_notifier); + if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ + lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; + else + on_each_cpu(__setup_broadcast_timer, (void *)true, 1); + cpu_notifier_register_done(); + pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n", + lapic_timer_reliable_states); + return 0; } static void __exit intel_idle_exit(void) { - intel_idle_cpuidle_devices_uninit(); - cpuidle_unregister_driver(&intel_idle_driver); + struct cpuidle_device *dev; + int i; cpu_notifier_register_begin(); @@ -1259,9 +1290,15 @@ static void __exit intel_idle_exit(void) on_each_cpu(__setup_broadcast_timer, (void *)false, 1); __unregister_cpu_notifier(&cpu_hotplug_notifier); + for_each_possible_cpu(i) { + dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); + cpuidle_unregister_device(dev); + } + cpu_notifier_register_done(); - return; + cpuidle_unregister_driver(&intel_idle_driver); + free_percpu(intel_idle_cpuidle_devices); } module_init(intel_idle_init); diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 0ddf638d60f3..043828d541f7 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -361,8 +361,6 @@ static int __init acpi_pcc_probe(void) struct acpi_generic_address *db_reg; struct acpi_pcct_hw_reduced *pcct_ss; pcc_mbox_channels[i].con_priv = pcct_entry; - pcct_entry = (struct acpi_subtable_header *) - ((unsigned long) pcct_entry + pcct_entry->length); /* If doorbell is in system memory cache 
the virt address */ pcct_ss = (struct acpi_pcct_hw_reduced *)pcct_entry; @@ -370,6 +368,8 @@ static int __init acpi_pcc_probe(void) if (db_reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) pcc_doorbell_vaddr[i] = acpi_os_ioremap(db_reg->address, db_reg->bit_width/8); + pcct_entry = (struct acpi_subtable_header *) + ((unsigned long) pcct_entry + pcct_entry->length); } pcc_mbox_ctrl.num_chans = count; |