From 6e9643a864aa4d532b0d467bacc18a15adf5ca82 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Fri, 23 Oct 2020 17:58:39 +0800 Subject: cpufreq: e_powersaver: remove unreachable break A 'break' following a 'return' statement is pointless, so remove it. Signed-off-by: Zhang Qilong Acked-by: Viresh Kumar [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/e_powersaver.c | 1 - drivers/cpufreq/longhaul.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c index 776a58bab0ff..ab93bce8ae77 100644 --- a/drivers/cpufreq/e_powersaver.c +++ b/drivers/cpufreq/e_powersaver.c @@ -223,7 +223,6 @@ static int eps_cpu_init(struct cpufreq_policy *policy) case EPS_BRAND_C3: pr_cont("C3\n"); return -ENODEV; - break; } /* Enable Enhanced PowerSaver */ rdmsrl(MSR_IA32_MISC_ENABLE, val); diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index 123fb006810d..182a4dbca095 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -593,7 +593,6 @@ static void longhaul_setup_voltagescaling(void) break; default: return; - break; } if (min_vid_speed >= highest_speed) return; -- cgit v1.2.3-70-g09d2 From db865272d9c4687520dc29f77e701a1b2669872f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:15:56 +0200 Subject: cpufreq: Avoid configuring old governors as default with intel_pstate Commit 33aa46f252c7 ("cpufreq: intel_pstate: Use passive mode by default without HWP") was meant to cause intel_pstate to be used in the passive mode with the schedutil governor on top of it, but it missed the case in which either "ondemand" or "conservative" was selected as the default governor in the existing kernel config, in which case the previous old governor configuration would be used, causing the default legacy governor to be used on top of intel_pstate instead of schedutil. Address this by preventing "ondemand" and "conservative" from being configured as the default cpufreq governor in the case when schedutil is the default choice for the default governor setting. [Note that the default cpufreq governor can still be set via the kernel command line if need be and that choice is not limited, so if anyone really wants to use one of the legacy governors by default, it can be achieved this way.] Fixes: 33aa46f252c7 ("cpufreq: intel_pstate: Use passive mode by default without HWP") Reported-by: Julia Lawall Cc: 5.8+ # 5.8+ Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 2c7171e0b001..85de313ddec2 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -71,6 +71,7 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE config CPU_FREQ_DEFAULT_GOV_ONDEMAND bool "ondemand" + depends on !(X86_INTEL_PSTATE && SMP) select CPU_FREQ_GOV_ONDEMAND select CPU_FREQ_GOV_PERFORMANCE help @@ -83,6 +84,7 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE bool "conservative" + depends on !(X86_INTEL_PSTATE && SMP) select CPU_FREQ_GOV_CONSERVATIVE select CPU_FREQ_GOV_PERFORMANCE help -- cgit v1.2.3-70-g09d2 From 1c534352f47fd83eb08075ac2474f707e74bf7f7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:35:19 +0200 Subject: cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS driver flag Generally, a cpufreq driver may need to update some internal upper and lower frequency boundaries on policy max and min changes, respectively, but currently this does not work if the target frequency does not change along with the policy limit. Namely, if the target frequency does not change along with the policy min or max, the "target_freq == policy->cur" check in __cpufreq_driver_target() prevents driver callbacks from being invoked and they do not even have a chance to update the corresponding internal boundary. This particularly affects the "powersave" and "performance" governors that always set the target frequency to one of the policy limits and it never changes when the other limit is updated. To allow cpufreq the drivers needing to update internal frequency boundaries on policy limits changes to avoid this issue, introduce a new driver flag, CPUFREQ_NEED_UPDATE_LIMITS, that (when set) will neutralize the check mentioned above. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 3 ++- include/linux/cpufreq.h | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index f4b60663efe6..ea58337fb65f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2187,7 +2187,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, * exactly same freq is called again and so we can save on few function * calls. */ - if (target_freq == policy->cur) + if (target_freq == policy->cur && + !(cpufreq_driver->flags & CPUFREQ_NEED_UPDATE_LIMITS)) return 0; /* Save last value to restore later on errors */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index fa37b1c66443..038ed83aab41 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -298,7 +298,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) struct cpufreq_driver { char name[CPUFREQ_NAME_LEN]; - u8 flags; + u16 flags; void *driver_data; /* needed by all drivers */ @@ -422,6 +422,14 @@ struct cpufreq_driver { */ #define CPUFREQ_IS_COOLING_DEV BIT(7) +/* + * Set by drivers that need to update internale upper and lower boundaries along + * with the target frequency and so the core and governors should also invoke + * the diver if the target frequency does not change, but the policy min or max + * may have changed. + */ +#define CPUFREQ_NEED_UPDATE_LIMITS BIT(8) + int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -- cgit v1.2.3-70-g09d2 From e0be38ed4ab413ddd492118cf146369b86ee0ab5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:35:32 +0200 Subject: cpufreq: intel_pstate: Avoid missing HWP max updates in passive mode If the cpufreq policy max limit is changed when intel_pstate operates in the passive mode with HWP enabled and the "powersave" governor is used on top of it, the HWP max limit is not updated as appropriate. Namely, in the "powersave" governor case, the target P-state is always equal to the policy min limit, so if the latter does not change, intel_cpufreq_adjust_hwp() is not invoked to update the HWP Request MSR due to the "target_pstate != old_pstate" check in intel_cpufreq_update_pstate(), so the HWP max limit is not updated as a result. Also, if the CPUFREQ_NEED_UPDATE_LIMITS flag is not set for the driver and the target frequency does not change along with the policy max limit, the "target_freq == policy->cur" check in __cpufreq_driver_target() prevents the driver's ->target() callback from being invoked at all, so the HWP max limit is not updated. To prevent that occurring, set the CPUFREQ_NEED_UPDATE_LIMITS flag in the intel_cpufreq driver structure if HWP is enabled and modify intel_cpufreq_update_pstate() to do the "target_pstate != old_pstate" check only in the non-HWP case and let intel_cpufreq_adjust_hwp() always run in the HWP case (it will update HWP Request only if the cached value of the register is different from the new one including the limits, so if neither the target P-state value nor the max limit changes, the register write will still be avoided). Fixes: f6ebbcf08f37 ("cpufreq: intel_pstate: Implement passive mode with HWP enabled") Reported-by: Zhang Rui Cc: 5.9+ # 5.9+: 1c534352f47f cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS ... Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Tested-by: Zhang Rui --- drivers/cpufreq/intel_pstate.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3c1455518738..b7a9779250aa 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2568,14 +2568,12 @@ static int intel_cpufreq_update_pstate(struct cpudata *cpu, int target_pstate, int old_pstate = cpu->pstate.current_pstate; target_pstate = intel_pstate_prepare_request(cpu, target_pstate); - if (target_pstate != old_pstate) { + if (hwp_active) { + intel_cpufreq_adjust_hwp(cpu, target_pstate, fast_switch); + cpu->pstate.current_pstate = target_pstate; + } else if (target_pstate != old_pstate) { + intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, fast_switch); cpu->pstate.current_pstate = target_pstate; - if (hwp_active) - intel_cpufreq_adjust_hwp(cpu, target_pstate, - fast_switch); - else - intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, - fast_switch); } intel_cpufreq_trace(cpu, fast_switch ? INTEL_PSTATE_TRACE_FAST_SWITCH : @@ -3032,6 +3030,7 @@ static int __init intel_pstate_init(void) hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; intel_cpufreq.attr = hwp_cpufreq_attrs; + intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS; if (!default_driver) default_driver = &intel_pstate; -- cgit v1.2.3-70-g09d2 From 4e0ba5577dba686f96c1c10ef4166380667fdec7 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sun, 25 Oct 2020 00:29:53 +0800 Subject: intel_idle: Fix max_cstate for processor models without C-state tables Currently intel_idle driver gets the c-state information from ACPI _CST if the processor model is not recognized by it. However the c-state in _CST starts with index 1 which is different from the index in intel_idle driver's internal c-state table. While intel_idle_max_cstate_reached() was previously introduced to deal with intel_idle driver's internal c-state table, re-using this function directly on _CST is incorrect. Fix this by subtracting 1 from the index when checking max_cstate in the _CST case. For example, append intel_idle.max_cstate=1 in boot command line, Before the patch: grep . /sys/devices/system/cpu/cpu0/cpuidle/state*/name POLL After the patch: grep . /sys/devices/system/cpu/cpu0/cpuidle/state*/name /sys/devices/system/cpu/cpu0/cpuidle/state0/name:POLL /sys/devices/system/cpu/cpu0/cpuidle/state1/name:C1_ACPI Fixes: 18734958e9bf ("intel_idle: Use ACPI _CST for processor models without C-state tables") Reported-by: Pengfei Xu Cc: 5.6+ # 5.6+ Signed-off-by: Chen Yu [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 56f5b8077cba..01bace49a962 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1239,7 +1239,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) struct acpi_processor_cx *cx; struct cpuidle_state *state; - if (intel_idle_max_cstate_reached(cstate)) + if (intel_idle_max_cstate_reached(cstate - 1)) break; cx = &acpi_state_table.states[cstate]; -- cgit v1.2.3-70-g09d2 From 4d4ce8053bfac9a72b9094c6879119938efaa05d Mon Sep 17 00:00:00 2001 From: Jackie Zamow Date: Tue, 27 Oct 2020 07:43:19 -0500 Subject: PM: sleep: fix typo in kernel/power/process.c Fix a typo in a comment in freeze_processes(). Signed-off-by: Jackie Zamow [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- kernel/power/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/process.c b/kernel/power/process.c index 4b6a54da7e65..45b054b7b5ec 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -146,7 +146,7 @@ int freeze_processes(void) BUG_ON(in_atomic()); /* - * Now that the whole userspace is frozen we need to disbale + * Now that the whole userspace is frozen we need to disable * the OOM killer to disallow any further interference with * killable tasks. There is no guarantee oom victims will * ever reach a point they go away we have to wait with a timeout. -- cgit v1.2.3-70-g09d2 From 00d4394792418f8fe968f0cb22557053c6310010 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 27 Oct 2020 11:59:34 -0700 Subject: cpufreq: speedstep: remove unneeded semicolon A semicolon is not needed after a switch statement. Signed-off-by: Tom Rix Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/speedstep-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c index a13a2d1e444e..0b66df4ed513 100644 --- a/drivers/cpufreq/speedstep-lib.c +++ b/drivers/cpufreq/speedstep-lib.c @@ -240,7 +240,7 @@ unsigned int speedstep_get_frequency(enum speedstep_processor processor) return pentium3_get_frequency(processor); default: return 0; - }; + } return 0; } EXPORT_SYMBOL_GPL(speedstep_get_frequency); -- cgit v1.2.3-70-g09d2 From a62f68f5ca53ab61cba2f0a410d0add7a6d54a52 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:35:46 +0200 Subject: cpufreq: Introduce cpufreq_driver_test_flags() Add a helper function to test the flags of the cpufreq driver in use againt a given flags mask. In particular, this will be needed to test the CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag in the schedutil governor. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 12 ++++++++++++ include/linux/cpufreq.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ea58337fb65f..336b5e94cbc8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1907,6 +1907,18 @@ void cpufreq_resume(void) } } +/** + * cpufreq_driver_test_flags - Test cpufreq driver's flags against given ones. + * @flags: Flags to test against the current cpufreq driver's flags. + * + * Assumes that the driver is there, so callers must ensure that this is the + * case. + */ +bool cpufreq_driver_test_flags(u16 flags) +{ + return !!(cpufreq_driver->flags & flags); +} + /** * cpufreq_get_current_driver - return current driver's name * diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 038ed83aab41..1eaa04f1bae6 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -433,6 +433,7 @@ struct cpufreq_driver { int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); +bool cpufreq_driver_test_flags(u16 flags); const char *cpufreq_get_current_driver(void); void *cpufreq_get_driver_data(void); -- cgit v1.2.3-70-g09d2 From d1e7c2996e988866e7ceceb4641a0886885b7889 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 29 Oct 2020 12:12:46 +0100 Subject: cpufreq: schedutil: Always call driver if CPUFREQ_NEED_UPDATE_LIMITS is set Because sugov_update_next_freq() may skip a frequency update even if the need_freq_update flag has been set for the policy at hand, policy limits updates may not take effect as expected. For example, if the intel_pstate driver operates in the passive mode with HWP enabled, it needs to update the HWP min and max limits when the policy min and max limits change, respectively, but that may not happen if the target frequency does not change along with the limit at hand. In particular, if the policy min is changed first, causing the target frequency to be adjusted to it, and the policy max limit is changed later to the same value, the HWP max limit will not be updated to follow it as expected, because the target frequency is still equal to the policy min limit and it will not change until that limit is updated. To address this issue, modify get_next_freq() to let the driver callback run if the CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag is set regardless of whether or not the new frequency to set is equal to the previous one. Fixes: f6ebbcf08f37 ("cpufreq: intel_pstate: Implement passive mode with HWP enabled") Reported-by: Zhang Rui Tested-by: Zhang Rui Cc: 5.9+ # 5.9+: 1c534352f47f cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS ... Cc: 5.9+ # 5.9+: a62f68f5ca53 cpufreq: Introduce cpufreq_driver_test_flags() Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index e254745a82cb..c03a5775d019 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -102,7 +102,8 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { - if (sg_policy->next_freq == next_freq) + if (sg_policy->next_freq == next_freq && + !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS)) return false; sg_policy->next_freq = next_freq; @@ -161,7 +162,8 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, freq = map_util_freq(util, freq, max); - if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update) + if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update && + !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS)) return sg_policy->next_freq; sg_policy->need_freq_update = false; -- cgit v1.2.3-70-g09d2