From b9794a822281944ef3de5b1812a94cbdb8134320 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:33 +0100 Subject: powercap/drivers/dtpm: Convert the init table section to a simple array The init table section is freed after the system has booted. However the next changes will make the DTPM description per module, so the table won't be accessible when the module is loaded. In order to fix that, we should move the table to the data section where there are very few entries, which makes it strange to add it there. The main goal of the table was to keep the code self-encapsulated and we can keep it almost as is by using an array instead. Suggested-by: Ulf Hansson Reviewed-by: Ulf Hansson Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220128163537.212248-2-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 2 ++ drivers/powercap/dtpm_cpu.c | 5 ++++- drivers/powercap/dtpm_subsys.h | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 drivers/powercap/dtpm_subsys.h (limited to 'drivers') diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 8cb45f2d3d78..0e5c93443c70 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -24,6 +24,8 @@ #include #include +#include "dtpm_subsys.h" + #define DTPM_POWER_LIMIT_FLAG 0 static const char *constraint_name[] = { diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index b740866b228d..5763e0ce2af5 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -269,4 +269,7 @@ static int __init dtpm_cpu_init(void) return 0; } -DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init); +struct dtpm_subsys_ops dtpm_cpu_ops = { + .name = KBUILD_MODNAME, + .init = dtpm_cpu_init, +}; diff --git a/drivers/powercap/dtpm_subsys.h b/drivers/powercap/dtpm_subsys.h new file mode 100644 index 000000000000..2a3a2055f60e --- /dev/null +++ b/drivers/powercap/dtpm_subsys.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ 
+/* + * Copyright (C) 2022 Linaro Ltd + * + * Author: Daniel Lezcano + */ +#ifndef ___DTPM_SUBSYS_H__ +#define ___DTPM_SUBSYS_H__ + +extern struct dtpm_subsys_ops dtpm_cpu_ops; + +struct dtpm_subsys_ops *dtpm_subsys[] = { +#ifdef CONFIG_DTPM_CPU + &dtpm_cpu_ops, +#endif +}; + +#endif -- cgit v1.2.3-70-g09d2 From 3759ec678e8944dc2ea70cab77a300408f78ae27 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:34 +0100 Subject: powercap/drivers/dtpm: Add hierarchy creation The DTPM framework is available but without a way to configure it. This change provides a way to create a hierarchy of DTPM nodes where the power consumption reflects the sum of the children's power consumption. It is up to the platform to specify an array of dtpm nodes where each element has a pointer to its parent, except the topmost one. The type of the node gives the indication of which initialization callback to call. At this time, we can create a virtual node, whose purpose is to be a parent in the hierarchy, and a DT node where the name describes its path. In order to ensure a nice self-encapsulation, the DTPM subsys array contains a couple of initialization functions, one to set up the DTPM backend and one to initialize it. With this approach, the DTPM framework has very little material to export. 
Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220128163537.212248-3-daniel.lezcano@linaro.org --- drivers/powercap/Kconfig | 1 + drivers/powercap/dtpm.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++- include/linux/dtpm.h | 15 ++++ 3 files changed, 203 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index 8242e8c5ed77..b1ca339957e3 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -46,6 +46,7 @@ config IDLE_INJECT config DTPM bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)" + depends on OF help This enables support for the power capping for the dynamic thermal power management userspace engine. diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 0e5c93443c70..414826a1509b 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "dtpm_subsys.h" @@ -463,14 +464,197 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) return 0; } -static int __init init_dtpm(void) +static struct dtpm *dtpm_setup_virtual(const struct dtpm_node *hierarchy, + struct dtpm *parent) { + struct dtpm *dtpm; + int ret; + + dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL); + if (!dtpm) + return ERR_PTR(-ENOMEM); + dtpm_init(dtpm, NULL); + + ret = dtpm_register(hierarchy->name, dtpm, parent); + if (ret) { + pr_err("Failed to register dtpm node '%s': %d\n", + hierarchy->name, ret); + kfree(dtpm); + return ERR_PTR(ret); + } + + return dtpm; +} + +static struct dtpm *dtpm_setup_dt(const struct dtpm_node *hierarchy, + struct dtpm *parent) +{ + struct device_node *np; + int i, ret; + + np = of_find_node_by_path(hierarchy->name); + if (!np) { + pr_err("Failed to find '%s'\n", hierarchy->name); + return ERR_PTR(-ENXIO); + } + + for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) { + + if (!dtpm_subsys[i]->setup) + continue; + + ret = 
dtpm_subsys[i]->setup(parent, np); + if (ret) { + pr_err("Failed to setup '%s': %d\n", dtpm_subsys[i]->name, ret); + of_node_put(np); + return ERR_PTR(ret); + } + } + + of_node_put(np); + + /* + * By returning a NULL pointer, we let know the caller there + * is no child for us as we are a leaf of the tree + */ + return NULL; +} + +typedef struct dtpm * (*dtpm_node_callback_t)(const struct dtpm_node *, struct dtpm *); + +dtpm_node_callback_t dtpm_node_callback[] = { + [DTPM_NODE_VIRTUAL] = dtpm_setup_virtual, + [DTPM_NODE_DT] = dtpm_setup_dt, +}; + +static int dtpm_for_each_child(const struct dtpm_node *hierarchy, + const struct dtpm_node *it, struct dtpm *parent) +{ + struct dtpm *dtpm; + int i, ret; + + for (i = 0; hierarchy[i].name; i++) { + + if (hierarchy[i].parent != it) + continue; + + dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent); + + /* + * A NULL pointer means there is no children, hence we + * continue without going deeper in the recursivity. + */ + if (!dtpm) + continue; + + /* + * There are multiple reasons why the callback could + * fail. The generic glue is abstracting the backend + * and therefore it is not possible to report back or + * take a decision based on the error. In any case, + * if this call fails, it is not critical in the + * hierarchy creation, we can assume the underlying + * service is not found, so we continue without this + * branch in the tree but with a warning to log the + * information the node was not created. + */ + if (IS_ERR(dtpm)) { + pr_warn("Failed to create '%s' in the hierarchy\n", + hierarchy[i].name); + continue; + } + + ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm); + if (ret) + return ret; + } + + return 0; +} + +/** + * dtpm_create_hierarchy - Create the dtpm hierarchy + * @hierarchy: An array of struct dtpm_node describing the hierarchy + * + * The function is called by the platform specific code with the + * description of the different node in the hierarchy. 
It creates the + * tree in the sysfs filesystem under the powercap dtpm entry. + * + * The expected tree has the format: + * + * struct dtpm_node hierarchy[] = { + * [0] { .name = "topmost", type = DTPM_NODE_VIRTUAL }, + * [1] { .name = "package", .type = DTPM_NODE_VIRTUAL, .parent = &hierarchy[0] }, + * [2] { .name = "/cpus/cpu0", .type = DTPM_NODE_DT, .parent = &hierarchy[1] }, + * [3] { .name = "/cpus/cpu1", .type = DTPM_NODE_DT, .parent = &hierarchy[1] }, + * [4] { .name = "/cpus/cpu2", .type = DTPM_NODE_DT, .parent = &hierarchy[1] }, + * [5] { .name = "/cpus/cpu3", .type = DTPM_NODE_DT, .parent = &hierarchy[1] }, + * [6] { } + * }; + * + * The last element is always an empty one and marks the end of the + * array. + * + * Return: zero on success, a negative value in case of error. Errors + * are reported back from the underlying functions. + */ +int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table) +{ + const struct of_device_id *match; + const struct dtpm_node *hierarchy; + struct device_node *np; + int i, ret; + + if (pct) + return -EBUSY; + pct = powercap_register_control_type(NULL, "dtpm", NULL); if (IS_ERR(pct)) { pr_err("Failed to register control type\n"); - return PTR_ERR(pct); + ret = PTR_ERR(pct); + goto out_pct; + } + + ret = -ENODEV; + np = of_find_node_by_path("/"); + if (!np) + goto out_err; + + match = of_match_node(dtpm_match_table, np); + + of_node_put(np); + + if (!match) + goto out_err; + + hierarchy = match->data; + if (!hierarchy) { + ret = -EFAULT; + goto out_err; + } + + ret = dtpm_for_each_child(hierarchy, NULL, NULL); + if (ret) + goto out_err; + + for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) { + + if (!dtpm_subsys[i]->init) + continue; + + ret = dtpm_subsys[i]->init(); + if (ret) + pr_info("Failed to initialze '%s': %d", + dtpm_subsys[i]->name, ret); } return 0; + +out_err: + powercap_unregister_control_type(pct); +out_pct: + pct = NULL; + + return ret; } -late_initcall(init_dtpm); 
+EXPORT_SYMBOL_GPL(dtpm_create_hierarchy); diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index 506048158a50..f7a25c70dd4c 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -32,9 +32,23 @@ struct dtpm_ops { void (*release)(struct dtpm *); }; +struct device_node; + struct dtpm_subsys_ops { const char *name; int (*init)(void); + int (*setup)(struct dtpm *, struct device_node *); +}; + +enum DTPM_NODE_TYPE { + DTPM_NODE_VIRTUAL = 0, + DTPM_NODE_DT, +}; + +struct dtpm_node { + enum DTPM_NODE_TYPE type; + const char *name; + struct dtpm_node *parent; }; static inline struct dtpm *to_dtpm(struct powercap_zone *zone) @@ -52,4 +66,5 @@ void dtpm_unregister(struct dtpm *dtpm); int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent); +int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table); #endif -- cgit v1.2.3-70-g09d2 From 73dbcb6e37bf0c43bac8c15fe5bcab2bec2367fb Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:35 +0100 Subject: powercap/drivers/dtpm: Add CPU DT initialization support Based on the previous DT changes in the core code, use the 'setup' callback to initialize the CPU DTPM backend. Code is reorganized to stick to the DTPM table description. No functional changes. 
Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220128163537.212248-4-daniel.lezcano@linaro.org --- drivers/powercap/dtpm_cpu.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 5763e0ce2af5..eed5ad688d46 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -176,6 +177,17 @@ static int cpuhp_dtpm_cpu_offline(unsigned int cpu) } static int cpuhp_dtpm_cpu_online(unsigned int cpu) +{ + struct dtpm_cpu *dtpm_cpu; + + dtpm_cpu = per_cpu(dtpm_per_cpu, cpu); + if (dtpm_cpu) + return dtpm_update_power(&dtpm_cpu->dtpm); + + return 0; +} + +static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) { struct dtpm_cpu *dtpm_cpu; struct cpufreq_policy *policy; @@ -183,6 +195,10 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) char name[CPUFREQ_NAME_LEN]; int ret = -ENOMEM; + dtpm_cpu = per_cpu(dtpm_per_cpu, cpu); + if (dtpm_cpu) + return 0; + policy = cpufreq_cpu_get(cpu); if (!policy) return 0; @@ -191,10 +207,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) if (!pd) return -EINVAL; - dtpm_cpu = per_cpu(dtpm_per_cpu, cpu); - if (dtpm_cpu) - return dtpm_update_power(&dtpm_cpu->dtpm); - dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL); if (!dtpm_cpu) return -ENOMEM; @@ -207,7 +219,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu); - ret = dtpm_register(name, &dtpm_cpu->dtpm, NULL); + ret = dtpm_register(name, &dtpm_cpu->dtpm, parent); if (ret) goto out_kfree_dtpm_cpu; @@ -231,7 +243,18 @@ out_kfree_dtpm_cpu: return ret; } -static int __init dtpm_cpu_init(void) +static int dtpm_cpu_setup(struct dtpm *dtpm, struct device_node *np) +{ + int cpu; + + cpu = of_cpu_node_to_id(np); + if (cpu < 0) + return 0; + + return 
__dtpm_cpu_setup(cpu, dtpm); +} + +static int dtpm_cpu_init(void) { int ret; @@ -272,4 +295,5 @@ static int __init dtpm_cpu_init(void) struct dtpm_subsys_ops dtpm_cpu_ops = { .name = KBUILD_MODNAME, .init = dtpm_cpu_init, + .setup = dtpm_cpu_setup, }; -- cgit v1.2.3-70-g09d2 From e446556173170e675a7a321e76ce5fa3587de724 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:36 +0100 Subject: powercap/drivers/dtpm: Add dtpm devfreq with energy model support Currently the dtpm supports the CPUs via cpufreq and the energy model. This change provides the same for the device which supports devfreq. Each device supporting devfreq and having an energy model can be added to the hierarchy. The concept is the same as the cpufreq DTPM support: the QoS is used to aggregate the requests and the energy model gives the value of the instantaneous power consumption ponderated by the load of the device. Cc: Chanwoo Choi Cc: Lukasz Luba Cc: Kyungmin Park Cc: MyungJoo Ham Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220128163537.212248-5-daniel.lezcano@linaro.org --- drivers/powercap/Kconfig | 7 ++ drivers/powercap/Makefile | 1 + drivers/powercap/dtpm_devfreq.c | 203 ++++++++++++++++++++++++++++++++++++++++ drivers/powercap/dtpm_subsys.h | 4 + 4 files changed, 215 insertions(+) create mode 100644 drivers/powercap/dtpm_devfreq.c (limited to 'drivers') diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index b1ca339957e3..515e3ceb3393 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -57,4 +57,11 @@ config DTPM_CPU help This enables support for CPU power limitation based on energy model. + +config DTPM_DEVFREQ + bool "Add device power capping based on the energy model" + depends on DTPM && ENERGY_MODEL + help + This enables support for device power limitation based on + energy model. 
endif diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile index fabcf388a8d3..494617cdad88 100644 --- a/drivers/powercap/Makefile +++ b/drivers/powercap/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_DTPM) += dtpm.o obj-$(CONFIG_DTPM_CPU) += dtpm_cpu.o +obj-$(CONFIG_DTPM_DEVFREQ) += dtpm_devfreq.o obj-$(CONFIG_POWERCAP) += powercap_sys.o obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c new file mode 100644 index 000000000000..91276761a31d --- /dev/null +++ b/drivers/powercap/dtpm_devfreq.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021 Linaro Limited + * + * Author: Daniel Lezcano + * + * The devfreq device combined with the energy model and the load can + * give an estimation of the power consumption as well as limiting the + * power. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +struct dtpm_devfreq { + struct dtpm dtpm; + struct dev_pm_qos_request qos_req; + struct devfreq *devfreq; +}; + +static struct dtpm_devfreq *to_dtpm_devfreq(struct dtpm *dtpm) +{ + return container_of(dtpm, struct dtpm_devfreq, dtpm); +} + +static int update_pd_power_uw(struct dtpm *dtpm) +{ + struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm); + struct devfreq *devfreq = dtpm_devfreq->devfreq; + struct device *dev = devfreq->dev.parent; + struct em_perf_domain *pd = em_pd_get(dev); + + dtpm->power_min = pd->table[0].power; + dtpm->power_min *= MICROWATT_PER_MILLIWATT; + + dtpm->power_max = pd->table[pd->nr_perf_states - 1].power; + dtpm->power_max *= MICROWATT_PER_MILLIWATT; + + return 0; +} + +static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) +{ + struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm); + struct devfreq *devfreq = dtpm_devfreq->devfreq; + struct 
device *dev = devfreq->dev.parent; + struct em_perf_domain *pd = em_pd_get(dev); + unsigned long freq; + u64 power; + int i; + + for (i = 0; i < pd->nr_perf_states; i++) { + + power = pd->table[i].power * MICROWATT_PER_MILLIWATT; + if (power > power_limit) + break; + } + + freq = pd->table[i - 1].frequency; + + dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq); + + power_limit = pd->table[i - 1].power * MICROWATT_PER_MILLIWATT; + + return power_limit; +} + +static void _normalize_load(struct devfreq_dev_status *status) +{ + if (status->total_time > 0xfffff) { + status->total_time >>= 10; + status->busy_time >>= 10; + } + + status->busy_time <<= 10; + status->busy_time /= status->total_time ? : 1; + + status->busy_time = status->busy_time ? : 1; + status->total_time = 1024; +} + +static u64 get_pd_power_uw(struct dtpm *dtpm) +{ + struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm); + struct devfreq *devfreq = dtpm_devfreq->devfreq; + struct device *dev = devfreq->dev.parent; + struct em_perf_domain *pd = em_pd_get(dev); + struct devfreq_dev_status status; + unsigned long freq; + u64 power; + int i; + + mutex_lock(&devfreq->lock); + status = devfreq->last_status; + mutex_unlock(&devfreq->lock); + + freq = DIV_ROUND_UP(status.current_frequency, HZ_PER_KHZ); + _normalize_load(&status); + + for (i = 0; i < pd->nr_perf_states; i++) { + + if (pd->table[i].frequency < freq) + continue; + + power = pd->table[i].power * MICROWATT_PER_MILLIWATT; + power *= status.busy_time; + power >>= 10; + + return power; + } + + return 0; +} + +static void pd_release(struct dtpm *dtpm) +{ + struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm); + + if (dev_pm_qos_request_active(&dtpm_devfreq->qos_req)) + dev_pm_qos_remove_request(&dtpm_devfreq->qos_req); + + kfree(dtpm_devfreq); +} + +static struct dtpm_ops dtpm_ops = { + .set_power_uw = set_pd_power_limit, + .get_power_uw = get_pd_power_uw, + .update_power_uw = update_pd_power_uw, + .release = pd_release, +}; + +static int 
__dtpm_devfreq_setup(struct devfreq *devfreq, struct dtpm *parent) +{ + struct device *dev = devfreq->dev.parent; + struct dtpm_devfreq *dtpm_devfreq; + struct em_perf_domain *pd; + int ret = -ENOMEM; + + pd = em_pd_get(dev); + if (!pd) { + ret = dev_pm_opp_of_register_em(dev, NULL); + if (ret) { + pr_err("No energy model available for '%s'\n", dev_name(dev)); + return -EINVAL; + } + } + + dtpm_devfreq = kzalloc(sizeof(*dtpm_devfreq), GFP_KERNEL); + if (!dtpm_devfreq) + return -ENOMEM; + + dtpm_init(&dtpm_devfreq->dtpm, &dtpm_ops); + + dtpm_devfreq->devfreq = devfreq; + + ret = dtpm_register(dev_name(dev), &dtpm_devfreq->dtpm, parent); + if (ret) { + pr_err("Failed to register '%s': %d\n", dev_name(dev), ret); + kfree(dtpm_devfreq); + return ret; + } + + ret = dev_pm_qos_add_request(dev, &dtpm_devfreq->qos_req, + DEV_PM_QOS_MAX_FREQUENCY, + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); + if (ret) { + pr_err("Failed to add QoS request: %d\n", ret); + goto out_dtpm_unregister; + } + + dtpm_update_power(&dtpm_devfreq->dtpm); + + return 0; + +out_dtpm_unregister: + dtpm_unregister(&dtpm_devfreq->dtpm); + + return ret; +} + +static int dtpm_devfreq_setup(struct dtpm *dtpm, struct device_node *np) +{ + struct devfreq *devfreq; + + devfreq = devfreq_get_devfreq_by_node(np); + if (IS_ERR(devfreq)) + return 0; + + return __dtpm_devfreq_setup(devfreq, dtpm); +} + +struct dtpm_subsys_ops dtpm_devfreq_ops = { + .name = KBUILD_MODNAME, + .setup = dtpm_devfreq_setup, +}; diff --git a/drivers/powercap/dtpm_subsys.h b/drivers/powercap/dtpm_subsys.h index 2a3a2055f60e..db1712938a96 100644 --- a/drivers/powercap/dtpm_subsys.h +++ b/drivers/powercap/dtpm_subsys.h @@ -8,11 +8,15 @@ #define ___DTPM_SUBSYS_H__ extern struct dtpm_subsys_ops dtpm_cpu_ops; +extern struct dtpm_subsys_ops dtpm_devfreq_ops; struct dtpm_subsys_ops *dtpm_subsys[] = { #ifdef CONFIG_DTPM_CPU &dtpm_cpu_ops, #endif +#ifdef CONFIG_DTPM_DEVFREQ + &dtpm_devfreq_ops, +#endif }; #endif -- cgit v1.2.3-70-g09d2 From 
b9d6c47a2be8d273ecc063afda6e9fd66a35116d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 28 Jan 2022 17:35:37 +0100 Subject: rockchip/soc/drivers: Add DTPM description for rk3399 The DTPM framework now supports the hierarchy description. The platform specific code can call the hierarchy creation function with an array of struct dtpm_node pointing to their parent. This patch provides a description of the big / Little CPUs and the GPU and ties them together under a virtual 'package' name. Only rk3399 is described now. The description could be extended in the future with the memory controller with devfreq. The description is always a module and it describes the soft dependencies. The userspace has to load the softdeps module in the right order. Signed-off-by: Daniel Lezcano Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20220128163537.212248-6-daniel.lezcano@linaro.org --- drivers/soc/rockchip/Kconfig | 8 ++++++ drivers/soc/rockchip/Makefile | 1 + drivers/soc/rockchip/dtpm.c | 59 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 drivers/soc/rockchip/dtpm.c (limited to 'drivers') diff --git a/drivers/soc/rockchip/Kconfig b/drivers/soc/rockchip/Kconfig index 25eb2c1e31bb..156ac0e0c8fe 100644 --- a/drivers/soc/rockchip/Kconfig +++ b/drivers/soc/rockchip/Kconfig @@ -34,4 +34,12 @@ config ROCKCHIP_PM_DOMAINS If unsure, say N. +config ROCKCHIP_DTPM + tristate "Rockchip DTPM hierarchy" + depends on DTPM && m + help + Describe the hierarchy for the Dynamic Thermal Power + Management tree on this platform. That will create all the + power capping capable devices. 
+ endif diff --git a/drivers/soc/rockchip/Makefile b/drivers/soc/rockchip/Makefile index 875032f7344e..05f31a4e743c 100644 --- a/drivers/soc/rockchip/Makefile +++ b/drivers/soc/rockchip/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_ROCKCHIP_GRF) += grf.o obj-$(CONFIG_ROCKCHIP_IODOMAIN) += io-domain.o obj-$(CONFIG_ROCKCHIP_PM_DOMAINS) += pm_domains.o +obj-$(CONFIG_ROCKCHIP_DTPM) += dtpm.o diff --git a/drivers/soc/rockchip/dtpm.c b/drivers/soc/rockchip/dtpm.c new file mode 100644 index 000000000000..ebebb748488b --- /dev/null +++ b/drivers/soc/rockchip/dtpm.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021 Linaro Limited + * + * Author: Daniel Lezcano + * + * DTPM hierarchy description + */ +#include +#include +#include +#include + +static struct dtpm_node __initdata rk3399_hierarchy[] = { + [0]{ .name = "rk3399", + .type = DTPM_NODE_VIRTUAL }, + [1]{ .name = "package", + .type = DTPM_NODE_VIRTUAL, + .parent = &rk3399_hierarchy[0] }, + [2]{ .name = "/cpus/cpu@0", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [3]{ .name = "/cpus/cpu@1", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [4]{ .name = "/cpus/cpu@2", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [5]{ .name = "/cpus/cpu@3", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [6]{ .name = "/cpus/cpu@100", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [7]{ .name = "/cpus/cpu@101", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [8]{ .name = "/gpu@ff9a0000", + .type = DTPM_NODE_DT, + .parent = &rk3399_hierarchy[1] }, + [9]{ /* sentinel */ } +}; + +static struct of_device_id __initdata rockchip_dtpm_match_table[] = { + { .compatible = "rockchip,rk3399", .data = rk3399_hierarchy }, + {}, +}; + +static int __init rockchip_dtpm_init(void) +{ + return dtpm_create_hierarchy(rockchip_dtpm_match_table); +} +module_init(rockchip_dtpm_init); + +MODULE_SOFTDEP("pre: panfrost cpufreq-dt"); 
+MODULE_DESCRIPTION("Rockchip DTPM driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:dtpm"); +MODULE_AUTHOR("Daniel Lezcano Date: Sun, 23 Jan 2022 20:45:06 +0800 Subject: cpufreq: Move to_gov_attr_set() to cpufreq.h So it can be reused by other codes. Signed-off-by: Kevin Hao Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor_attr_set.c | 5 ----- include/linux/cpufreq.h | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c index a6f365b9cc1a..771770ea0ed0 100644 --- a/drivers/cpufreq/cpufreq_governor_attr_set.c +++ b/drivers/cpufreq/cpufreq_governor_attr_set.c @@ -8,11 +8,6 @@ #include "cpufreq_governor.h" -static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) -{ - return container_of(kobj, struct gov_attr_set, kobj); -} - static inline struct governor_attr *to_gov_attr(struct attribute *attr) { return container_of(attr, struct governor_attr, attr); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1ab29e61b078..f0dfc0b260ec 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -658,6 +658,11 @@ struct gov_attr_set { /* sysfs ops for cpufreq governors */ extern const struct sysfs_ops governor_sysfs_ops; +static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) +{ + return container_of(kobj, struct gov_attr_set, kobj); +} + void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node); void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node); unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node); -- cgit v1.2.3-70-g09d2 From 7ddf5e37631ac7a96920f0f8aa3c8c4c289aaa25 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 26 Jan 2022 20:43:59 +0100 Subject: cpufreq: longhaul: Replace acpi_bus_get_device() Replace acpi_bus_get_device() that is going to be dropped with acpi_fetch_acpi_dev(). No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/longhaul.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index c538a153ee82..3e000e1a75c6 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -668,9 +668,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, u32 nesting_level, void *context, void **return_value) { - struct acpi_device *d; + struct acpi_device *d = acpi_fetch_acpi_dev(obj_handle); - if (acpi_bus_get_device(obj_handle, &d)) + if (!d) return 0; *return_value = acpi_driver_data(d); -- cgit v1.2.3-70-g09d2 From 7b75bbdf5bedebed387aac6ad8411ed1cf3db5d0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:03 +0100 Subject: powercap/dtpm: Change locking scheme The different functions are all called through the dtpm_create_hierarchy() which handle the mutex. The different functions are used in this context, consequently with the lock always held. Remove all locks taken in the function and add the lock in the hierarchy creation function. 
Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-1-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 95 ++++++++++++++----------------------------------- 1 file changed, 27 insertions(+), 68 deletions(-) (limited to 'drivers') diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 414826a1509b..0b0121c37a1b 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -51,9 +51,7 @@ static int get_max_power_range_uw(struct powercap_zone *pcz, u64 *max_power_uw) { struct dtpm *dtpm = to_dtpm(pcz); - mutex_lock(&dtpm_lock); *max_power_uw = dtpm->power_max - dtpm->power_min; - mutex_unlock(&dtpm_lock); return 0; } @@ -83,14 +81,7 @@ static int __get_power_uw(struct dtpm *dtpm, u64 *power_uw) static int get_power_uw(struct powercap_zone *pcz, u64 *power_uw) { - struct dtpm *dtpm = to_dtpm(pcz); - int ret; - - mutex_lock(&dtpm_lock); - ret = __get_power_uw(dtpm, power_uw); - mutex_unlock(&dtpm_lock); - - return ret; + return __get_power_uw(to_dtpm(pcz), power_uw); } static void __dtpm_rebalance_weight(struct dtpm *dtpm) @@ -133,7 +124,16 @@ static void __dtpm_add_power(struct dtpm *dtpm) } } -static int __dtpm_update_power(struct dtpm *dtpm) +/** + * dtpm_update_power - Update the power on the dtpm + * @dtpm: a pointer to a dtpm structure to update + * + * Function to update the power values of the dtpm node specified in + * parameter. These new values will be propagated to the tree. + * + * Return: zero on success, -EINVAL if the values are inconsistent + */ +int dtpm_update_power(struct dtpm *dtpm) { int ret; @@ -155,26 +155,6 @@ static int __dtpm_update_power(struct dtpm *dtpm) return ret; } -/** - * dtpm_update_power - Update the power on the dtpm - * @dtpm: a pointer to a dtpm structure to update - * - * Function to update the power values of the dtpm node specified in - * parameter. These new values will be propagated to the tree. 
- * - * Return: zero on success, -EINVAL if the values are inconsistent - */ -int dtpm_update_power(struct dtpm *dtpm) -{ - int ret; - - mutex_lock(&dtpm_lock); - ret = __dtpm_update_power(dtpm); - mutex_unlock(&dtpm_lock); - - return ret; -} - /** * dtpm_release_zone - Cleanup when the node is released * @pcz: a pointer to a powercap_zone structure @@ -191,20 +171,14 @@ int dtpm_release_zone(struct powercap_zone *pcz) struct dtpm *dtpm = to_dtpm(pcz); struct dtpm *parent = dtpm->parent; - mutex_lock(&dtpm_lock); - - if (!list_empty(&dtpm->children)) { - mutex_unlock(&dtpm_lock); + if (!list_empty(&dtpm->children)) return -EBUSY; - } if (parent) list_del(&dtpm->sibling); __dtpm_sub_power(dtpm); - mutex_unlock(&dtpm_lock); - if (dtpm->ops) dtpm->ops->release(dtpm); @@ -216,23 +190,12 @@ int dtpm_release_zone(struct powercap_zone *pcz) return 0; } -static int __get_power_limit_uw(struct dtpm *dtpm, int cid, u64 *power_limit) -{ - *power_limit = dtpm->power_limit; - return 0; -} - static int get_power_limit_uw(struct powercap_zone *pcz, int cid, u64 *power_limit) { - struct dtpm *dtpm = to_dtpm(pcz); - int ret; - - mutex_lock(&dtpm_lock); - ret = __get_power_limit_uw(dtpm, cid, power_limit); - mutex_unlock(&dtpm_lock); - - return ret; + *power_limit = to_dtpm(pcz)->power_limit; + + return 0; } /* @@ -292,7 +255,7 @@ static int __set_power_limit_uw(struct dtpm *dtpm, int cid, u64 power_limit) ret = __set_power_limit_uw(child, cid, power); if (!ret) - ret = __get_power_limit_uw(child, cid, &power); + ret = get_power_limit_uw(&child->zone, cid, &power); if (ret) break; @@ -310,8 +273,6 @@ static int set_power_limit_uw(struct powercap_zone *pcz, struct dtpm *dtpm = to_dtpm(pcz); int ret; - mutex_lock(&dtpm_lock); - /* * Don't allow values outside of the power range previously * set when initializing the power numbers. 
@@ -323,8 +284,6 @@ static int set_power_limit_uw(struct powercap_zone *pcz, pr_debug("%s: power limit: %llu uW, power max: %llu uW\n", dtpm->zone.name, dtpm->power_limit, dtpm->power_max); - mutex_unlock(&dtpm_lock); - return ret; } @@ -335,11 +294,7 @@ static const char *get_constraint_name(struct powercap_zone *pcz, int cid) static int get_max_power_uw(struct powercap_zone *pcz, int id, u64 *max_power) { - struct dtpm *dtpm = to_dtpm(pcz); - - mutex_lock(&dtpm_lock); - *max_power = dtpm->power_max; - mutex_unlock(&dtpm_lock); + *max_power = to_dtpm(pcz)->power_max; return 0; } @@ -442,8 +397,6 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) if (IS_ERR(pcz)) return PTR_ERR(pcz); - mutex_lock(&dtpm_lock); - if (parent) { list_add_tail(&dtpm->sibling, &parent->children); dtpm->parent = parent; @@ -459,8 +412,6 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) pr_debug("Registered dtpm node '%s' / %llu-%llu uW, \n", dtpm->zone.name, dtpm->power_min, dtpm->power_max); - mutex_unlock(&dtpm_lock); - return 0; } @@ -605,8 +556,12 @@ int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table) struct device_node *np; int i, ret; - if (pct) - return -EBUSY; + mutex_lock(&dtpm_lock); + + if (pct) { + ret = -EBUSY; + goto out_unlock; + } pct = powercap_register_control_type(NULL, "dtpm", NULL); if (IS_ERR(pct)) { @@ -648,12 +603,16 @@ int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table) dtpm_subsys[i]->name, ret); } + mutex_unlock(&dtpm_lock); + return 0; out_err: powercap_unregister_control_type(pct); out_pct: pct = NULL; +out_unlock: + mutex_unlock(&dtpm_lock); return ret; } -- cgit v1.2.3-70-g09d2 From 0aea2e4ec2a2bfa2d7e8820e37ba5b5ce04f20a5 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:04 +0100 Subject: powercap/dtpm_cpu: Reset per_cpu variable in the release function The release function does not reset the per cpu variable when it is called. 
That will prevent creation again as the variable will already be set from the previous creation. Fix it by resetting them. Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-2-daniel.lezcano@linaro.org --- drivers/powercap/dtpm_cpu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index eed5ad688d46..71f45d2f5a60 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -151,10 +151,17 @@ static int update_pd_power_uw(struct dtpm *dtpm) static void pd_release(struct dtpm *dtpm) { struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); + struct cpufreq_policy *policy; if (freq_qos_request_active(&dtpm_cpu->qos_req)) freq_qos_remove_request(&dtpm_cpu->qos_req); + policy = cpufreq_cpu_get(dtpm_cpu->cpu); + if (policy) { + for_each_cpu(dtpm_cpu->cpu, policy->related_cpus) + per_cpu(dtpm_per_cpu, dtpm_cpu->cpu) = NULL; + } + kfree(dtpm_cpu); } -- cgit v1.2.3-70-g09d2 From 690de0b4013f6f35bc9fced12746b9f396c471ae Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:05 +0100 Subject: powercap/dtpm: Fixup kfree for virtual node When the node is virtual there is no release function associated which can free the memory. Free the memory when no 'ops' exists. 
Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220130210210.549877-3-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 0b0121c37a1b..7bddd25a6767 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -181,12 +181,12 @@ int dtpm_release_zone(struct powercap_zone *pcz) if (dtpm->ops) dtpm->ops->release(dtpm); + else + kfree(dtpm); if (root == dtpm) root = NULL; - kfree(dtpm); - return 0; } -- cgit v1.2.3-70-g09d2 From c404c64d64bc31bebe8a2015103671f7cd282731 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:06 +0100 Subject: powercap/dtpm: Destroy hierarchy function The hierarchy creation function exists but without a destroy hierarchy function. Due to that, the modules creating the hierarchy cannot be unloaded properly because they don't have an exit callback. Provide the dtpm_destroy_hierarchy() function to remove the previously created hierarchy. The function relies on all the release mechanisms implemented by the underlying powercap framework. 
Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-4-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/dtpm.h | 3 +++ 2 files changed, 46 insertions(+) (limited to 'drivers') diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 7bddd25a6767..d9d74f981118 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -617,3 +617,46 @@ out_unlock: return ret; } EXPORT_SYMBOL_GPL(dtpm_create_hierarchy); + +static void __dtpm_destroy_hierarchy(struct dtpm *dtpm) +{ + struct dtpm *child, *aux; + + list_for_each_entry_safe(child, aux, &dtpm->children, sibling) + __dtpm_destroy_hierarchy(child); + + /* + * At this point, we know all children were removed from the + * recursive call before + */ + dtpm_unregister(dtpm); +} + +void dtpm_destroy_hierarchy(void) +{ + int i; + + mutex_lock(&dtpm_lock); + + if (!pct) + goto out_unlock; + + __dtpm_destroy_hierarchy(root); + + + for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) { + + if (!dtpm_subsys[i]->exit) + continue; + + dtpm_subsys[i]->exit(); + } + + powercap_unregister_control_type(pct); + + pct = NULL; + +out_unlock: + mutex_unlock(&dtpm_lock); +} +EXPORT_SYMBOL_GPL(dtpm_destroy_hierarchy); diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index f7a25c70dd4c..a4a13514b730 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -37,6 +37,7 @@ struct device_node; struct dtpm_subsys_ops { const char *name; int (*init)(void); + void (*exit)(void); int (*setup)(struct dtpm *, struct device_node *); }; @@ -67,4 +68,6 @@ void dtpm_unregister(struct dtpm *dtpm); int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent); int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table); + +void dtpm_destroy_hierarchy(void); #endif -- cgit v1.2.3-70-g09d2 From 4712a236db409d5ee5dccb8c7e57fe54d7d3ec66 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 
Jan 2022 22:02:07 +0100 Subject: powercap/dtpm: Move the 'root' reset place The 'root' node is checked every time a dtpm node is destroyed. When we reach the end of the hierarchy destruction function, we can unconditionally set the 'root' node to NULL again. Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-5-daniel.lezcano@linaro.org --- drivers/powercap/dtpm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index d9d74f981118..ec931a06d90a 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -184,9 +184,6 @@ int dtpm_release_zone(struct powercap_zone *pcz) else kfree(dtpm); - if (root == dtpm) - root = NULL; - return 0; } @@ -656,6 +653,8 @@ void dtpm_destroy_hierarchy(void) pct = NULL; + root = NULL; + out_unlock: mutex_unlock(&dtpm_lock); } -- cgit v1.2.3-70-g09d2 From bfded2ca8f36935ff13b3b30f8e66d6135e178ac Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:08 +0100 Subject: powercap/dtpm_cpu: Add exit function Now that we can destroy the hierarchy, the code must remove what it had put in place at the creation. In our case, the cpu hotplug callbacks. 
Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-6-daniel.lezcano@linaro.org --- drivers/powercap/dtpm_cpu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 71f45d2f5a60..bca2f912d349 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -299,8 +299,15 @@ static int dtpm_cpu_init(void) return 0; } +static void dtpm_cpu_exit(void) +{ + cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN); + cpuhp_remove_state_nocalls(CPUHP_AP_DTPM_CPU_DEAD); +} + struct dtpm_subsys_ops dtpm_cpu_ops = { .name = KBUILD_MODNAME, .init = dtpm_cpu_init, + .exit = dtpm_cpu_exit, .setup = dtpm_cpu_setup, }; -- cgit v1.2.3-70-g09d2 From f1ebef9e55f3c49063b575e97d2019832b8f8ef9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 30 Jan 2022 22:02:09 +0100 Subject: dtpm/soc/rk3399: Add the ability to unload the module The dtpm hierarchy can now be removed with the dtpm_destroy_hierarchy() function. Add the module_exit() callback so the module can be unloaded by removing the previously created hierarchy. 
Signed-off-by: Daniel Lezcano Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220130210210.549877-7-daniel.lezcano@linaro.org --- drivers/soc/rockchip/dtpm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/soc/rockchip/dtpm.c b/drivers/soc/rockchip/dtpm.c index ebebb748488b..5a23784b5221 100644 --- a/drivers/soc/rockchip/dtpm.c +++ b/drivers/soc/rockchip/dtpm.c @@ -52,6 +52,12 @@ static int __init rockchip_dtpm_init(void) } module_init(rockchip_dtpm_init); +static void __exit rockchip_dtpm_exit(void) +{ + return dtpm_destroy_hierarchy(); +} +module_exit(rockchip_dtpm_exit); + MODULE_SOFTDEP("pre: panfrost cpufreq-dt"); MODULE_DESCRIPTION("Rockchip DTPM driver"); MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From e7d90cfac5510f8c94baa18f9f3f7808859c8332 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 17 Feb 2022 13:49:50 +0100 Subject: PM: domains: Prevent power off for parent unless child is in deepest state A PM domain managed by genpd may support multiple idlestates (power-off states). During genpd_power_off() a genpd governor may be asked to select one of the idlestates based upon the dev PM QoS constraints, for example. However, there is a problem with the behaviour around this in genpd. More precisely, a parent-domain is allowed to be powered off, no matter of what idlestate that has been selected for the child-domain. For the stm32mp1 platform from STMicro, this behaviour doesn't play well. Instead, the parent-domain must not be powered off, unless the deepest idlestate has been selected for the child-domain. As the current behaviour in genpd is quite questionable anyway, let's simply change it into what is needed by the stm32mp1 platform. If it surprisingly turns out that other platforms may need a different behaviour from genpd, then we will have to revisit this to find a way to make it configurable. Signed-off-by: Ulf Hansson Reviewed-by: Dmitry Osipenko Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 5db704f02e71..c87588c21700 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -636,6 +636,18 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, atomic_read(&genpd->sd_count) > 0) return -EBUSY; + /* + * The children must be in their deepest (powered-off) states to allow + * the parent to be powered off. Note that, there's no need for + * additional locking, as powering on a child, requires the parent's + * lock to be acquired first. + */ + list_for_each_entry(link, &genpd->parent_links, parent_node) { + struct generic_pm_domain *child = link->child; + if (child->state_idx < child->state_count - 1) + return -EBUSY; + } + list_for_each_entry(pdd, &genpd->dev_list, list_node) { enum pm_qos_flags_status stat; @@ -1073,6 +1085,13 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock, || atomic_read(&genpd->sd_count) > 0) return; + /* Check that the children are in their deepest (powered-off) state. */ + list_for_each_entry(link, &genpd->parent_links, parent_node) { + struct generic_pm_domain *child = link->child; + if (child->state_idx < child->state_count - 1) + return; + } + /* Choose the deepest state when suspending */ genpd->state_idx = genpd->state_count - 1; if (_genpd_power_off(genpd, false)) -- cgit v1.2.3-70-g09d2 From 9a6582b839281ee0e874621f1a2139d2aeb9489e Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Wed, 23 Feb 2022 09:03:23 +0100 Subject: PM: domains: use dev_err_probe() to simplify error handling dev_err_probe() can reduce code size, makes the code easier to read and has the added benefit of recording the defer reason for later read out. Use it where appropriate. This also fixes an issue, where an error message in __genpd_dev_pm_attach was not terminated by a line break. 
Signed-off-by: Ahmad Fatoum Signed-off-by: Sascha Hauer Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c87588c21700..c0d9ad01b32c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2267,12 +2267,8 @@ int of_genpd_add_provider_simple(struct device_node *np, /* Parse genpd OPP table */ if (genpd->set_performance_state) { ret = dev_pm_opp_of_add_table(&genpd->dev); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(&genpd->dev, "Failed to add OPP table: %d\n", - ret); - return ret; - } + if (ret) + return dev_err_probe(&genpd->dev, ret, "Failed to add OPP table\n"); /* * Save table for faster processing while setting performance @@ -2331,9 +2327,8 @@ int of_genpd_add_provider_onecell(struct device_node *np, if (genpd->set_performance_state) { ret = dev_pm_opp_of_add_table_indexed(&genpd->dev, i); if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(&genpd->dev, "Failed to add OPP table for index %d: %d\n", - i, ret); + dev_err_probe(&genpd->dev, ret, + "Failed to add OPP table for index %d\n", i); goto error; } @@ -2691,12 +2686,8 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, ret = genpd_add_device(pd, dev, base_dev); mutex_unlock(&gpd_list_lock); - if (ret < 0) { - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to add to PM domain %s: %d", - pd->name, ret); - return ret; - } + if (ret < 0) + return dev_err_probe(dev, ret, "failed to add to PM domain %s\n", pd->name); dev->pm_domain->detach = genpd_dev_pm_detach; dev->pm_domain->sync = genpd_dev_pm_sync; -- cgit v1.2.3-70-g09d2 From f6bfe8b5b2c2a5ac8bd2fc7bca3706e6c3fc26d8 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 25 Feb 2022 14:48:15 +0800 Subject: PM: domains: Fix sleep-in-atomic bug caused by genpd_debug_remove() When a genpd with 
GENPD_FLAG_IRQ_SAFE gets removed, the following sleep-in-atomic bug will be seen, as genpd_debug_remove() will be called with a spinlock being held. [ 0.029183] BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1460 [ 0.029204] in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 1, name: swapper/0 [ 0.029219] preempt_count: 1, expected: 0 [ 0.029230] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.17.0-rc4+ #489 [ 0.029245] Hardware name: Thundercomm TurboX CM2290 (DT) [ 0.029256] Call trace: [ 0.029265] dump_backtrace.part.0+0xbc/0xd0 [ 0.029285] show_stack+0x3c/0xa0 [ 0.029298] dump_stack_lvl+0x7c/0xa0 [ 0.029311] dump_stack+0x18/0x34 [ 0.029323] __might_resched+0x10c/0x13c [ 0.029338] __might_sleep+0x4c/0x80 [ 0.029351] down_read+0x24/0xd0 [ 0.029363] lookup_one_len_unlocked+0x9c/0xcc [ 0.029379] lookup_positive_unlocked+0x10/0x50 [ 0.029392] debugfs_lookup+0x68/0xac [ 0.029406] genpd_remove.part.0+0x12c/0x1b4 [ 0.029419] of_genpd_remove_last+0xa8/0xd4 [ 0.029434] psci_cpuidle_domain_probe+0x174/0x53c [ 0.029449] platform_probe+0x68/0xe0 [ 0.029462] really_probe+0x190/0x430 [ 0.029473] __driver_probe_device+0x90/0x18c [ 0.029485] driver_probe_device+0x40/0xe0 [ 0.029497] __driver_attach+0xf4/0x1d0 [ 0.029508] bus_for_each_dev+0x70/0xd0 [ 0.029523] driver_attach+0x24/0x30 [ 0.029534] bus_add_driver+0x164/0x22c [ 0.029545] driver_register+0x78/0x130 [ 0.029556] __platform_driver_register+0x28/0x34 [ 0.029569] psci_idle_init_domains+0x1c/0x28 [ 0.029583] do_one_initcall+0x50/0x1b0 [ 0.029595] kernel_init_freeable+0x214/0x280 [ 0.029609] kernel_init+0x2c/0x13c [ 0.029622] ret_from_fork+0x10/0x20 It doesn't seem necessary to call genpd_debug_remove() with the lock, so move it out from locking to fix the problem. Fixes: 718072ceb211 ("PM: domains: create debugfs nodes when adding power domains") Signed-off-by: Shawn Guo Reviewed-by: Ulf Hansson Cc: 5.11+ # 5.11+ Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c0d9ad01b32c..1ee878d126fd 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2077,9 +2077,9 @@ static int genpd_remove(struct generic_pm_domain *genpd) kfree(link); } - genpd_debug_remove(genpd); list_del(&genpd->gpd_list_node); genpd_unlock(genpd); + genpd_debug_remove(genpd); cancel_work_sync(&genpd->power_off_work); if (genpd_is_cpu_domain(genpd)) free_cpumask_var(genpd->cpus); -- cgit v1.2.3-70-g09d2 From 7dfe105dfc724c82ed3d79a4c47439c516a2410b Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 11 Feb 2022 08:10:27 -0800 Subject: PM: sleep: wakeup: Fix typos in comments Remove the second 'the'. Replace the second 'of' with 'the'. Replace 'couter' with 'counter'. Signed-off-by: Tom Rix Acked-by: Randy Dunlap Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeirq.c | 2 +- drivers/base/power/wakeup.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 0004db4a9d3b..d487a6bac630 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -289,7 +289,7 @@ EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq); * * Enables wakeirq conditionally. We need to enable wake-up interrupt * lazily on the first rpm_suspend(). This is needed as the consumer device - * starts in RPM_SUSPENDED state, and the the first pm_runtime_get() would + * starts in RPM_SUSPENDED state, and the first pm_runtime_get() would * otherwise try to disable already disabled wakeirq. The wake-up interrupt * starts disabled with IRQ_NOAUTOEN set. 
* diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 8666590201c9..a57d469676ca 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -587,7 +587,7 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws) * @ws: Wakeup source to handle. * * Update the @ws' statistics and, if @ws has just been activated, notify the PM - * core of the event by incrementing the counter of of wakeup events being + * core of the event by incrementing the counter of the wakeup events being * processed. */ static void wakeup_source_activate(struct wakeup_source *ws) @@ -733,7 +733,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) /* * Increment the counter of registered wakeup events and decrement the - * couter of wakeup events in progress simultaneously. + * counter of wakeup events in progress simultaneously. */ cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count); trace_wakeup_source_deactivate(ws->name, cec); -- cgit v1.2.3-70-g09d2 From 5bf19d0aa3d2f22d4c050c030f436ab97b7e6f1e Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Tue, 1 Mar 2022 11:28:54 +0800 Subject: powercap: DTPM: dtpm_node_callback[] can be static drivers/powercap/dtpm.c:525:22: warning: symbol 'dtpm_node_callback' was not declared. Should it be static? Fixes: 3759ec678e89 ("powercap/drivers/dtpm: Add hierarchy creation") Reported-by: kernel test robot Signed-off-by: kernel test robot Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/dtpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index ec931a06d90a..41010dec069d 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -470,7 +470,7 @@ static struct dtpm *dtpm_setup_dt(const struct dtpm_node *hierarchy, typedef struct dtpm * (*dtpm_node_callback_t)(const struct dtpm_node *, struct dtpm *); -dtpm_node_callback_t dtpm_node_callback[] = { +static dtpm_node_callback_t dtpm_node_callback[] = { [DTPM_NODE_VIRTUAL] = dtpm_setup_virtual, [DTPM_NODE_DT] = dtpm_setup_dt, }; -- cgit v1.2.3-70-g09d2 From 55ddcd9f3226b21c5b63d893712c85e85d73f4cb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 1 Mar 2022 09:03:27 +0000 Subject: powercap: DTPM: Fix spelling mistake "initialze" -> "initialize" There is a spelling mistake in a pr_info() message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Rafael J. Wysocki --- drivers/powercap/dtpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 41010dec069d..ce920f17f45f 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -596,7 +596,7 @@ int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table) ret = dtpm_subsys[i]->init(); if (ret) - pr_info("Failed to initialze '%s': %d", + pr_info("Failed to initialize '%s': %d", dtpm_subsys[i]->name, ret); } -- cgit v1.2.3-70-g09d2 From eb087f305919ee8169ad65665610313e74260463 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 25 Feb 2022 13:06:46 -0600 Subject: ACPI: processor idle: Check for architectural support for LPI When `osc_pc_lpi_support_confirmed` is set through `_OSC` and `_LPI` is populated then the cpuidle driver assumes that LPI is fully functional. However currently the kernel only provides architectural support for LPI on ARM. 
This leads to high power consumption on X86 platforms that otherwise try to enable LPI. So probe whether or not LPI support is implemented before enabling LPI in the kernel. This is done by overloading `acpi_processor_ffh_lpi_probe` to check whether it returns `-EOPNOTSUPP`. It also means that all future implementations of `acpi_processor_ffh_lpi_probe` will need to follow these semantics as well. Reviewed-by: Sudeep Holla Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index f8e9fa82cb9b..32b20efff5f8 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1080,6 +1080,11 @@ static int flatten_lpi_states(struct acpi_processor *pr, return 0; } +int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu) +{ + return -EOPNOTSUPP; +} + static int acpi_processor_get_lpi_info(struct acpi_processor *pr) { int ret, i; @@ -1088,6 +1093,11 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) struct acpi_device *d = NULL; struct acpi_lpi_states_array info[2], *tmp, *prev, *curr; + /* make sure our architecture has support */ + ret = acpi_processor_ffh_lpi_probe(pr->id); + if (ret == -EOPNOTSUPP) + return ret; + if (!osc_pc_lpi_support_confirmed) return -EOPNOTSUPP; @@ -1139,11 +1149,6 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) return 0; } -int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu) -{ - return -ENODEV; -} - int __weak acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) { return -ENODEV; -- cgit v1.2.3-70-g09d2 From b4060db9251f919506e4d672737c6b8ab9a84701 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 23 Feb 2022 08:34:48 -0800 Subject: PM: runtime: Have devm_pm_runtime_enable() handle pm_runtime_dont_use_autosuspend() The PM Runtime docs say: Drivers in 
->remove() callback should undo the runtime PM changes done in ->probe(). Usually this means calling pm_runtime_disable(), pm_runtime_dont_use_autosuspend() etc. From grepping code, it's clear that many people aren't aware of the need to call pm_runtime_dont_use_autosuspend(). When brainstorming solutions, one idea that came up was to leverage the new-ish devm_pm_runtime_enable() function. The idea here is that: * When the devm action is called we know that the driver is being removed. It's the perfect time to undo the use_autosuspend. * The code of pm_runtime_dont_use_autosuspend() already handles the case of being called when autosuspend wasn't enabled. Suggested-by: Laurent Pinchart Signed-off-by: Douglas Anderson Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 5 +++++ include/linux/pm_runtime.h | 4 ++++ 2 files changed, 9 insertions(+) (limited to 'drivers') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 2f3cce17219b..d4059e6ffeae 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1476,11 +1476,16 @@ EXPORT_SYMBOL_GPL(pm_runtime_enable); static void pm_runtime_disable_action(void *data) { + pm_runtime_dont_use_autosuspend(data); pm_runtime_disable(data); } /** * devm_pm_runtime_enable - devres-enabled version of pm_runtime_enable. + * + * NOTE: this will also handle calling pm_runtime_dont_use_autosuspend() for + * you at driver exit time if needed. + * * @dev: Device to handle. */ int devm_pm_runtime_enable(struct device *dev) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 9f09601c465a..2bff6a10095d 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -567,6 +567,10 @@ static inline void pm_runtime_disable(struct device *dev) * Allow the runtime PM autosuspend mechanism to be used for @dev whenever * requested (or "autosuspend" will be handled as direct runtime-suspend for * it). 
+ * + * NOTE: It's important to undo this with pm_runtime_dont_use_autosuspend() + * at driver exit time unless your driver initially enabled pm_runtime + * with devm_pm_runtime_enable() (which handles it for you). */ static inline void pm_runtime_use_autosuspend(struct device *dev) { -- cgit v1.2.3-70-g09d2 From 9edf3c0ffef0ec1bed8300315852b5c6a0997130 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 2 Mar 2022 10:15:58 +0200 Subject: intel_idle: add SPR support Add Sapphire Rapids Xeon support. Up until very recently, the C1 and C1E C-states were independent, but this has changed in some new chips, including Sapphire Rapids Xeon (SPR). In these chips the C1 and C1E states cannot be enabled at the same time. The "C1E promotion" bit in 'MSR_IA32_POWER_CTL' also has its semantics changed a bit. Here are the C1, C1E, and "C1E promotion" bit rules on Xeons before SPR. 1. If C1E promotion bit is disabled. a. C1 requests end up with C1 C-state. b. C1E requests end up with C1E C-state. 2. If C1E promotion bit is enabled. a. C1 requests end up with C1E C-state. b. C1E requests end up with C1E C-state. Here are the C1, C1E, and "C1E promotion" bit rules on Sapphire Rapids Xeon. 1. If C1E promotion bit is disabled. a. C1 requests end up with C1 C-state. b. C1E requests end up with C1 C-state. 2. If C1E promotion bit is enabled. a. C1 requests end up with C1E C-state. b. C1E requests end up with C1E C-state. Before SPR Xeon, the 'intel_idle' driver was disabling C1E promotion and was exposing C1 and C1E as independent C-states. But on SPR, C1 and C1E cannot be enabled at the same time. This patch adds both C1 and C1E states. However, C1E is marked with the "CPUIDLE_FLAG_UNUSABLE" flag, which means that it won't be registered by default. The C1E promotion bit will be cleared, which means that by default only C1 and C6 will be registered on SPR. The next patch will add an option for enabling C1E and disabling C1 on SPR. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 0b66e25c0e2d..1c7c25909e54 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -761,6 +761,46 @@ static struct cpuidle_state icx_cstates[] __initdata = { .enter = NULL } }; +/* + * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice + * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in + * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 + * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then + * both C1 and C1E requests end up with C1, so there is effectively no C1E. + * + * By default we enable C1 and disable C1E by marking it with + * 'CPUIDLE_FLAG_UNUSABLE'. + */ +static struct cpuidle_state spr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | \ + CPUIDLE_FLAG_UNUSABLE, + .exit_latency = 2, + .target_residency = 4, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 290, + .target_residency = 800, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static struct cpuidle_state atom_cstates[] __initdata = { { .name = "C1E", @@ -1104,6 +1144,12 @@ static const struct idle_cpu idle_cpu_icx __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_spr __initconst = { + .state_table = spr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = 
true, +}; + static const struct idle_cpu idle_cpu_avn __initconst = { .state_table = avn_cstates, .disable_promotion_to_c1e = true, @@ -1166,6 +1212,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), -- cgit v1.2.3-70-g09d2 From da0e58c038e60e7e65d30813ebdfe91687aa8a24 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 2 Mar 2022 10:15:59 +0200 Subject: intel_idle: add 'preferred_cstates' module argument On Sapphire Rapids Xeon (SPR) the C1 and C1E states are basically mutually exclusive - only one of them can be enabled. By default, 'intel_idle' driver enables C1 and disables C1E. However, some users prefer to use C1E instead of C1, because it saves more energy. This patch adds a new module parameter ('preferred_cstates') for enabling C1E and disabling C1. Here is the idea behind it. 1. This option has effect only for "mutually exclusive" C-states like C1 and C1E on SPR. 2. It does not have any effect on independent C-states, which do not require other C-states to be disabled (most states on most platforms as of today). 3. For mutually exclusive C-states, the 'intel_idle' driver always has a reasonable default, such as enabling C1 on SPR by default. On other platforms, the default may be different. 4. Users can override the default using the 'preferred_cstates' parameter. 5. The parameter accepts the preferred C-states bit-mask, similarly to the existing 'states_off' parameter. 6. This parameter is not limited to C1/C1E, and leaves room for supporting other mutually exclusive C-states, if they come in the future. 
Today 'intel_idle' can only be compiled-in, which means that on SPR, in order to disable C1 and enable C1E, users should boot with the following kernel argument: intel_idle.preferred_cstates=4 Signed-off-by: Artem Bityutskiy Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 1c7c25909e54..b2688c326522 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -64,6 +64,7 @@ static struct cpuidle_driver intel_idle_driver = { /* intel_idle.max_cstate=0 disables driver */ static int max_cstate = CPUIDLE_STATE_MAX - 1; static unsigned int disabled_states_mask; +static unsigned int preferred_states_mask; static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; @@ -1400,6 +1401,8 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ +static void c1e_promotion_enable(void); + /** * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. * @@ -1570,6 +1573,26 @@ static void __init skx_idle_state_table_update(void) } } +/** + * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. + */ +static void __init spr_idle_state_table_update(void) +{ + /* Check if user prefers C1E over C1. */ + if (preferred_states_mask & BIT(2)) { + if (preferred_states_mask & BIT(1)) + /* Both can't be enabled, stick to the defaults. */ + return; + + spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; + spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; + + /* Enable C1E using the "C1E promotion" bit. 
*/ + c1e_promotion_enable(); + disable_promotion_to_c1e = false; + } +} + static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) { unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; @@ -1604,6 +1627,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) case INTEL_FAM6_SKYLAKE_X: skx_idle_state_table_update(); break; + case INTEL_FAM6_SAPPHIRERAPIDS_X: + spr_idle_state_table_update(); + break; } for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { @@ -1676,6 +1702,15 @@ static void auto_demotion_disable(void) wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); } +static void c1e_promotion_enable(void) +{ + unsigned long long msr_bits; + + rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + msr_bits |= 0x2; + wrmsrl(MSR_IA32_POWER_CTL, msr_bits); +} + static void c1e_promotion_disable(void) { unsigned long long msr_bits; @@ -1845,3 +1880,14 @@ module_param(max_cstate, int, 0444); */ module_param_named(states_off, disabled_states_mask, uint, 0444); MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); +/* + * Some platforms come with mutually exclusive C-states, so that if one is + * enabled, the other C-states must not be used. Example: C1 and C1E on + * Sapphire Rapids platform. This parameter allows for selecting the + * preferred C-states among the groups of mutually exclusive C-states - the + * selected C-states will be registered, the other C-states from the mutually + * exclusive group won't be registered. If the platform has no mutually + * exclusive C-states, this parameter has no effect. 
+ */ +module_param_named(preferred_cstates, preferred_states_mask, uint, 0444); +MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); -- cgit v1.2.3-70-g09d2 From 3a9cf77b60dc9839b6674943bb7c9dcd524b6294 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 2 Mar 2022 10:16:00 +0200 Subject: intel_idle: add core C6 optimization for SPR Add a Sapphire Rapids Xeon C6 optimization, similar to what we have for Sky Lake Xeon: if package C6 is disabled, adjust C6 exit latency and target residency to match core C6 values, instead of using the default package C6 values. Signed-off-by: Artem Bityutskiy Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b2688c326522..e385ddf15b32 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1578,6 +1578,8 @@ static void __init skx_idle_state_table_update(void) */ static void __init spr_idle_state_table_update(void) { + unsigned long long msr; + /* Check if user prefers C1E over C1. */ if (preferred_states_mask & BIT(2)) { if (preferred_states_mask & BIT(1)) @@ -1591,6 +1593,19 @@ static void __init spr_idle_state_table_update(void) c1e_promotion_enable(); disable_promotion_to_c1e = false; } + + /* + * By default, the C6 state assumes the worst-case scenario of package + * C6. However, if PC6 is disabled, we update the numbers to match + * core C6. + */ + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); + + /* Limit value 2 and above allow for PC6. 
*/ + if ((msr & 0x7) < 2) { + spr_cstates[2].exit_latency = 190; + spr_cstates[2].target_residency = 600; + } } static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) -- cgit v1.2.3-70-g09d2 From a759de6991b35ad437adba32b5f0cb2fd9e75929 Mon Sep 17 00:00:00 2001 From: Youngjin Jang Date: Tue, 8 Mar 2022 04:07:39 +0900 Subject: PM: sleep: Add device name to suspend_report_result() Currently, suspend_report_result() prints only function information. If any driver uses a common PM function, nobody knows who exactly called the failing function. A device pointer is needed to recognize the failing device. For example: PM: dpm_run_callback(): pnp_bus_suspend+0x0/0x10 returns 0 PM: dpm_run_callback(): pci_pm_suspend+0x0/0x150 returns 0 become after the change: serial 00:05: PM: dpm_run_callback(): pnp_bus_suspend+0x0/0x10 returns 0 pci 0000:00:01.3: PM: dpm_run_callback(): pci_pm_suspend+0x0/0x150 returns 0 Signed-off-by: Youngjin Jang [ rjw: Changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 10 +++++----- drivers/pci/pci-driver.c | 14 +++++++------- drivers/pnp/driver.c | 2 +- drivers/usb/core/hcd-pci.c | 4 ++-- include/linux/pm.h | 8 ++++---- 5 files changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 04ea92cbd9cf..41e17b8c2c20 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -485,7 +485,7 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev, trace_device_pm_callback_start(dev, info, state.event); error = cb(dev); trace_device_pm_callback_end(dev, error); - suspend_report_result(cb, error); + suspend_report_result(dev, cb, error); initcall_debug_report(dev, calltime, cb, error); @@ -1568,7 +1568,7 @@ static int legacy_suspend(struct device *dev, pm_message_t state, trace_device_pm_callback_start(dev, info, state.event); error = cb(dev, state); trace_device_pm_callback_end(dev, error); - suspend_report_result(cb, error); + suspend_report_result(dev, cb, error); initcall_debug_report(dev, calltime, cb, error); @@ -1855,7 +1855,7 @@ unlock: device_unlock(dev); if (ret < 0) { - suspend_report_result(callback, ret); + suspend_report_result(dev, callback, ret); pm_runtime_put(dev); return ret; } @@ -1960,10 +1960,10 @@ int dpm_suspend_start(pm_message_t state) } EXPORT_SYMBOL_GPL(dpm_suspend_start); -void __suspend_report_result(const char *function, void *fn, int ret) +void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret) { if (ret) - pr_err("%s(): %pS returns %d\n", function, fn, ret); + dev_err(dev, "%s(): %pS returns %d\n", function, fn, ret); } EXPORT_SYMBOL_GPL(__suspend_report_result); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 588588cfda48..415f7664b010 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -596,7 +596,7 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) int error; 
error = drv->suspend(pci_dev, state); - suspend_report_result(drv->suspend, error); + suspend_report_result(dev, drv->suspend, error); if (error) return error; @@ -775,7 +775,7 @@ static int pci_pm_suspend(struct device *dev) int error; error = pm->suspend(dev); - suspend_report_result(pm->suspend, error); + suspend_report_result(dev, pm->suspend, error); if (error) return error; @@ -821,7 +821,7 @@ static int pci_pm_suspend_noirq(struct device *dev) int error; error = pm->suspend_noirq(dev); - suspend_report_result(pm->suspend_noirq, error); + suspend_report_result(dev, pm->suspend_noirq, error); if (error) return error; @@ -1010,7 +1010,7 @@ static int pci_pm_freeze(struct device *dev) int error; error = pm->freeze(dev); - suspend_report_result(pm->freeze, error); + suspend_report_result(dev, pm->freeze, error); if (error) return error; } @@ -1030,7 +1030,7 @@ static int pci_pm_freeze_noirq(struct device *dev) int error; error = pm->freeze_noirq(dev); - suspend_report_result(pm->freeze_noirq, error); + suspend_report_result(dev, pm->freeze_noirq, error); if (error) return error; } @@ -1116,7 +1116,7 @@ static int pci_pm_poweroff(struct device *dev) int error; error = pm->poweroff(dev); - suspend_report_result(pm->poweroff, error); + suspend_report_result(dev, pm->poweroff, error); if (error) return error; } @@ -1154,7 +1154,7 @@ static int pci_pm_poweroff_noirq(struct device *dev) int error; error = pm->poweroff_noirq(dev); - suspend_report_result(pm->poweroff_noirq, error); + suspend_report_result(dev, pm->poweroff_noirq, error); if (error) return error; } diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c index cc6757dfa3f1..c02e7bf643a6 100644 --- a/drivers/pnp/driver.c +++ b/drivers/pnp/driver.c @@ -171,7 +171,7 @@ static int __pnp_bus_suspend(struct device *dev, pm_message_t state) if (pnp_drv->driver.pm && pnp_drv->driver.pm->suspend) { error = pnp_drv->driver.pm->suspend(dev); - suspend_report_result(pnp_drv->driver.pm->suspend, error); + 
suspend_report_result(dev, pnp_drv->driver.pm->suspend, error); if (error) return error; } diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index d630cccd2e6e..dd44e37a454a 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -446,7 +446,7 @@ static int suspend_common(struct device *dev, bool do_wakeup) HCD_WAKEUP_PENDING(hcd->shared_hcd)) return -EBUSY; retval = hcd->driver->pci_suspend(hcd, do_wakeup); - suspend_report_result(hcd->driver->pci_suspend, retval); + suspend_report_result(dev, hcd->driver->pci_suspend, retval); /* Check again in case wakeup raced with pci_suspend */ if ((retval == 0 && do_wakeup && HCD_WAKEUP_PENDING(hcd)) || @@ -556,7 +556,7 @@ static int hcd_pci_suspend_noirq(struct device *dev) dev_dbg(dev, "--> PCI %s\n", pci_power_name(pci_dev->current_state)); } else { - suspend_report_result(pci_prepare_to_sleep, retval); + suspend_report_result(dev, pci_prepare_to_sleep, retval); return retval; } diff --git a/include/linux/pm.h b/include/linux/pm.h index f7d2be686359..e65b3ab28377 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -770,11 +770,11 @@ extern int dpm_suspend_late(pm_message_t state); extern int dpm_suspend(pm_message_t state); extern int dpm_prepare(pm_message_t state); -extern void __suspend_report_result(const char *function, void *fn, int ret); +extern void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret); -#define suspend_report_result(fn, ret) \ +#define suspend_report_result(dev, fn, ret) \ do { \ - __suspend_report_result(__func__, fn, ret); \ + __suspend_report_result(__func__, dev, fn, ret); \ } while (0) extern int device_pm_wait_for_dev(struct device *sub, struct device *dev); @@ -814,7 +814,7 @@ static inline int dpm_suspend_start(pm_message_t state) return 0; } -#define suspend_report_result(fn, ret) do {} while (0) +#define suspend_report_result(dev, fn, ret) do {} while (0) static inline int device_pm_wait_for_dev(struct device 
*a, struct device *b) { -- cgit v1.2.3-70-g09d2 From 23c296fb7eeea99dec035521c07ae54c28b8a267 Mon Sep 17 00:00:00 2001 From: Jinzhou Su Date: Wed, 9 Mar 2022 09:23:48 +0800 Subject: cpufreq: amd-pstate: Add more tracepoint for AMD P-State module Add frequency, mperf, aperf and tsc in the trace. This can be used to debug and tune the performance of AMD P-state driver. Use the time difference between amd_pstate_update to calculate CPU frequency. There could be sleep in arch_freq_get_on_cpu, so do not use it here. Signed-off-by: Jinzhou Su Co-developed-by: Huang Rui Signed-off-by: Huang Rui Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate-trace.h | 22 +++++++++++++- drivers/cpufreq/amd-pstate.c | 59 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h index 647505957d4f..35f38ae67fb1 100644 --- a/drivers/cpufreq/amd-pstate-trace.h +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -27,6 +27,10 @@ TRACE_EVENT(amd_pstate_perf, TP_PROTO(unsigned long min_perf, unsigned long target_perf, unsigned long capacity, + u64 freq, + u64 mperf, + u64 aperf, + u64 tsc, unsigned int cpu_id, bool changed, bool fast_switch @@ -35,6 +39,10 @@ TRACE_EVENT(amd_pstate_perf, TP_ARGS(min_perf, target_perf, capacity, + freq, + mperf, + aperf, + tsc, cpu_id, changed, fast_switch @@ -44,6 +52,10 @@ TRACE_EVENT(amd_pstate_perf, __field(unsigned long, min_perf) __field(unsigned long, target_perf) __field(unsigned long, capacity) + __field(unsigned long long, freq) + __field(unsigned long long, mperf) + __field(unsigned long long, aperf) + __field(unsigned long long, tsc) __field(unsigned int, cpu_id) __field(bool, changed) __field(bool, fast_switch) @@ -53,15 +65,23 @@ TRACE_EVENT(amd_pstate_perf, __entry->min_perf = min_perf; __entry->target_perf = target_perf; __entry->capacity = capacity; + __entry->freq = freq; + __entry->mperf = mperf; + 
__entry->aperf = aperf; + __entry->tsc = tsc; __entry->cpu_id = cpu_id; __entry->changed = changed; __entry->fast_switch = fast_switch; ), - TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s", + TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s", (unsigned long)__entry->min_perf, (unsigned long)__entry->target_perf, (unsigned long)__entry->capacity, + (unsigned long long)__entry->freq, + (unsigned long long)__entry->mperf, + (unsigned long long)__entry->aperf, + (unsigned long long)__entry->tsc, (unsigned int)__entry->cpu_id, (__entry->changed) ? "true" : "false", (__entry->fast_switch) ? "true" : "false" diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9ce75ed11f8e..7be38bc6a673 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -65,6 +65,18 @@ MODULE_PARM_DESC(shared_mem, static struct cpufreq_driver amd_pstate_driver; +/** + * struct amd_aperf_mperf + * @aperf: actual performance frequency clock count + * @mperf: maximum performance frequency clock count + * @tsc: time stamp counter + */ +struct amd_aperf_mperf { + u64 aperf; + u64 mperf; + u64 tsc; +}; + /** * struct amd_cpudata - private CPU data for AMD P-State * @cpu: CPU number @@ -81,6 +93,9 @@ static struct cpufreq_driver amd_pstate_driver; * @min_freq: the frequency that mapped to lowest_perf * @nominal_freq: the frequency that mapped to nominal_perf * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf + * @cur: Difference of Aperf/Mperf/tsc count between last and current sample + * @prev: Last Aperf/Mperf/tsc count value read from register + * @freq: current cpu frequency value * @boost_supported: check whether the Processor or SBIOS supports boost mode * * The amd_cpudata is key private data for each CPU thread in AMD P-State, and @@ -102,6 +117,10 @@ struct amd_cpudata { u32 
nominal_freq; u32 lowest_nonlinear_freq; + struct amd_aperf_mperf cur; + struct amd_aperf_mperf prev; + + u64 freq; bool boost_supported; }; @@ -211,6 +230,39 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, max_perf, fast_switch); } +static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) +{ + u64 aperf, mperf, tsc; + unsigned long flags; + + local_irq_save(flags); + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + tsc = rdtsc(); + + if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) { + local_irq_restore(flags); + return false; + } + + local_irq_restore(flags); + + cpudata->cur.aperf = aperf; + cpudata->cur.mperf = mperf; + cpudata->cur.tsc = tsc; + cpudata->cur.aperf -= cpudata->prev.aperf; + cpudata->cur.mperf -= cpudata->prev.mperf; + cpudata->cur.tsc -= cpudata->prev.tsc; + + cpudata->prev.aperf = aperf; + cpudata->prev.mperf = mperf; + cpudata->prev.tsc = tsc; + + cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf); + + return true; +} + static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { @@ -226,8 +278,11 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, value &= ~AMD_CPPC_MAX_PERF(~0L); value |= AMD_CPPC_MAX_PERF(max_perf); - trace_amd_pstate_perf(min_perf, des_perf, max_perf, - cpudata->cpu, (value != prev), fast_switch); + if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { + trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, + cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, + cpudata->cpu, (value != prev), fast_switch); + } if (value == prev) return; -- cgit v1.2.3-70-g09d2 From 659b66e98bb38dc6300dca3c9ebebeba194b575b Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 4 Mar 2022 15:53:28 +0800 Subject: cpuidle: haltpoll: Call cpuidle_poll_state_init() later Call cpuidle_poll_state_init() only if it is needed to avoid doing 
useless work. Signed-off-by: Li RongQing [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-haltpoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index fcc53215bac8..3a39a7f48b77 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -108,11 +108,11 @@ static int __init haltpoll_init(void) if (boot_option_idle_override != IDLE_NO_OVERRIDE) return -ENODEV; - cpuidle_poll_state_init(drv); - if (!kvm_para_available() || !haltpoll_want()) return -ENODEV; + cpuidle_poll_state_init(drv); + ret = cpuidle_register_driver(drv); if (ret < 0) return ret; -- cgit v1.2.3-70-g09d2 From 524bb1da785a7ae43dd413cd392b5071c6c367f8 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 5 Mar 2022 14:02:14 +0300 Subject: PM: core: keep irq flags in device_pm_check_callbacks() The function device_pm_check_callbacks() can be called under the spin lock (in the reported case it happens from genpd_add_device() -> dev_pm_domain_set(), when the genpd uses spinlocks rather than mutexes). However this function unconditionally uses spin_lock_irq() / spin_unlock_irq(), thus not preserving the CPU flags. Use the irqsave/irqrestore instead. 
The backtrace for the reference: [ 2.752010] ------------[ cut here ]------------ [ 2.756769] raw_local_irq_restore() called with IRQs enabled [ 2.762596] WARNING: CPU: 4 PID: 1 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x34/0x50 [ 2.772338] Modules linked in: [ 2.775487] CPU: 4 PID: 1 Comm: swapper/0 Tainted: G S 5.17.0-rc6-00384-ge330d0d82eff-dirty #684 [ 2.781384] Freeing initrd memory: 46024K [ 2.785839] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 2.785841] pc : warn_bogus_irq_restore+0x34/0x50 [ 2.785844] lr : warn_bogus_irq_restore+0x34/0x50 [ 2.785846] sp : ffff80000805b7d0 [ 2.785847] x29: ffff80000805b7d0 x28: 0000000000000000 x27: 0000000000000002 [ 2.785850] x26: ffffd40e80930b18 x25: ffff7ee2329192b8 x24: ffff7edfc9f60800 [ 2.785853] x23: ffffd40e80930b18 x22: ffffd40e80930d30 x21: ffff7edfc0dffa00 [ 2.785856] x20: ffff7edfc09e3768 x19: 0000000000000000 x18: ffffffffffffffff [ 2.845775] x17: 6572206f74206465 x16: 6c696166203a3030 x15: ffff80008805b4f7 [ 2.853108] x14: 0000000000000000 x13: ffffd40e809550b0 x12: 00000000000003d8 [ 2.860441] x11: 0000000000000148 x10: ffffd40e809550b0 x9 : ffffd40e809550b0 [ 2.867774] x8 : 00000000ffffefff x7 : ffffd40e809ad0b0 x6 : ffffd40e809ad0b0 [ 2.875107] x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000 [ 2.882440] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff7edfc03a8000 [ 2.889774] Call trace: [ 2.892290] warn_bogus_irq_restore+0x34/0x50 [ 2.896770] _raw_spin_unlock_irqrestore+0x94/0xa0 [ 2.901690] genpd_unlock_spin+0x20/0x30 [ 2.905724] genpd_add_device+0x100/0x2d0 [ 2.909850] __genpd_dev_pm_attach+0xa8/0x23c [ 2.914329] genpd_dev_pm_attach_by_id+0xc4/0x190 [ 2.919167] genpd_dev_pm_attach_by_name+0x3c/0xd0 [ 2.924086] dev_pm_domain_attach_by_name+0x24/0x30 [ 2.929102] psci_dt_attach_cpu+0x24/0x90 [ 2.933230] psci_cpuidle_probe+0x2d4/0x46c [ 2.937534] platform_probe+0x68/0xe0 [ 2.941304] really_probe.part.0+0x9c/0x2fc [ 2.945605] 
__driver_probe_device+0x98/0x144 [ 2.950085] driver_probe_device+0x44/0x15c [ 2.954385] __device_attach_driver+0xb8/0x120 [ 2.958950] bus_for_each_drv+0x78/0xd0 [ 2.962896] __device_attach+0xd8/0x180 [ 2.966843] device_initial_probe+0x14/0x20 [ 2.971144] bus_probe_device+0x9c/0xa4 [ 2.975092] device_add+0x380/0x88c [ 2.978679] platform_device_add+0x114/0x234 [ 2.983067] platform_device_register_full+0x100/0x190 [ 2.988344] psci_idle_init+0x6c/0xb0 [ 2.992113] do_one_initcall+0x74/0x3a0 [ 2.996060] kernel_init_freeable+0x2fc/0x384 [ 3.000543] kernel_init+0x28/0x130 [ 3.004132] ret_from_fork+0x10/0x20 [ 3.007817] irq event stamp: 319826 [ 3.011404] hardirqs last enabled at (319825): [] __up_console_sem+0x78/0x84 [ 3.020332] hardirqs last disabled at (319826): [] el1_dbg+0x24/0x8c [ 3.028458] softirqs last enabled at (318312): [] _stext+0x410/0x588 [ 3.036678] softirqs last disabled at (318299): [] __irq_exit_rcu+0x158/0x174 [ 3.045607] ---[ end trace 0000000000000000 ]--- Signed-off-by: Dmitry Baryshkov Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 04ea92cbd9cf..08c8a69d7b81 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -2018,7 +2018,9 @@ static bool pm_ops_is_empty(const struct dev_pm_ops *ops) void device_pm_check_callbacks(struct device *dev) { - spin_lock_irq(&dev->power.lock); + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); dev->power.no_pm_callbacks = (!dev->bus || (pm_ops_is_empty(dev->bus->pm) && !dev->bus->suspend && !dev->bus->resume)) && @@ -2027,7 +2029,7 @@ void device_pm_check_callbacks(struct device *dev) (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && (!dev->driver || (pm_ops_is_empty(dev->driver->pm) && !dev->driver->suspend && !dev->driver->resume)); - spin_unlock_irq(&dev->power.lock); + spin_unlock_irqrestore(&dev->power.lock, flags); } bool dev_pm_skip_suspend(struct device *dev) -- cgit v1.2.3-70-g09d2 From 85750bcd480c74b13661ee2c9db49de500fd2823 Mon Sep 17 00:00:00 2001 From: Lianjie Zhang Date: Thu, 10 Mar 2022 22:38:30 +0800 Subject: cpufreq: unify show() and store() naming and use __ATTR_XX Usually, sysfs attributes have .show and .store and their naming convention is filename_show() and filename_store(). But in cpufreq the naming convention of these functions is show_filename() and store_filename() which prevents __ATTR_RW() and __ATTR_RO() from being used in there to simplify code. Accordingly, change the naming convention of the sysfs .show and .store methods in cpufreq to follow the one expected by __ATTR_RW() and __ATTR_RO() and use these macros in that code. Signed-off-by: Lianjie Zhang [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 10 +++++----- drivers/cpufreq/cpufreq_governor.c | 6 +++--- drivers/cpufreq/cpufreq_governor.h | 12 +++++------- drivers/cpufreq/cpufreq_ondemand.c | 10 +++++----- 4 files changed, 18 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 08515f7e515f..b6bd0ff35323 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -146,7 +146,7 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy) /************************** sysfs interface ************************/ -static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, +static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -161,7 +161,7 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, return count; } -static ssize_t store_up_threshold(struct gov_attr_set *attr_set, +static ssize_t up_threshold_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -177,7 +177,7 @@ static ssize_t store_up_threshold(struct gov_attr_set *attr_set, return count; } -static ssize_t store_down_threshold(struct gov_attr_set *attr_set, +static ssize_t down_threshold_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -195,7 +195,7 @@ static ssize_t store_down_threshold(struct gov_attr_set *attr_set, return count; } -static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set, +static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -220,7 +220,7 @@ static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set, return count; } -static ssize_t 
store_freq_step(struct gov_attr_set *attr_set, const char *buf, +static ssize_t freq_step_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 63f7c219062b..0d42cf8b88d8 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -27,7 +27,7 @@ static DEFINE_MUTEX(gov_dbs_data_mutex); /* Common sysfs tunables */ /* - * store_sampling_rate - update sampling rate effective immediately if needed. + * sampling_rate_store - update sampling rate effective immediately if needed. * * If new rate is smaller than the old, simply updating * dbs.sampling_rate might not be appropriate. For example, if the @@ -41,7 +41,7 @@ static DEFINE_MUTEX(gov_dbs_data_mutex); * This must be called with dbs_data->mutex held, otherwise traversing * policy_dbs_list isn't safe. */ -ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, +ssize_t sampling_rate_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -80,7 +80,7 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, return count; } -EXPORT_SYMBOL_GPL(store_sampling_rate); +EXPORT_SYMBOL_GPL(sampling_rate_store); /** * gov_update_cpu_data - Update CPU load data. 
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index bab8e6140377..a5a0bc3cc23e 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -51,7 +51,7 @@ static inline struct dbs_data *to_dbs_data(struct gov_attr_set *attr_set) } #define gov_show_one(_gov, file_name) \ -static ssize_t show_##file_name \ +static ssize_t file_name##_show \ (struct gov_attr_set *attr_set, char *buf) \ { \ struct dbs_data *dbs_data = to_dbs_data(attr_set); \ @@ -60,7 +60,7 @@ static ssize_t show_##file_name \ } #define gov_show_one_common(file_name) \ -static ssize_t show_##file_name \ +static ssize_t file_name##_show \ (struct gov_attr_set *attr_set, char *buf) \ { \ struct dbs_data *dbs_data = to_dbs_data(attr_set); \ @@ -68,12 +68,10 @@ static ssize_t show_##file_name \ } #define gov_attr_ro(_name) \ -static struct governor_attr _name = \ -__ATTR(_name, 0444, show_##_name, NULL) +static struct governor_attr _name = __ATTR_RO(_name) #define gov_attr_rw(_name) \ -static struct governor_attr _name = \ -__ATTR(_name, 0644, show_##_name, store_##_name) +static struct governor_attr _name = __ATTR_RW(_name) /* Common to all CPUs of a policy */ struct policy_dbs_info { @@ -176,7 +174,7 @@ void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); void od_unregister_powersave_bias_handler(void); -ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, +ssize_t sampling_rate_store(struct gov_attr_set *attr_set, const char *buf, size_t count); void gov_update_cpu_data(struct dbs_data *dbs_data); #endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 6a41ea4729b8..e8fbf970ff07 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -202,7 +202,7 @@ static unsigned int od_dbs_update(struct cpufreq_policy *policy) 
/************************** sysfs interface ************************/ static struct dbs_governor od_dbs_gov; -static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf, +static ssize_t io_is_busy_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -220,7 +220,7 @@ static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf, return count; } -static ssize_t store_up_threshold(struct gov_attr_set *attr_set, +static ssize_t up_threshold_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -237,7 +237,7 @@ static ssize_t store_up_threshold(struct gov_attr_set *attr_set, return count; } -static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, +static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -265,7 +265,7 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, return count; } -static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set, +static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -290,7 +290,7 @@ static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set, return count; } -static ssize_t store_powersave_bias(struct gov_attr_set *attr_set, +static ssize_t powersave_bias_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); -- cgit v1.2.3-70-g09d2 From 3d13058ed2a6d0ddb55a45a979acc49a845be874 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 10 Mar 2022 14:42:23 -0800 Subject: cpufreq: intel_pstate: Use firmware default EPP For some specific platforms (E.g. 
AlderLake) the balance performance EPP is updated from the hard coded value in the driver. This acts as the default and balance_performance EPP. The purpose of this EPP update is to reach maximum 1 core turbo frequency (when possible) out of the box. Although we can achieve the objective by using hard coded value in the driver, there can be other EPP which can be better in terms of power. But that will be very subjective based on platform and use cases. This is not practical to have a per platform specific default hard coded in the driver. If a platform wants to specify default EPP, it can be set in the firmware. If this EPP is not the chipset default of 0x80 (balance_perf_epp unless driver changed it) and more performance oriented but not 0, the driver can use this as the default and balanced_perf EPP. In this case no driver update is required every time there is some new platform and default EPP. If the firmware didn't update the EPP from the chipset default then the hard coded value is used as per existing implementation. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index bc7f7e6759bd..846bb3a78788 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1692,6 +1692,37 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) } } +static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) +{ + cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); + + /* + * If this CPU gen doesn't call for change in balance_perf + * EPP return. 
+ */ + if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) + return; + + /* + * If powerup EPP is something other than chipset default 0x80 and + * - is more performance oriented than 0x80 (default balance_perf EPP) + * - But less performance oriented than performance EPP + * then use this as new balance_perf EPP. + */ + if (cpudata->epp_default < HWP_EPP_BALANCE_PERFORMANCE && + cpudata->epp_default > HWP_EPP_PERFORMANCE) { + epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = cpudata->epp_default; + return; + } + + /* + * Use hard coded value per gen to update the balance_perf + * and default EPP. + */ + cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE]; + intel_pstate_set_epp(cpudata, cpudata->epp_default); +} + static void intel_pstate_hwp_enable(struct cpudata *cpudata) { /* First disable HWP notification interrupt till we activate again */ @@ -1705,12 +1736,7 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) if (cpudata->epp_default >= 0) return; - if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) { - cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); - } else { - cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE]; - intel_pstate_set_epp(cpudata, cpudata->epp_default); - } + intel_pstate_update_epp_defaults(cpudata); } static int atom_get_min_pstate(void) -- cgit v1.2.3-70-g09d2 From f6c46b1d62f8ffbf2cf6eb43ab0d277bd3f7e948 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 11 Mar 2022 19:20:17 +0000 Subject: PM: hibernate: Honour ACPI hardware signature by default for virtual guests The ACPI specification says that OSPM should refuse to restore from hibernate if the hardware signature changes, and should boot from scratch. However, real BIOSes often vary the hardware signature in cases where we *do* want to resume from hibernate, so Linux doesn't follow the spec by default. 
However, in a virtual environment there's no reason for the VMM to vary the hardware signature *unless* it wants to trigger a clean reboot as defined by the ACPI spec. So enable the check by default if a hypervisor is detected. Signed-off-by: David Woodhouse Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/sleep.c | 23 +++++++++++++++++++++-- drivers/acpi/sleep.c | 11 +++-------- include/linux/acpi.h | 2 +- 3 files changed, 25 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 1e97f944b47d..3b7f4cdbf2e0 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "../../realmode/rm/wakeup.h" @@ -140,9 +141,9 @@ static int __init acpi_sleep_setup(char *str) acpi_realmode_flags |= 4; #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_hwsig", 8) == 0) - acpi_check_s4_hw_signature(1); + acpi_check_s4_hw_signature = 1; if (strncmp(str, "s4_nohwsig", 10) == 0) - acpi_check_s4_hw_signature(0); + acpi_check_s4_hw_signature = 0; #endif if (strncmp(str, "nonvs", 5) == 0) acpi_nvs_nosave(); @@ -160,3 +161,21 @@ static int __init acpi_sleep_setup(char *str) } __setup("acpi_sleep=", acpi_sleep_setup); + +#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HYPERVISOR_GUEST) +static int __init init_s4_sigcheck(void) +{ + /* + * If running on a hypervisor, honour the ACPI specification + * by default and trigger a clean reboot when the hardware + * signature in FACS is changed after hibernation. 
+ */ + if (acpi_check_s4_hw_signature == -1 && + !hypervisor_is_type(X86_HYPER_NATIVE)) + acpi_check_s4_hw_signature = 1; + + return 0; +} +/* This must happen before acpi_init() which is a subsys initcall */ +arch_initcall(init_s4_sigcheck); +#endif diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index d4fbea91ab6b..74e54fd44b8e 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -869,12 +869,7 @@ static inline void acpi_sleep_syscore_init(void) {} #ifdef CONFIG_HIBERNATION static unsigned long s4_hardware_signature; static struct acpi_table_facs *facs; -static int sigcheck = -1; /* Default behaviour is just to warn */ - -void __init acpi_check_s4_hw_signature(int check) -{ - sigcheck = check; -} +int acpi_check_s4_hw_signature = -1; /* Default behaviour is just to warn */ static int acpi_hibernation_begin(pm_message_t stage) { @@ -999,7 +994,7 @@ static void acpi_sleep_hibernate_setup(void) hibernation_set_ops(old_suspend_ordering ? &acpi_hibernation_ops_old : &acpi_hibernation_ops); sleep_states[ACPI_STATE_S4] = 1; - if (!sigcheck) + if (!acpi_check_s4_hw_signature) return; acpi_get_table(ACPI_SIG_FACS, 1, (struct acpi_table_header **)&facs); @@ -1011,7 +1006,7 @@ static void acpi_sleep_hibernate_setup(void) */ s4_hardware_signature = facs->hardware_signature; - if (sigcheck > 0) { + if (acpi_check_s4_hw_signature > 0) { /* * If we're actually obeying the ACPI specification * then the signature is written out as part of the diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6274758648e3..766dbcb82df1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -526,7 +526,7 @@ acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION -void __init acpi_check_s4_hw_signature(int check); +extern int acpi_check_s4_hw_signature; #endif #ifdef CONFIG_PM_SLEEP -- cgit v1.2.3-70-g09d2 From a335b1e6bb29300d3bc6749763a4298627e594ba Mon Sep 17 00:00:00 2001 
From: "Rafael J. Wysocki" Date: Tue, 15 Mar 2022 20:35:47 +0100 Subject: cpuidle: intel_idle: Update intel_idle() kerneldoc comment Commit bf9282dc26e7 ("cpuidle: Make CPUIDLE_FLAG_TLB_FLUSHED generic") moved the leave_mm() call away from intel_idle(), but it didn't update its kerneldoc comment accordingly, so do that now. Fixes: bf9282dc26e7 ("cpuidle: Make CPUIDLE_FLAG_TLB_FLUSHED generic") Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index e385ddf15b32..4ba4ab974dbe 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -122,9 +122,6 @@ static unsigned int mwait_substates __initdata; * If the local APIC timer is not known to be reliable in the target idle state, * enable one-shot tick broadcasting for the target CPU before executing MWAIT. * - * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to - * flushing user TLBs. - * * Must be called under local_irq_disable(). */ static __cpuidle int intel_idle(struct cpuidle_device *dev, -- cgit v1.2.3-70-g09d2 From 03eb65224e5711e7a2f34b500d44866b322a249a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 15 Mar 2022 20:36:42 +0100 Subject: cpuidle: intel_idle: Drop redundant backslash at line end Drop a redundant backslash character at the end of a line in the spr_cstates[] definition. Signed-off-by: Rafael J. 
Wysocki Acked-by: Artem Bityutskiy --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 4ba4ab974dbe..b7640cfe0020 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -781,7 +781,7 @@ static struct cpuidle_state spr_cstates[] __initdata = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | \ + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | CPUIDLE_FLAG_UNUSABLE, .exit_latency = 2, .target_residency = 4, -- cgit v1.2.3-70-g09d2 From 3870a44d50feeb5118a2698617d251123d9cde4c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 16 Mar 2022 16:55:48 -0500 Subject: cpufreq: powernow-k8: Re-order the init checks The powernow-k8 driver checks at startup that the currently active driver is acpi-cpufreq and shows a warning when it is not. Because of this, the following warning comes up on systems that support amd-pstate and have both drivers compiled in: `WTF driver: amd-pstate` The systems that support powernow-k8 will not support amd-pstate, so re-order the checks to validate the CPU model number first to avoid this warning being displayed on modern SoCs. Signed-off-by: Mario Limonciello Acked-by: Viresh Kumar Reviewed-by: Huang Rui Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/powernow-k8.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 12ab4014af71..d289036beff2 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1172,14 +1172,14 @@ static int powernowk8_init(void) unsigned int i, supported_cpus = 0; int ret; + if (!x86_match_cpu(powernow_k8_ids)) + return -ENODEV; + if (boot_cpu_has(X86_FEATURE_HW_PSTATE)) { __request_acpi_cpufreq(); return -ENODEV; } - if (!x86_match_cpu(powernow_k8_ids)) - return -ENODEV; - cpus_read_lock(); for_each_online_cpu(i) { smp_call_function_single(i, check_supported_cpu, &ret, 1); -- cgit v1.2.3-70-g09d2