From 4274521fabee05375d10bea0e36a806ed4ab7b45 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:35 +0000 Subject: PM: EM: Add missing newline for the message log Fix missing newline for the string long in the error code path. Reviewed-by: Hongyan Xia Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 7b44f5b89fa1..8b9dd4a39f63 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -250,7 +250,7 @@ static void em_cpufreq_update_efficiencies(struct device *dev) policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd))); if (!policy) { - dev_warn(dev, "EM: Access to CPUFreq policy failed"); + dev_warn(dev, "EM: Access to CPUFreq policy failed\n"); return; } -- cgit v1.3.1 From e7b1cc9a7ea6d7862baac0fd7b145573618727dd Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:36 +0000 Subject: PM: EM: Extend em_cpufreq_update_efficiencies() argument list In order to prepare the code for the modifiable EM perf_state table, make em_cpufreq_update_efficiencies() take a pointer to the EM table as its second argument and modify it to use that new argument instead of the 'table' member of dev->em_pd. No functional impact. Reviewed-by: Hongyan Xia Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 8b9dd4a39f63..8c373b151875 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -237,15 +237,15 @@ static int em_create_pd(struct device *dev, int nr_states, return 0; } -static void em_cpufreq_update_efficiencies(struct device *dev) +static void +em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table) { struct em_perf_domain *pd = dev->em_pd; - struct em_perf_state *table; struct cpufreq_policy *policy; int found = 0; int i; - if (!_is_cpu_device(dev) || !pd) + if (!_is_cpu_device(dev)) return; policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd))); @@ -254,8 +254,6 @@ static void em_cpufreq_update_efficiencies(struct device *dev) return; } - table = pd->table; - for (i = 0; i < pd->nr_perf_states; i++) { if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT)) continue; @@ -397,7 +395,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, dev->em_pd->flags |= flags; - em_cpufreq_update_efficiencies(dev); + em_cpufreq_update_efficiencies(dev, dev->em_pd->table); em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); -- cgit v1.3.1 From 99907d6054f2d39a625004f9f4e3fe9297838a3c Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:37 +0000 Subject: PM: EM: Find first CPU active while updating OPP efficiency The Energy Model might be updated at runtime and the energy efficiency for each OPP may change. Thus, there is a need to update also the cpufreq framework and make it aligned to the new values. In order to do that, use a first active CPU from the Performance Domain. This is needed since the first CPU in the cpumask might be offline when we run this code path. Reviewed-by: Hongyan Xia Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 8c373b151875..0c3220ff54f7 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -243,12 +243,19 @@ em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table) struct em_perf_domain *pd = dev->em_pd; struct cpufreq_policy *policy; int found = 0; - int i; + int i, cpu; if (!_is_cpu_device(dev)) return; - policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd))); + /* Try to get a CPU which is active and in this PD */ + cpu = cpumask_first_and(em_span_cpus(pd), cpu_active_mask); + if (cpu >= nr_cpu_ids) { + dev_warn(dev, "EM: No online CPU for CPUFreq policy\n"); + return; + } + + policy = cpufreq_cpu_get(cpu); if (!policy) { dev_warn(dev, "EM: Access to CPUFreq policy failed\n"); return; -- cgit v1.3.1 From faf7075b79a259136e2b57ce52b48a7096270e8b Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:39 +0000 Subject: PM: EM: Introduce em_compute_costs() Move the EM costs computation code into a new dedicated function, em_compute_costs(), that can be reused in other places in the future. This change is not expected to alter the general functionality. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 72 +++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 29 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 0c3220ff54f7..5c47caaf270e 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -103,14 +103,52 @@ static void em_debug_create_pd(struct device *dev) {} static void em_debug_remove_pd(struct device *dev) {} #endif +static int em_compute_costs(struct device *dev, struct em_perf_state *table, + struct em_data_callback *cb, int nr_states, + unsigned long flags) +{ + unsigned long prev_cost = ULONG_MAX; + u64 fmax; + int i, ret; + + /* Compute the cost of each performance state. */ + fmax = (u64) table[nr_states - 1].frequency; + for (i = nr_states - 1; i >= 0; i--) { + unsigned long power_res, cost; + + if (flags & EM_PERF_DOMAIN_ARTIFICIAL) { + ret = cb->get_cost(dev, table[i].frequency, &cost); + if (ret || !cost || cost > EM_MAX_POWER) { + dev_err(dev, "EM: invalid cost %lu %d\n", + cost, ret); + return -EINVAL; + } + } else { + power_res = table[i].power; + cost = div64_u64(fmax * power_res, table[i].frequency); + } + + table[i].cost = cost; + + if (table[i].cost >= prev_cost) { + table[i].flags = EM_PERF_STATE_INEFFICIENT; + dev_dbg(dev, "EM: OPP:%lu is inefficient\n", + table[i].frequency); + } else { + prev_cost = table[i].cost; + } + } + + return 0; +} + static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, int nr_states, struct em_data_callback *cb, unsigned long flags) { - unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX; + unsigned long power, freq, prev_freq = 0; struct em_perf_state *table; int i, ret; - u64 fmax; table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL); if (!table) @@ -154,33 +192,9 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, table[i].frequency = prev_freq = freq; } - /* Compute the cost of each performance state. */ - fmax = (u64) table[nr_states - 1].frequency; - for (i = nr_states - 1; i >= 0; i--) { - unsigned long power_res, cost; - - if (flags & EM_PERF_DOMAIN_ARTIFICIAL) { - ret = cb->get_cost(dev, table[i].frequency, &cost); - if (ret || !cost || cost > EM_MAX_POWER) { - dev_err(dev, "EM: invalid cost %lu %d\n", - cost, ret); - goto free_ps_table; - } - } else { - power_res = table[i].power; - cost = div64_u64(fmax * power_res, table[i].frequency); - } - - table[i].cost = cost; - - if (table[i].cost >= prev_cost) { - table[i].flags = EM_PERF_STATE_INEFFICIENT; - dev_dbg(dev, "EM: OPP:%lu is inefficient\n", - table[i].frequency); - } else { - prev_cost = table[i].cost; - } - } + ret = em_compute_costs(dev, table, cb, nr_states, flags); + if (ret) + goto free_ps_table; pd->table = table; pd->nr_perf_states = nr_states; -- cgit v1.3.1 From 818867224d41725dcf4abe890d8f24e5d6bd9c67 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:40 +0000 Subject: PM: EM: Check if the get_cost() callback is present in em_compute_costs() Subsequent changes will introduce a case in which 'cb->get_cost' may not be set in em_compute_costs(), so add a check to ensure that it is not NULL before attempting to dereference it. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 5c47caaf270e..21d761223255 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -116,7 +116,7 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, for (i = nr_states - 1; i >= 0; i--) { unsigned long power_res, cost; - if (flags & EM_PERF_DOMAIN_ARTIFICIAL) { + if ((flags & EM_PERF_DOMAIN_ARTIFICIAL) && cb->get_cost) { ret = cb->get_cost(dev, table[i].frequency, &cost); if (ret || !cost || cost > EM_MAX_POWER) { dev_err(dev, "EM: invalid cost %lu %d\n", -- cgit v1.3.1 From 8552d6820168d6508bd1f7cd49be248dcb74efb3 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:41 +0000 Subject: PM: EM: Split the allocation and initialization of the EM table Split the process of allocation and data initialization for the EM table. The upcoming changes for modifiable EM will use it. This change is not expected to alter the general functionality. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 55 +++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 22 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 21d761223255..7468fa92134b 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -142,18 +142,26 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, return 0; } +static int em_allocate_perf_table(struct em_perf_domain *pd, + int nr_states) +{ + pd->table = kcalloc(nr_states, sizeof(struct em_perf_state), + GFP_KERNEL); + if (!pd->table) + return -ENOMEM; + + return 0; +} + static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, - int nr_states, struct em_data_callback *cb, + struct em_perf_state *table, + struct em_data_callback *cb, unsigned long flags) { unsigned long power, freq, prev_freq = 0; - struct em_perf_state *table; + int nr_states = pd->nr_perf_states; int i, ret; - table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL); - if (!table) - return -ENOMEM; - /* Build the list of performance states for this performance domain */ for (i = 0, freq = 0; i < nr_states; i++, freq++) { /* @@ -165,7 +173,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, if (ret) { dev_err(dev, "EM: invalid perf. state: %d\n", ret); - goto free_ps_table; + return -EINVAL; } /* @@ -175,7 +183,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, if (freq <= prev_freq) { dev_err(dev, "EM: non-increasing freq: %lu\n", freq); - goto free_ps_table; + return -EINVAL; } /* @@ -185,7 +193,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, if (!power || power > EM_MAX_POWER) { dev_err(dev, "EM: invalid power: %lu\n", power); - goto free_ps_table; + return -EINVAL; } table[i].power = power; @@ -194,16 +202,9 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, ret = em_compute_costs(dev, table, cb, nr_states, flags); if (ret) - goto free_ps_table; - - pd->table = table; - pd->nr_perf_states = nr_states; + return -EINVAL; return 0; - -free_ps_table: - kfree(table); - return -EINVAL; } static int em_create_pd(struct device *dev, int nr_states, @@ -234,11 +235,15 @@ static int em_create_pd(struct device *dev, int nr_states, return -ENOMEM; } - ret = em_create_perf_table(dev, pd, nr_states, cb, flags); - if (ret) { - kfree(pd); - return ret; - } + pd->nr_perf_states = nr_states; + + ret = em_allocate_perf_table(pd, nr_states); + if (ret) + goto free_pd; + + ret = em_create_perf_table(dev, pd, pd->table, cb, flags); + if (ret) + goto free_pd_table; if (_is_cpu_device(dev)) for_each_cpu(cpu, cpus) { @@ -249,6 +254,12 @@ static int em_create_pd(struct device *dev, int nr_states, dev->em_pd = pd; return 0; + +free_pd_table: + kfree(pd->table); +free_pd: + kfree(pd); + return -EINVAL; } static void -- cgit v1.3.1 From ca0fc871f16f4bef746b5ba814b67afb59119700 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:42 +0000 Subject: PM: EM: Introduce runtime modifiable table The new runtime table can be populated with a new power data to better reflect the actual efficiency of the device e.g. CPU. The power can vary over time e.g. due to the SoC temperature change. Higher temperature can increase power values. For longer running scenarios, such as game or camera, when also other devices are used (e.g. GPU, ISP) the CPU power can change. The new EM framework is able to addresses this issue and change the EM data at runtime safely. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 12 ++++++++++ kernel/power/energy_model.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) (limited to 'kernel/power') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 1dcd1645dde7..8ddf1d8a9581 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -36,9 +36,20 @@ struct em_perf_state { */ #define EM_PERF_STATE_INEFFICIENT BIT(0) +/** + * struct em_perf_table - Performance states table + * @rcu: RCU used for safe access and destruction + * @state: List of performance states, in ascending order + */ +struct em_perf_table { + struct rcu_head rcu; + struct em_perf_state state[]; +}; + /** * struct em_perf_domain - Performance domain * @table: List of performance states, in ascending order + * @em_table: Pointer to the runtime modifiable em_perf_table * @nr_perf_states: Number of performance states * @flags: See "em_perf_domain flags" * @cpus: Cpumask covering the CPUs of the domain. It's here @@ -54,6 +65,7 @@ struct em_perf_state { */ struct em_perf_domain { struct em_perf_state *table; + struct em_perf_table __rcu *em_table; int nr_perf_states; unsigned long flags; unsigned long cpus[]; diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 7468fa92134b..131ff1d0dc5b 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -23,6 +23,9 @@ */ static DEFINE_MUTEX(em_pd_mutex); +static void em_cpufreq_update_efficiencies(struct device *dev, + struct em_perf_state *table); + static bool _is_cpu_device(struct device *dev) { return (dev->bus == &cpu_subsys); @@ -103,6 +106,31 @@ static void em_debug_create_pd(struct device *dev) {} static void em_debug_remove_pd(struct device *dev) {} #endif +static void em_destroy_table_rcu(struct rcu_head *rp) +{ + struct em_perf_table __rcu *table; + + table = container_of(rp, struct em_perf_table, rcu); + kfree(table); +} + +static void em_free_table(struct em_perf_table __rcu *table) +{ + call_rcu(&table->rcu, em_destroy_table_rcu); +} + +static struct em_perf_table __rcu * +em_allocate_table(struct em_perf_domain *pd) +{ + struct em_perf_table __rcu *table; + int table_size; + + table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; + + table = kzalloc(sizeof(*table) + table_size, GFP_KERNEL); + return table; +} + static int em_compute_costs(struct device *dev, struct em_perf_state *table, struct em_data_callback *cb, int nr_states, unsigned long flags) @@ -153,6 +181,24 @@ static int em_allocate_perf_table(struct em_perf_domain *pd, return 0; } +static int em_create_runtime_table(struct em_perf_domain *pd) +{ + struct em_perf_table __rcu *table; + int table_size; + + table = em_allocate_table(pd); + if (!table) + return -ENOMEM; + + /* Initialize runtime table with existing data */ + table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; + memcpy(table->state, pd->table, table_size); + + rcu_assign_pointer(pd->em_table, table); + + return 0; +} + static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, struct em_perf_state *table, struct em_data_callback *cb, @@ -245,6 +291,10 @@ static int em_create_pd(struct device *dev, int nr_states, if (ret) goto free_pd_table; + ret = em_create_runtime_table(pd); + if (ret) + goto free_pd_table; + if (_is_cpu_device(dev)) for_each_cpu(cpu, cpus) { cpu_dev = get_cpu_device(cpu); @@ -461,6 +511,9 @@ void em_dev_unregister_perf_domain(struct device *dev) em_debug_remove_pd(dev); kfree(dev->em_pd->table); + + em_free_table(dev->em_pd->em_table); + kfree(dev->em_pd); dev->em_pd = NULL; mutex_unlock(&em_pd_mutex); -- cgit v1.3.1 From ffcf9bce7af02a21fb73738999de1e3d4fde5aca Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:44 +0000 Subject: PM: EM: Add functions for memory allocations for new EM tables The runtime modified EM table can be provided from drivers. Create mechanism which allows safely allocate and free the table for device drivers. The same table can be used by the EAS in task scheduler code paths, so make sure the memory is not freed when the device driver module is unloaded. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 11 +++++++++++ kernel/power/energy_model.c | 38 +++++++++++++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 5 deletions(-) (limited to 'kernel/power') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 5f842da3bb0c..27911dc1887e 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -39,10 +40,12 @@ struct em_perf_state { /** * struct em_perf_table - Performance states table * @rcu: RCU used for safe access and destruction + * @kref: Reference counter to track the users * @state: List of performance states, in ascending order */ struct em_perf_table { struct rcu_head rcu; + struct kref kref; struct em_perf_state state[]; }; @@ -184,6 +187,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); +struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd); +void em_table_free(struct em_perf_table __rcu *table); /** * em_pd_get_efficient_state() - Get an efficient performance state from the EM @@ -365,6 +370,12 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) { return 0; } +static inline +struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) +{ + return NULL; +} +static inline void em_table_free(struct em_perf_table __rcu *table) {} #endif #endif diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 131ff1d0dc5b..16795743f969 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -114,13 +114,36 @@ static void em_destroy_table_rcu(struct rcu_head *rp) kfree(table); } -static void em_free_table(struct em_perf_table __rcu *table) +static void em_release_table_kref(struct kref *kref) { + struct em_perf_table __rcu *table; + + /* It was the last owner of this table so we can free */ + table = container_of(kref, struct em_perf_table, kref); + call_rcu(&table->rcu, em_destroy_table_rcu); } -static struct em_perf_table __rcu * -em_allocate_table(struct em_perf_domain *pd) +/** + * em_table_free() - Handles safe free of the EM table when needed + * @table : EM table which is going to be freed + * + * No return values. + */ +void em_table_free(struct em_perf_table __rcu *table) +{ + kref_put(&table->kref, em_release_table_kref); +} + +/** + * em_table_alloc() - Allocate a new EM table + * @pd : EM performance domain for which this must be done + * + * Allocate a new EM table and initialize its kref to indicate that it + * has a user. + * Returns allocated table or NULL. + */ +struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) { struct em_perf_table __rcu *table; int table_size; @@ -128,6 +151,11 @@ em_allocate_table(struct em_perf_domain *pd) table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; table = kzalloc(sizeof(*table) + table_size, GFP_KERNEL); + if (!table) + return NULL; + + kref_init(&table->kref); + return table; } @@ -186,7 +214,7 @@ static int em_create_runtime_table(struct em_perf_domain *pd) struct em_perf_table __rcu *table; int table_size; - table = em_allocate_table(pd); + table = em_table_alloc(pd); if (!table) return -ENOMEM; @@ -512,7 +540,7 @@ void em_dev_unregister_perf_domain(struct device *dev) kfree(dev->em_pd->table); - em_free_table(dev->em_pd->em_table); + em_table_free(dev->em_pd->em_table); kfree(dev->em_pd); dev->em_pd = NULL; -- cgit v1.3.1 From 977230d5d50314f9920d3ee6348773d8babbfb58 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:45 +0000 Subject: PM: EM: Introduce em_dev_update_perf_domain() for EM updates Add API function em_dev_update_perf_domain() which allows the EM to be changed safely. Concurrent updaters are serialized with a mutex and the removal of memory that will not be used any more is carried out with the help of RCU. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 8 ++++++++ kernel/power/energy_model.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'kernel/power') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 27911dc1887e..324a3a8e0a2d 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -183,6 +183,8 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); +int em_dev_update_perf_domain(struct device *dev, + struct em_perf_table __rcu *new_table); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, bool microwatts); @@ -376,6 +378,12 @@ struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) return NULL; } static inline void em_table_free(struct em_perf_table __rcu *table) {} +static inline +int em_dev_update_perf_domain(struct device *dev, + struct em_perf_table __rcu *new_table) +{ + return -EINVAL; +} #endif #endif diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 16795743f969..667619b70be7 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -209,6 +209,50 @@ static int em_allocate_perf_table(struct em_perf_domain *pd, return 0; } +/** + * em_dev_update_perf_domain() - Update runtime EM table for a device + * @dev : Device for which the EM is to be updated + * @new_table : The new EM table that is going to be used from now + * + * Update EM runtime modifiable table for the @dev using the provided @table. + * + * This function uses a mutex to serialize writers, so it must not be called + * from a non-sleeping context. + * + * Return 0 on success or an error code on failure. + */ +int em_dev_update_perf_domain(struct device *dev, + struct em_perf_table __rcu *new_table) +{ + struct em_perf_table __rcu *old_table; + struct em_perf_domain *pd; + + if (!dev) + return -EINVAL; + + /* Serialize update/unregister or concurrent updates */ + mutex_lock(&em_pd_mutex); + + if (!dev->em_pd) { + mutex_unlock(&em_pd_mutex); + return -EINVAL; + } + pd = dev->em_pd; + + kref_get(&new_table->kref); + + old_table = pd->em_table; + rcu_assign_pointer(pd->em_table, new_table); + + em_cpufreq_update_efficiencies(dev, new_table->state); + + em_table_free(old_table); + + mutex_unlock(&em_pd_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(em_dev_update_perf_domain); + static int em_create_runtime_table(struct em_perf_domain *pd) { struct em_perf_table __rcu *table; -- cgit v1.3.1 From 5a367f7b7014af86bd1ac0865a42db55187dbd3c Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:47 +0000 Subject: PM: EM: Add performance field to struct em_perf_state and optimize The performance doesn't scale linearly with the frequency. Also, it may be different in different workloads. Some CPUs are designed to be particularly good at some applications e.g. images or video processing and other CPUs in different. When those different types of CPUs are combined in one SoC they should be properly modeled to get max of the HW in Energy Aware Scheduler (EAS). The Energy Model (EM) provides the power vs. performance curves to the EAS, but assumes the CPUs capacity is fixed and scales linearly with the frequency. This patch allows to adjust the curve on the 'performance' axis as well. Code speed optimization: Removing map_util_freq() allows to avoid one division and one multiplication operations from the EAS hot code path. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 24 ++++++++++++------------ kernel/power/energy_model.c | 27 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 12 deletions(-) (limited to 'kernel/power') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 158dad6ea313..ce24ea3fe41c 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -13,6 +13,7 @@ /** * struct em_perf_state - Performance state of a performance domain + * @performance: CPU performance (capacity) at a given frequency * @frequency: The frequency in KHz, for consistency with CPUFreq * @power: The power consumed at this level (by 1 CPU or by a registered * device). It can be a total power: static and dynamic. @@ -21,6 +22,7 @@ * @flags: see "em_perf_state flags" description below. */ struct em_perf_state { + unsigned long performance; unsigned long frequency; unsigned long power; unsigned long cost; @@ -196,25 +198,25 @@ void em_table_free(struct em_perf_table __rcu *table); * em_pd_get_efficient_state() - Get an efficient performance state from the EM * @table: List of performance states, in ascending order * @nr_perf_states: Number of performance states - * @freq: Frequency to map with the EM + * @max_util: Max utilization to map with the EM * @pd_flags: Performance Domain flags * * It is called from the scheduler code quite frequently and as a consequence * doesn't implement any check. * - * Return: An efficient performance state id, high enough to meet @freq + * Return: An efficient performance state id, high enough to meet @max_util * requirement. */ static inline int em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states, - unsigned long freq, unsigned long pd_flags) + unsigned long max_util, unsigned long pd_flags) { struct em_perf_state *ps; int i; for (i = 0; i < nr_perf_states; i++) { ps = &table[i]; - if (ps->frequency >= freq) { + if (ps->performance >= max_util) { if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES && ps->flags & EM_PERF_STATE_INEFFICIENT) continue; @@ -245,9 +247,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, unsigned long max_util, unsigned long sum_util, unsigned long allowed_cpu_cap) { - unsigned long freq, ref_freq, scale_cpu; struct em_perf_table *em_table; struct em_perf_state *ps; + unsigned long scale_cpu; int cpu, i; #ifdef CONFIG_SCHED_DEBUG @@ -260,25 +262,23 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, /* * In order to predict the performance state, map the utilization of * the most utilized CPU of the performance domain to a requested - * frequency, like schedutil. Take also into account that the real - * frequency might be set lower (due to thermal capping). Thus, clamp + * performance, like schedutil. Take also into account that the real + * performance might be set lower (due to thermal capping). Thus, clamp * max utilization to the allowed CPU capacity before calculating - * effective frequency. + * effective performance. */ cpu = cpumask_first(to_cpumask(pd->cpus)); scale_cpu = arch_scale_cpu_capacity(cpu); - ref_freq = arch_scale_freq_ref(cpu); max_util = min(max_util, allowed_cpu_cap); - freq = map_util_freq(max_util, ref_freq, scale_cpu); /* * Find the lowest performance state of the Energy Model above the - * requested frequency. + * requested performance. */ em_table = rcu_dereference(pd->em_table); i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states, - freq, pd->flags); + max_util, pd->flags); ps = &em_table->state[i]; /* diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 667619b70be7..41418aa6daa6 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -46,6 +46,7 @@ static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd) debugfs_create_ulong("frequency", 0444, d, &ps->frequency); debugfs_create_ulong("power", 0444, d, &ps->power); debugfs_create_ulong("cost", 0444, d, &ps->cost); + debugfs_create_ulong("performance", 0444, d, &ps->performance); debugfs_create_ulong("inefficient", 0444, d, &ps->flags); } @@ -159,6 +160,30 @@ struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) return table; } +static void em_init_performance(struct device *dev, struct em_perf_domain *pd, + struct em_perf_state *table, int nr_states) +{ + u64 fmax, max_cap; + int i, cpu; + + /* This is needed only for CPUs and EAS skip other devices */ + if (!_is_cpu_device(dev)) + return; + + cpu = cpumask_first(em_span_cpus(pd)); + + /* + * Calculate the performance value for each frequency with + * linear relationship. The final CPU capacity might not be ready at + * boot time, but the EM will be updated a bit later with correct one. + */ + fmax = (u64) table[nr_states - 1].frequency; + max_cap = (u64) arch_scale_cpu_capacity(cpu); + for (i = 0; i < nr_states; i++) + table[i].performance = div64_u64(max_cap * table[i].frequency, + fmax); +} + static int em_compute_costs(struct device *dev, struct em_perf_state *table, struct em_data_callback *cb, int nr_states, unsigned long flags) @@ -318,6 +343,8 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, table[i].frequency = prev_freq = freq; } + em_init_performance(dev, pd, table, nr_states); + ret = em_compute_costs(dev, table, cb, nr_states, flags); if (ret) return -EINVAL; -- cgit v1.3.1 From e3f1164fc9ee8430b3a51e400abfa1b67664f538 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:48 +0000 Subject: PM: EM: Support late CPUs booting and capacity adjustment The patch adds needed infrastructure to handle the late CPUs boot, which might change the previous CPUs capacity values. With this changes the new CPUs which try to register EM will trigger the needed re-calculations for other CPUs EMs. Thanks to that the em_per_state::performance values will be aligned with the CPU capacity information after all CPUs finish the boot and EM registrations. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 124 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 41418aa6daa6..b192b0ac8c6e 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -25,6 +25,9 @@ static DEFINE_MUTEX(em_pd_mutex); static void em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table); +static void em_check_capacity_update(void); +static void em_update_workfn(struct work_struct *work); +static DECLARE_DELAYED_WORK(em_update_work, em_update_workfn); static bool _is_cpu_device(struct device *dev) { @@ -583,6 +586,10 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, unlock: mutex_unlock(&em_pd_mutex); + + if (_is_cpu_device(dev)) + em_check_capacity_update(); + return ret; } EXPORT_SYMBOL_GPL(em_dev_register_perf_domain); @@ -618,3 +625,120 @@ void em_dev_unregister_perf_domain(struct device *dev) mutex_unlock(&em_pd_mutex); } EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain); + +/* + * Adjustment of CPU performance values after boot, when all CPUs capacites + * are correctly calculated. + */ +static void em_adjust_new_capacity(struct device *dev, + struct em_perf_domain *pd, + u64 max_cap) +{ + struct em_perf_table __rcu *em_table; + struct em_perf_state *ps, *new_ps; + int ret, ps_size; + + em_table = em_table_alloc(pd); + if (!em_table) { + dev_warn(dev, "EM: allocation failed\n"); + return; + } + + new_ps = em_table->state; + + rcu_read_lock(); + ps = em_perf_state_from_pd(pd); + /* Initialize data based on old table */ + ps_size = sizeof(struct em_perf_state) * pd->nr_perf_states; + memcpy(new_ps, ps, ps_size); + + rcu_read_unlock(); + + em_init_performance(dev, pd, new_ps, pd->nr_perf_states); + ret = em_compute_costs(dev, new_ps, NULL, pd->nr_perf_states, + pd->flags); + if (ret) { + dev_warn(dev, "EM: compute costs failed\n"); + return; + } + + ret = em_dev_update_perf_domain(dev, em_table); + if (ret) + dev_warn(dev, "EM: update failed %d\n", ret); + + /* + * This is one-time-update, so give up the ownership in this updater. + * The EM framework has incremented the usage counter and from now + * will keep the reference (then free the memory when needed). + */ + em_table_free(em_table); +} + +static void em_check_capacity_update(void) +{ + cpumask_var_t cpu_done_mask; + struct em_perf_state *table; + struct em_perf_domain *pd; + unsigned long cpu_capacity; + int cpu; + + if (!zalloc_cpumask_var(&cpu_done_mask, GFP_KERNEL)) { + pr_warn("no free memory\n"); + return; + } + + /* Check if CPUs capacity has changed than update EM */ + for_each_possible_cpu(cpu) { + struct cpufreq_policy *policy; + unsigned long em_max_perf; + struct device *dev; + int nr_states; + + if (cpumask_test_cpu(cpu, cpu_done_mask)) + continue; + + policy = cpufreq_cpu_get(cpu); + if (!policy) { + pr_debug("Accessing cpu%d policy failed\n", cpu); + schedule_delayed_work(&em_update_work, + msecs_to_jiffies(1000)); + break; + } + cpufreq_cpu_put(policy); + + pd = em_cpu_get(cpu); + if (!pd || em_is_artificial(pd)) + continue; + + cpumask_or(cpu_done_mask, cpu_done_mask, + em_span_cpus(pd)); + + nr_states = pd->nr_perf_states; + cpu_capacity = arch_scale_cpu_capacity(cpu); + + rcu_read_lock(); + table = em_perf_state_from_pd(pd); + em_max_perf = table[pd->nr_perf_states - 1].performance; + rcu_read_unlock(); + + /* + * Check if the CPU capacity has been adjusted during boot + * and trigger the update for new performance values. + */ + if (em_max_perf == cpu_capacity) + continue; + + pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n", + cpu, cpu_capacity, em_max_perf); + + dev = get_cpu_device(cpu); + em_adjust_new_capacity(dev, pd, cpu_capacity); + } + + free_cpumask_var(cpu_done_mask); +} + +static void em_update_workfn(struct work_struct *work) +{ + em_check_capacity_update(); +} -- cgit v1.3.1 From 1b600da510735a0f92c8b4140a7e2cb037a6a6c3 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:49 +0000 Subject: PM: EM: Optimize em_cpu_energy() and remove division The Energy Model (EM) can be modified at runtime which brings new possibilities. The em_cpu_energy() is called by the Energy Aware Scheduler (EAS) in its hot path. The energy calculation uses power value for a given performance state (ps) and the CPU busy time as percentage for that given frequency. It is possible to avoid the division by 'scale_cpu' at runtime, because EM is updated whenever new max capacity CPU is set in the system. Use that feature and do the needed division during the calculation of the coefficient 'ps->cost'. That enhanced 'ps->cost' value can be then just multiplied simply by utilization: pd_nrg = ps->cost * \Sum cpu_util to get the needed energy for whole Performance Domain (PD). With this optimization and earlier removal of map_util_freq(), the em_cpu_energy() should run faster on the Big CPU by 1.43x and on the Little CPU by 1.69x (RockPi 4B board). Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 55 ++++++++++++-------------------------------- kernel/power/energy_model.c | 7 +++--- 2 files changed, 18 insertions(+), 44 deletions(-) (limited to 'kernel/power') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index ce24ea3fe41c..aabfc26fcd31 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -115,27 +115,6 @@ struct em_perf_domain { #define EM_MAX_NUM_CPUS 16 #endif -/* - * To avoid an overflow on 32bit machines while calculating the energy - * use a different order in the operation. First divide by the 'cpu_scale' - * which would reduce big value stored in the 'cost' field, then multiply by - * the 'sum_util'. This would allow to handle existing platforms, which have - * e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts. - * In such scenario, where there are 4 CPUs in the Perf. Domain the 'sum_util' - * could be 4096, then multiplication: 'cost' * 'sum_util' would overflow. - * This reordering of operations has some limitations, we lose small - * precision in the estimation (comparing to 64bit platform w/o reordering). - * - * We are safe on 64bit machine. - */ -#ifdef CONFIG_64BIT -#define em_estimate_energy(cost, sum_util, scale_cpu) \ - (((cost) * (sum_util)) / (scale_cpu)) -#else -#define em_estimate_energy(cost, sum_util, scale_cpu) \ - (((cost) / (scale_cpu)) * (sum_util)) -#endif - struct em_data_callback { /** * active_power() - Provide power at the next performance state of @@ -249,8 +228,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, { struct em_perf_table *em_table; struct em_perf_state *ps; - unsigned long scale_cpu; - int cpu, i; + int i; #ifdef CONFIG_SCHED_DEBUG WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n"); @@ -267,9 +245,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * max utilization to the allowed CPU capacity before calculating * effective performance. */ - cpu = cpumask_first(to_cpumask(pd->cpus)); - scale_cpu = arch_scale_cpu_capacity(cpu); - + max_util = map_util_perf(max_util); max_util = min(max_util, allowed_cpu_cap); /* @@ -282,12 +258,12 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, ps = &em_table->state[i]; /* - * The capacity of a CPU in the domain at the performance state (ps) - * can be computed as: + * The performance (capacity) of a CPU in the domain at the performance + * state (ps) can be computed as: * - * ps->freq * scale_cpu - * ps->cap = -------------------- (1) - * cpu_max_freq + * ps->freq * scale_cpu + * ps->performance = -------------------- (1) + * cpu_max_freq * * So, ignoring the costs of idle states (which are not available in * the EM), the energy consumed by this CPU at that performance state @@ -295,9 +271,10 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * * ps->power * cpu_util * cpu_nrg = -------------------- (2) - * ps->cap + * ps->performance * - * since 'cpu_util / ps->cap' represents its percentage of busy time. + * since 'cpu_util / ps->performance' represents its percentage of busy + * time. * * NOTE: Although the result of this computation actually is in * units of power, it can be manipulated as an energy value @@ -307,9 +284,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product * of two terms: * - * ps->power * cpu_max_freq cpu_util - * cpu_nrg = ------------------------ * --------- (3) - * ps->freq scale_cpu + * ps->power * cpu_max_freq + * cpu_nrg = ------------------------ * cpu_util (3) + * ps->freq * scale_cpu * * The first term is static, and is stored in the em_perf_state struct * as 'ps->cost'. @@ -319,11 +296,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * total energy of the domain (which is the simple sum of the energy of * all of its CPUs) can be factorized as: * - * ps->cost * \Sum cpu_util - * pd_nrg = ------------------------ (4) - * scale_cpu + * pd_nrg = ps->cost * \Sum cpu_util (4) */ - return em_estimate_energy(ps->cost, sum_util, scale_cpu); + return ps->cost * sum_util; } /** diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index b192b0ac8c6e..a631d7d52c40 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -192,11 +192,9 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, unsigned long flags) { unsigned long prev_cost = ULONG_MAX; - u64 fmax; int i, ret; /* Compute the cost of each performance state. */ - fmax = (u64) table[nr_states - 1].frequency; for (i = nr_states - 1; i >= 0; i--) { unsigned long power_res, cost; @@ -208,8 +206,9 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, return -EINVAL; } } else { - power_res = table[i].power; - cost = div64_u64(fmax * power_res, table[i].frequency); + /* increase resolution of 'cost' precision */ + power_res = table[i].power * 10; + cost = power_res / table[i].performance; } table[i].cost = cost; -- cgit v1.3.1 From 09417e673cbd578a1eaf8aba34a668119622d79c Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:54 +0000 Subject: PM: EM: Change debugfs configuration to use runtime EM table data Dump the runtime EM table values which can be modified in time. In order to do that allocate chunk of debug memory which can be later freed automatically thanks to devm_kcalloc(). This design can handle the fact that the EM table memory can change after EM update, so debug code cannot use the pointer from initialization phase. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 67 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 8 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index a631d7d52c40..548908e686ed 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -37,20 +37,65 @@ static bool _is_cpu_device(struct device *dev) #ifdef CONFIG_DEBUG_FS static struct dentry *rootdir; -static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd) +struct em_dbg_info { + struct em_perf_domain *pd; + int ps_id; +}; + +#define DEFINE_EM_DBG_SHOW(name, fname) \ +static int em_debug_##fname##_show(struct seq_file *s, void *unused) \ +{ \ + struct em_dbg_info *em_dbg = s->private; \ + struct em_perf_state *table; \ + unsigned long val; \ + \ + rcu_read_lock(); \ + table = em_perf_state_from_pd(em_dbg->pd); \ + val = table[em_dbg->ps_id].name; \ + rcu_read_unlock(); \ + \ + seq_printf(s, "%lu\n", val); \ + return 0; \ +} \ +DEFINE_SHOW_ATTRIBUTE(em_debug_##fname) + +DEFINE_EM_DBG_SHOW(frequency, frequency); +DEFINE_EM_DBG_SHOW(power, power); +DEFINE_EM_DBG_SHOW(cost, cost); +DEFINE_EM_DBG_SHOW(performance, performance); +DEFINE_EM_DBG_SHOW(flags, inefficiency); + +static void em_debug_create_ps(struct em_perf_domain *em_pd, + struct em_dbg_info *em_dbg, int i, + struct dentry *pd) { + struct em_perf_state *table; + unsigned long freq; struct dentry *d; char name[24]; - snprintf(name, sizeof(name), "ps:%lu", ps->frequency); + em_dbg[i].pd = em_pd; + em_dbg[i].ps_id = i; + + rcu_read_lock(); + table = em_perf_state_from_pd(em_pd); + freq = table[i].frequency; + rcu_read_unlock(); + + snprintf(name, sizeof(name), "ps:%lu", freq); /* Create per-ps directory */ d = debugfs_create_dir(name, pd); - debugfs_create_ulong("frequency", 0444, d, &ps->frequency); - debugfs_create_ulong("power", 0444, d, &ps->power); - debugfs_create_ulong("cost", 0444, d, &ps->cost); - debugfs_create_ulong("performance", 0444, d, &ps->performance); - debugfs_create_ulong("inefficient", 0444, d, &ps->flags); + debugfs_create_file("frequency", 0444, d, &em_dbg[i], + &em_debug_frequency_fops); + debugfs_create_file("power", 0444, d, &em_dbg[i], + &em_debug_power_fops); + debugfs_create_file("cost", 0444, d, &em_dbg[i], + &em_debug_cost_fops); + debugfs_create_file("performance", 0444, d, &em_dbg[i], + &em_debug_performance_fops); + debugfs_create_file("inefficient", 0444, d, &em_dbg[i], + &em_debug_inefficiency_fops); } static int em_debug_cpus_show(struct seq_file *s, void *unused) @@ -73,6 +118,7 @@ DEFINE_SHOW_ATTRIBUTE(em_debug_flags); static void em_debug_create_pd(struct device *dev) { + struct em_dbg_info *em_dbg; struct dentry *d; int i; @@ -86,9 +132,14 @@ static void em_debug_create_pd(struct device *dev) debugfs_create_file("flags", 0444, d, dev->em_pd, &em_debug_flags_fops); + em_dbg = devm_kcalloc(dev, dev->em_pd->nr_perf_states, + sizeof(*em_dbg), GFP_KERNEL); + if (!em_dbg) + return; + /* Create a sub-directory for each performance state */ for (i = 0; i < dev->em_pd->nr_perf_states; i++) - em_debug_create_ps(&dev->em_pd->table[i], d); + em_debug_create_ps(dev->em_pd, em_dbg, i, d); } -- cgit v1.3.1 From 24e9fb635df2790eccb0e95ff65c6dee7a97fcb7 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:55 +0000 Subject: PM: EM: Remove old table Remove the old EM table which wasn't able to modify the data. Clean the unneeded function and refactor the code a bit. Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 2 -- kernel/power/energy_model.c | 46 +++++++------------------------------------- 2 files changed, 7 insertions(+), 41 deletions(-) (limited to 'kernel/power') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index aabfc26fcd31..92866a81abe4 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -53,7 +53,6 @@ struct em_perf_table { /** * struct em_perf_domain - Performance domain - * @table: List of performance states, in ascending order * @em_table: Pointer to the runtime modifiable em_perf_table * @nr_perf_states: Number of performance states * @flags: See "em_perf_domain flags" @@ -69,7 +68,6 @@ struct em_perf_table { * field is unused. */ struct em_perf_domain { - struct em_perf_state *table; struct em_perf_table __rcu *em_table; int nr_perf_states; unsigned long flags; diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 548908e686ed..57838d28af85 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -276,17 +276,6 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, return 0; } -static int em_allocate_perf_table(struct em_perf_domain *pd, - int nr_states) -{ - pd->table = kcalloc(nr_states, sizeof(struct em_perf_state), - GFP_KERNEL); - if (!pd->table) - return -ENOMEM; - - return 0; -} - /** * em_dev_update_perf_domain() - Update runtime EM table for a device * @dev : Device for which the EM is to be updated @@ -331,24 +320,6 @@ int em_dev_update_perf_domain(struct device *dev, } EXPORT_SYMBOL_GPL(em_dev_update_perf_domain); -static int em_create_runtime_table(struct em_perf_domain *pd) -{ - struct em_perf_table __rcu *table; - int table_size; - - table = em_table_alloc(pd); - if (!table) - return -ENOMEM; - - /* Initialize runtime table with existing data */ - table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; - memcpy(table->state, pd->table, table_size); - - rcu_assign_pointer(pd->em_table, table); - - return 0; -} - static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, struct em_perf_state *table, struct em_data_callback *cb, @@ -409,6 +380,7 @@ static int em_create_pd(struct device *dev, int nr_states, struct em_data_callback *cb, cpumask_t *cpus, unsigned long flags) { + struct em_perf_table __rcu *em_table; struct em_perf_domain *pd; struct device *cpu_dev; int cpu, ret, num_cpus; @@ -435,17 +407,15 @@ static int em_create_pd(struct device *dev, int nr_states, pd->nr_perf_states = nr_states; - ret = em_allocate_perf_table(pd, nr_states); - if (ret) + em_table = em_table_alloc(pd); + if (!em_table) goto free_pd; - ret = em_create_perf_table(dev, pd, pd->table, cb, flags); + ret = em_create_perf_table(dev, pd, em_table->state, cb, flags); if (ret) goto free_pd_table; - ret = em_create_runtime_table(pd); - if (ret) - goto free_pd_table; + rcu_assign_pointer(pd->em_table, em_table); if (_is_cpu_device(dev)) for_each_cpu(cpu, cpus) { @@ -458,7 +428,7 @@ static int em_create_pd(struct device *dev, int nr_states, return 0; free_pd_table: - kfree(pd->table); + kfree(em_table); free_pd: kfree(pd); return -EINVAL; @@ -629,7 +599,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, dev->em_pd->flags |= flags; - em_cpufreq_update_efficiencies(dev, dev->em_pd->table); + em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state); em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); @@ -666,8 +636,6 @@ void em_dev_unregister_perf_domain(struct device *dev) mutex_lock(&em_pd_mutex); em_debug_remove_pd(dev); - kfree(dev->em_pd->table); - em_table_free(dev->em_pd->em_table); kfree(dev->em_pd); -- cgit v1.3.1 From 22ea02848c07d1cbd15a5f442138ca429866300d Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 8 Feb 2024 11:55:56 +0000 Subject: PM: EM: Add em_dev_compute_costs() The device drivers can modify EM at runtime by providing a new EM table. The EM is used by the EAS and the em_perf_state::cost stores pre-calculated value to avoid overhead. This patch provides the API for device drivers to calculate the cost values properly (and not duplicate the same code). Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 8 ++++++++ kernel/power/energy_model.c | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'kernel/power') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 92866a81abe4..770755df852f 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -170,6 +170,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, void em_dev_unregister_perf_domain(struct device *dev); struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd); void em_table_free(struct em_perf_table __rcu *table); +int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, + int nr_states); /** * em_pd_get_efficient_state() - Get an efficient performance state from the EM @@ -379,6 +381,12 @@ struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd) { return NULL; } +static inline +int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, + int nr_states) +{ + return -EINVAL; +} #endif #endif diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 57838d28af85..7101fa3fa0c0 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -276,6 +276,24 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, return 0; } +/** + * em_dev_compute_costs() - Calculate cost values for new runtime EM table + * @dev : Device for which the EM table is to be updated + * @table : The new EM table that is going to get the costs calculated + * + * Calculate the em_perf_state::cost values for new runtime EM table. The + * values are used for EAS during task placement. It also calculates and sets + * the efficiency flag for each performance state. When the function finish + * successfully the EM table is ready to be updated and used by EAS. + * + * Return 0 on success or a proper error in case of failure. + */ +int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, + int nr_states) +{ + return em_compute_costs(dev, table, NULL, nr_states, 0); +} + /** * em_dev_update_perf_domain() - Update runtime EM table for a device * @dev : Device for which the EM is to be updated -- cgit v1.3.1 From 3a561ea2413ea5a740f3b1d6b5355d46f88a7456 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Wed, 21 Feb 2024 14:25:50 +0000 Subject: PM: EM: Fix nr_states warnings in static checks During the static checks nr_states has been mentioned by the kernel test robot. Fix the warning in those 2 places. Reported-by: kernel test robot Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 7101fa3fa0c0..b686ac0345bd 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -280,6 +280,7 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, * em_dev_compute_costs() - Calculate cost values for new runtime EM table * @dev : Device for which the EM table is to be updated * @table : The new EM table that is going to get the costs calculated + * @nr_states : Number of performance states * * Calculate the em_perf_state::cost values for new runtime EM table. The * values are used for EAS during task placement. It also calculates and sets @@ -728,7 +729,6 @@ static void em_check_capacity_update(void) struct cpufreq_policy *policy; unsigned long em_max_perf; struct device *dev; - int nr_states; if (cpumask_test_cpu(cpu, cpu_done_mask)) continue; @@ -749,7 +749,6 @@ static void em_check_capacity_update(void) cpumask_or(cpu_done_mask, cpu_done_mask, em_span_cpus(pd)); - nr_states = pd->nr_perf_states; cpu_capacity = arch_scale_cpu_capacity(cpu); rcu_read_lock(); -- cgit v1.3.1