summaryrefslogtreecommitdiff
path: root/drivers/cpufreq
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-22 19:29:48 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-22 19:29:48 -0800
commit6f9baa9b92c2e4e28e385fa581f0511621db3f9a (patch)
treee942fd6f0ce883d237b239826978121200331eef /drivers/cpufreq
parent619d996c86421da1057d589123a28e2da0bf0785 (diff)
parentf3e66e78f2ecb18a55374f7d58030556b751dd79 (diff)
Merge tag 'pm-6.13-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull more power management updates from Rafael Wysocki: "These mostly are updates of cpufreq drivers used on ARM platforms plus one new DT-based cpufreq driver for virtualized guests and two cpuidle changes that should not make any difference on systems currently in the field, but will be needed for future development: - Add virtual cpufreq driver for guest kernels (David Dai) - Minor cleanup to various cpufreq drivers (Andy Shevchenko, Dhruva Gole, Jie Zhan, Jinjie Ruan, Shuosheng Huang, Sibi Sankar, and Yuan Can) - Revert "cpufreq: brcmstb-avs-cpufreq: Fix initial command check" (Colin Ian King) - Improve DT bindings for qcom-hw driver (Dmitry Baryshkov, Konrad Dybcio, and Nikunj Kela) - Make cpuidle_play_dead() try all idle states with :enter_dead() callbacks and change their return type to void (Rafael Wysocki)" * tag 'pm-6.13-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (22 commits) cpuidle: Change :enter_dead() driver callback return type to void cpuidle: Do not return from cpuidle_play_dead() on callback failures arm64: dts: qcom: sc8180x: Add a SoC-specific compatible to cpufreq-hw dt-bindings: cpufreq: cpufreq-qcom-hw: Add SC8180X compatible cpufreq: sun50i: add a100 cpufreq support cpufreq: mediatek-hw: Fix wrong return value in mtk_cpufreq_get_cpu_power() cpufreq: CPPC: Fix wrong return value in cppc_get_cpu_power() cpufreq: CPPC: Fix wrong return value in cppc_get_cpu_cost() cpufreq: loongson3: Check for error code from devm_mutex_init() call cpufreq: scmi: Fix cleanup path when boost enablement fails cpufreq: CPPC: Fix possible null-ptr-deref for cppc_get_cpu_cost() cpufreq: CPPC: Fix possible null-ptr-deref for cpufreq_cpu_get_raw() Revert "cpufreq: brcmstb-avs-cpufreq: Fix initial command check" dt-bindings: cpufreq: cpufreq-qcom-hw: Add SAR2130P compatible cpufreq: add virtual-cpufreq driver dt-bindings: cpufreq: add virtual cpufreq device cpufreq: loongson2: Unregister platform_driver on failure cpufreq: ti-cpufreq: Remove revision offsets in AM62 family cpufreq: ti-cpufreq: Allow backward compatibility for efuse syscon cppc_cpufreq: Remove HiSilicon CPPC workaround ...
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r--drivers/cpufreq/Kconfig14
-rw-r--r--drivers/cpufreq/Makefile1
-rw-r--r--drivers/cpufreq/brcmstb-avs-cpufreq.c4
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c136
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c1
-rw-r--r--drivers/cpufreq/loongson2_cpufreq.c4
-rw-r--r--drivers/cpufreq/loongson3_cpufreq.c7
-rw-r--r--drivers/cpufreq/mediatek-cpufreq-hw.c2
-rw-r--r--drivers/cpufreq/scmi-cpufreq.c4
-rw-r--r--drivers/cpufreq/sun50i-cpufreq-nvmem.c28
-rw-r--r--drivers/cpufreq/ti-cpufreq.c10
-rw-r--r--drivers/cpufreq/virtual-cpufreq.c333
12 files changed, 451 insertions, 93 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 2561b215432a..92a83a9bb2e1 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -217,6 +217,20 @@ config CPUFREQ_DT
If in doubt, say N.
+config CPUFREQ_VIRT
+ tristate "Virtual cpufreq driver"
+ depends on GENERIC_ARCH_TOPOLOGY
+ help
+ This adds a virtualized cpufreq driver for guest kernels that
+ read/writes to a MMIO region for a virtualized cpufreq device to
+ communicate with the host. It sends performance requests to the host
+ which gets used as a hint to schedule vCPU threads and select CPU
+ frequency. If a VM does not support a virtualized FIE such as AMUs,
+ it updates the frequency scaling factor by polling host CPU frequency
+ to enable accurate Per-Entity Load Tracking for tasks running in the guest.
+
+ If in doubt, say N.
+
config CPUFREQ_DT_PLATDEV
tristate "Generic DT based cpufreq platdev driver"
depends on OF
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 0f184031dd12..10d7d6e55da8 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o
obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o
obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o
+obj-$(CONFIG_CPUFREQ_VIRT) += virtual-cpufreq.o
# Traces
CFLAGS_amd-pstate-trace.o := -I$(src)
diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c
index 5d03a295a085..2fd0f6be6fa3 100644
--- a/drivers/cpufreq/brcmstb-avs-cpufreq.c
+++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c
@@ -474,8 +474,8 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv)
rc = brcm_avs_get_pmap(priv, NULL);
magic = readl(priv->base + AVS_MBOX_MAGIC);
- return (magic == AVS_FIRMWARE_MAGIC) && ((rc != -ENOTSUPP) ||
- (rc != -EINVAL));
+ return (magic == AVS_FIRMWARE_MAGIC) && (rc != -ENOTSUPP) &&
+ (rc != -EINVAL);
}
static unsigned int brcm_avs_cpufreq_get(unsigned int cpu)
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 2b8708475ac7..bd8f75accfa0 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -36,33 +36,15 @@ static LIST_HEAD(cpu_data_list);
static bool boost_supported;
-struct cppc_workaround_oem_info {
- char oem_id[ACPI_OEM_ID_SIZE + 1];
- char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
- u32 oem_revision;
-};
-
-static struct cppc_workaround_oem_info wa_info[] = {
- {
- .oem_id = "HISI ",
- .oem_table_id = "HIP07 ",
- .oem_revision = 0,
- }, {
- .oem_id = "HISI ",
- .oem_table_id = "HIP08 ",
- .oem_revision = 0,
- }
-};
-
static struct cpufreq_driver cppc_cpufreq_driver;
+#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
static enum {
FIE_UNSET = -1,
FIE_ENABLED,
FIE_DISABLED
} fie_disabled = FIE_UNSET;
-#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
module_param(fie_disabled, int, 0444);
MODULE_PARM_DESC(fie_disabled, "Disable Frequency Invariance Engine (FIE)");
@@ -78,7 +60,6 @@ struct cppc_freq_invariance {
static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
static struct kthread_worker *kworker_fie;
-static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
struct cppc_perf_fb_ctrs *fb_ctrs_t0,
struct cppc_perf_fb_ctrs *fb_ctrs_t1);
@@ -118,6 +99,9 @@ static void cppc_scale_freq_workfn(struct kthread_work *work)
perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
&fb_ctrs);
+ if (!perf)
+ return;
+
cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
perf <<= SCHED_CAPACITY_SHIFT;
@@ -420,6 +404,9 @@ static int cppc_get_cpu_power(struct device *cpu_dev,
struct cppc_cpudata *cpu_data;
policy = cpufreq_cpu_get_raw(cpu_dev->id);
+ if (!policy)
+ return -EINVAL;
+
cpu_data = policy->driver_data;
perf_caps = &cpu_data->perf_caps;
max_cap = arch_scale_cpu_capacity(cpu_dev->id);
@@ -487,6 +474,9 @@ static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
int step;
policy = cpufreq_cpu_get_raw(cpu_dev->id);
+ if (!policy)
+ return -EINVAL;
+
cpu_data = policy->driver_data;
perf_caps = &cpu_data->perf_caps;
max_cap = arch_scale_cpu_capacity(cpu_dev->id);
@@ -724,13 +714,31 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
delta_delivered = get_delta(fb_ctrs_t1->delivered,
fb_ctrs_t0->delivered);
- /* Check to avoid divide-by zero and invalid delivered_perf */
+ /*
+ * Avoid divide-by zero and unchanged feedback counters.
+ * Leave it for callers to handle.
+ */
if (!delta_reference || !delta_delivered)
- return cpu_data->perf_ctrls.desired_perf;
+ return 0;
return (reference_perf * delta_delivered) / delta_reference;
}
+static int cppc_get_perf_ctrs_sample(int cpu,
+ struct cppc_perf_fb_ctrs *fb_ctrs_t0,
+ struct cppc_perf_fb_ctrs *fb_ctrs_t1)
+{
+ int ret;
+
+ ret = cppc_get_perf_ctrs(cpu, fb_ctrs_t0);
+ if (ret)
+ return ret;
+
+ udelay(2); /* 2usec delay between sampling */
+
+ return cppc_get_perf_ctrs(cpu, fb_ctrs_t1);
+}
+
static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
{
struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
@@ -746,18 +754,32 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
cpufreq_cpu_put(policy);
- ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
- if (ret)
- return 0;
-
- udelay(2); /* 2usec delay between sampling */
-
- ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
- if (ret)
- return 0;
+ ret = cppc_get_perf_ctrs_sample(cpu, &fb_ctrs_t0, &fb_ctrs_t1);
+ if (ret) {
+ if (ret == -EFAULT)
+ /* Any of the associated CPPC regs is 0. */
+ goto out_invalid_counters;
+ else
+ return 0;
+ }
delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
&fb_ctrs_t1);
+ if (!delivered_perf)
+ goto out_invalid_counters;
+
+ return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
+
+out_invalid_counters:
+ /*
+ * Feedback counters could be unchanged or 0 when a cpu enters a
+ * low-power idle state, e.g. clock-gated or power-gated.
+ * Use desired perf for reflecting frequency. Get the latest register
+ * value first as some platforms may update the actual delivered perf
+ * there; if failed, resort to the cached desired perf.
+ */
+ if (cppc_get_desired_perf(cpu, &delivered_perf))
+ delivered_perf = cpu_data->perf_ctrls.desired_perf;
return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
}
@@ -812,57 +834,6 @@ static struct cpufreq_driver cppc_cpufreq_driver = {
.name = "cppc_cpufreq",
};
-/*
- * HISI platform does not support delivered performance counter and
- * reference performance counter. It can calculate the performance using the
- * platform specific mechanism. We reuse the desired performance register to
- * store the real performance calculated by the platform.
- */
-static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu)
-{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
- struct cppc_cpudata *cpu_data;
- u64 desired_perf;
- int ret;
-
- if (!policy)
- return -ENODEV;
-
- cpu_data = policy->driver_data;
-
- cpufreq_cpu_put(policy);
-
- ret = cppc_get_desired_perf(cpu, &desired_perf);
- if (ret < 0)
- return -EIO;
-
- return cppc_perf_to_khz(&cpu_data->perf_caps, desired_perf);
-}
-
-static void cppc_check_hisi_workaround(void)
-{
- struct acpi_table_header *tbl;
- acpi_status status = AE_OK;
- int i;
-
- status = acpi_get_table(ACPI_SIG_PCCT, 0, &tbl);
- if (ACPI_FAILURE(status) || !tbl)
- return;
-
- for (i = 0; i < ARRAY_SIZE(wa_info); i++) {
- if (!memcmp(wa_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) &&
- !memcmp(wa_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) &&
- wa_info[i].oem_revision == tbl->oem_revision) {
- /* Overwrite the get() callback */
- cppc_cpufreq_driver.get = hisi_cppc_cpufreq_get_rate;
- fie_disabled = FIE_DISABLED;
- break;
- }
- }
-
- acpi_put_table(tbl);
-}
-
static int __init cppc_cpufreq_init(void)
{
int ret;
@@ -870,7 +841,6 @@ static int __init cppc_cpufreq_init(void)
if (!acpi_cpc_valid())
return -ENODEV;
- cppc_check_hisi_workaround();
cppc_freq_invariance_init();
populate_efficiency_class();
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 18942bfe9c95..2a3e8bd317c9 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -103,6 +103,7 @@ static const struct of_device_id allowlist[] __initconst = {
* platforms using "operating-points-v2" property.
*/
static const struct of_device_id blocklist[] __initconst = {
+ { .compatible = "allwinner,sun50i-a100" },
{ .compatible = "allwinner,sun50i-h6", },
{ .compatible = "allwinner,sun50i-h616", },
{ .compatible = "allwinner,sun50i-h618", },
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index 6a8e97896d38..ed1a6dbad638 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -148,7 +148,9 @@ static int __init cpufreq_init(void)
ret = cpufreq_register_driver(&loongson2_cpufreq_driver);
- if (!ret && !nowait) {
+ if (ret) {
+ platform_driver_unregister(&platform_driver);
+ } else if (!nowait) {
saved_cpu_wait = cpu_wait;
cpu_wait = loongson2_cpu_wait;
}
diff --git a/drivers/cpufreq/loongson3_cpufreq.c b/drivers/cpufreq/loongson3_cpufreq.c
index 61ebebf69455..bd34bf0fafa5 100644
--- a/drivers/cpufreq/loongson3_cpufreq.c
+++ b/drivers/cpufreq/loongson3_cpufreq.c
@@ -346,8 +346,11 @@ static int loongson3_cpufreq_probe(struct platform_device *pdev)
{
int i, ret;
- for (i = 0; i < MAX_PACKAGES; i++)
- devm_mutex_init(&pdev->dev, &cpufreq_mutex[i]);
+ for (i = 0; i < MAX_PACKAGES; i++) {
+ ret = devm_mutex_init(&pdev->dev, &cpufreq_mutex[i]);
+ if (ret)
+ return ret;
+ }
ret = do_service_request(0, 0, CMD_GET_VERSION, 0, 0);
if (ret <= 0)
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index f7db5f4ad306..9252ebd60373 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -62,7 +62,7 @@ mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *uW,
policy = cpufreq_cpu_get_raw(cpu_dev->id);
if (!policy)
- return 0;
+ return -EINVAL;
data = policy->driver_data;
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 5892c73e129d..07d6f9a9b7c8 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -287,7 +287,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
ret = cpufreq_enable_boost_support();
if (ret) {
dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
- goto out_free_opp;
+ goto out_free_table;
} else {
scmi_cpufreq_hw_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs;
scmi_cpufreq_driver.boost_enabled = true;
@@ -296,6 +296,8 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
return 0;
+out_free_table:
+ dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
out_free_opp:
dev_pm_opp_remove_all_dynamic(cpu_dev);
diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
index 352e1a69a85e..17d6a149f580 100644
--- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c
+++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
@@ -22,6 +22,9 @@
#define NVMEM_MASK 0x7
#define NVMEM_SHIFT 5
+#define SUN50I_A100_NVMEM_MASK 0xf
+#define SUN50I_A100_NVMEM_SHIFT 12
+
static struct platform_device *cpufreq_dt_pdev, *sun50i_cpufreq_pdev;
struct sunxi_cpufreq_data {
@@ -45,6 +48,23 @@ static u32 sun50i_h6_efuse_xlate(u32 speedbin)
return 0;
}
+static u32 sun50i_a100_efuse_xlate(u32 speedbin)
+{
+ u32 efuse_value;
+
+ efuse_value = (speedbin >> SUN50I_A100_NVMEM_SHIFT) &
+ SUN50I_A100_NVMEM_MASK;
+
+ switch (efuse_value) {
+ case 0b100:
+ return 2;
+ case 0b010:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
static int get_soc_id_revision(void)
{
#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY
@@ -108,6 +128,10 @@ static struct sunxi_cpufreq_data sun50i_h6_cpufreq_data = {
.efuse_xlate = sun50i_h6_efuse_xlate,
};
+static struct sunxi_cpufreq_data sun50i_a100_cpufreq_data = {
+ .efuse_xlate = sun50i_a100_efuse_xlate,
+};
+
static struct sunxi_cpufreq_data sun50i_h616_cpufreq_data = {
.efuse_xlate = sun50i_h616_efuse_xlate,
};
@@ -116,6 +140,9 @@ static const struct of_device_id cpu_opp_match_list[] = {
{ .compatible = "allwinner,sun50i-h6-operating-points",
.data = &sun50i_h6_cpufreq_data,
},
+ { .compatible = "allwinner,sun50i-a100-operating-points",
+ .data = &sun50i_a100_cpufreq_data,
+ },
{ .compatible = "allwinner,sun50i-h616-operating-points",
.data = &sun50i_h616_cpufreq_data,
},
@@ -291,6 +318,7 @@ static struct platform_driver sun50i_cpufreq_driver = {
static const struct of_device_id sun50i_cpufreq_match_list[] = {
{ .compatible = "allwinner,sun50i-h6" },
+ { .compatible = "allwinner,sun50i-a100" },
{ .compatible = "allwinner,sun50i-h616" },
{ .compatible = "allwinner,sun50i-h618" },
{ .compatible = "allwinner,sun50i-h700" },
diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index ba621ce1cdda..5a5147277cd0 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -93,6 +93,8 @@ struct ti_cpufreq_soc_data {
bool multi_regulator;
/* Backward compatibility hack: Might have missing syscon */
#define TI_QUIRK_SYSCON_MAY_BE_MISSING 0x1
+/* Backward compatibility hack: new syscon size is 1 register wide */
+#define TI_QUIRK_SYSCON_IS_SINGLE_REG 0x2
u8 quirks;
};
@@ -316,8 +318,8 @@ static struct ti_cpufreq_soc_data am625_soc_data = {
.efuse_offset = 0x0018,
.efuse_mask = 0x07c0,
.efuse_shift = 0x6,
- .rev_offset = 0x0014,
.multi_regulator = false,
+ .quirks = TI_QUIRK_SYSCON_IS_SINGLE_REG,
};
static struct ti_cpufreq_soc_data am62a7_soc_data = {
@@ -325,7 +327,6 @@ static struct ti_cpufreq_soc_data am62a7_soc_data = {
.efuse_offset = 0x0,
.efuse_mask = 0x07c0,
.efuse_shift = 0x6,
- .rev_offset = 0x0014,
.multi_regulator = false,
};
@@ -334,7 +335,6 @@ static struct ti_cpufreq_soc_data am62p5_soc_data = {
.efuse_offset = 0x0,
.efuse_mask = 0x07c0,
.efuse_shift = 0x6,
- .rev_offset = 0x0014,
.multi_regulator = false,
};
@@ -354,6 +354,10 @@ static int ti_cpufreq_get_efuse(struct ti_cpufreq_data *opp_data,
ret = regmap_read(opp_data->syscon, opp_data->soc_data->efuse_offset,
&efuse);
+
+ if (opp_data->soc_data->quirks & TI_QUIRK_SYSCON_IS_SINGLE_REG && ret == -EIO)
+ ret = regmap_read(opp_data->syscon, 0x0, &efuse);
+
if (opp_data->soc_data->quirks & TI_QUIRK_SYSCON_MAY_BE_MISSING && ret == -EIO) {
/* not a syscon register! */
void __iomem *regs = ioremap(OMAP3_SYSCON_BASE +
diff --git a/drivers/cpufreq/virtual-cpufreq.c b/drivers/cpufreq/virtual-cpufreq.c
new file mode 100644
index 000000000000..a050b3a6737f
--- /dev/null
+++ b/drivers/cpufreq/virtual-cpufreq.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include <linux/arch_topology.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/*
+ * CPU0..CPUn
+ * +-------------+-------------------------------+--------+-------+
+ * | Register | Description | Offset | Len |
+ * +-------------+-------------------------------+--------+-------+
+ * | cur_perf | read this register to get | 0x0 | 0x4 |
+ * | | the current perf (integer val | | |
+ * | | representing perf relative to | | |
+ * | | max performance) | | |
+ * | | that vCPU is running at | | |
+ * +-------------+-------------------------------+--------+-------+
+ * | set_perf | write to this register to set | 0x4 | 0x4 |
+ * | | perf value of the vCPU | | |
+ * +-------------+-------------------------------+--------+-------+
+ * | perftbl_len | number of entries in perf | 0x8 | 0x4 |
+ * | | table. A single entry in the | | |
+ * | | perf table denotes no table | | |
+ * | | and the entry contains | | |
+ * | | the maximum perf value | | |
+ * | | that this vCPU supports. | | |
+ * | | The guest can request any | | |
+ * | | value between 1 and max perf | | |
+ * | | when perftbls are not used. | | |
+ * +---------------------------------------------+--------+-------+
+ * | perftbl_sel | write to this register to | 0xc | 0x4 |
+ * | | select perf table entry to | | |
+ * | | read from | | |
+ * +---------------------------------------------+--------+-------+
+ * | perftbl_rd | read this register to get | 0x10 | 0x4 |
+ * | | perf value of the selected | | |
+ * | | entry based on perftbl_sel | | |
+ * +---------------------------------------------+--------+-------+
+ * | perf_domain | performance domain number | 0x14 | 0x4 |
+ * | | that this vCPU belongs to. | | |
+ * | | vCPUs sharing the same perf | | |
+ * | | domain number are part of the | | |
+ * | | same performance domain. | | |
+ * +-------------+-------------------------------+--------+-------+
+ */
+
+#define REG_CUR_PERF_STATE_OFFSET 0x0
+#define REG_SET_PERF_STATE_OFFSET 0x4
+#define REG_PERFTBL_LEN_OFFSET 0x8
+#define REG_PERFTBL_SEL_OFFSET 0xc
+#define REG_PERFTBL_RD_OFFSET 0x10
+#define REG_PERF_DOMAIN_OFFSET 0x14
+#define PER_CPU_OFFSET 0x1000
+
+#define PERFTBL_MAX_ENTRIES 64U
+
+static void __iomem *base;
+static DEFINE_PER_CPU(u32, perftbl_num_entries);
+
+static void virt_scale_freq_tick(void)
+{
+ int cpu = smp_processor_id();
+ u32 max_freq = (u32)cpufreq_get_hw_max_freq(cpu);
+ u64 cur_freq;
+ unsigned long scale;
+
+ cur_freq = (u64)readl_relaxed(base + cpu * PER_CPU_OFFSET
+ + REG_CUR_PERF_STATE_OFFSET);
+
+ cur_freq <<= SCHED_CAPACITY_SHIFT;
+ scale = (unsigned long)div_u64(cur_freq, max_freq);
+ scale = min(scale, SCHED_CAPACITY_SCALE);
+
+ this_cpu_write(arch_freq_scale, scale);
+}
+
+static struct scale_freq_data virt_sfd = {
+ .source = SCALE_FREQ_SOURCE_VIRT,
+ .set_freq_scale = virt_scale_freq_tick,
+};
+
+static unsigned int virt_cpufreq_set_perf(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ writel_relaxed(target_freq,
+ base + policy->cpu * PER_CPU_OFFSET + REG_SET_PERF_STATE_OFFSET);
+ return 0;
+}
+
+static unsigned int virt_cpufreq_fast_switch(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ virt_cpufreq_set_perf(policy, target_freq);
+ return target_freq;
+}
+
+static u32 virt_cpufreq_get_perftbl_entry(int cpu, u32 idx)
+{
+ writel_relaxed(idx, base + cpu * PER_CPU_OFFSET +
+ REG_PERFTBL_SEL_OFFSET);
+ return readl_relaxed(base + cpu * PER_CPU_OFFSET +
+ REG_PERFTBL_RD_OFFSET);
+}
+
+static int virt_cpufreq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ struct cpufreq_freqs freqs;
+ int ret = 0;
+
+ freqs.old = policy->cur;
+ freqs.new = target_freq;
+
+ cpufreq_freq_transition_begin(policy, &freqs);
+ ret = virt_cpufreq_set_perf(policy, target_freq);
+ cpufreq_freq_transition_end(policy, &freqs, ret != 0);
+
+ return ret;
+}
+
+static int virt_cpufreq_get_sharing_cpus(struct cpufreq_policy *policy)
+{
+ u32 cur_perf_domain, perf_domain;
+ struct device *cpu_dev;
+ int cpu;
+
+ cur_perf_domain = readl_relaxed(base + policy->cpu *
+ PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET);
+
+ for_each_possible_cpu(cpu) {
+ cpu_dev = get_cpu_device(cpu);
+ if (!cpu_dev)
+ continue;
+
+ perf_domain = readl_relaxed(base + cpu *
+ PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET);
+
+ if (perf_domain == cur_perf_domain)
+ cpumask_set_cpu(cpu, policy->cpus);
+ }
+
+ return 0;
+}
+
+static int virt_cpufreq_get_freq_info(struct cpufreq_policy *policy)
+{
+ struct cpufreq_frequency_table *table;
+ u32 num_perftbl_entries, idx;
+
+ num_perftbl_entries = per_cpu(perftbl_num_entries, policy->cpu);
+
+ if (num_perftbl_entries == 1) {
+ policy->cpuinfo.min_freq = 1;
+ policy->cpuinfo.max_freq = virt_cpufreq_get_perftbl_entry(policy->cpu, 0);
+
+ policy->min = policy->cpuinfo.min_freq;
+ policy->max = policy->cpuinfo.max_freq;
+
+ policy->cur = policy->max;
+ return 0;
+ }
+
+ table = kcalloc(num_perftbl_entries + 1, sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ for (idx = 0; idx < num_perftbl_entries; idx++)
+ table[idx].frequency = virt_cpufreq_get_perftbl_entry(policy->cpu, idx);
+
+ table[idx].frequency = CPUFREQ_TABLE_END;
+ policy->freq_table = table;
+
+ return 0;
+}
+
+static int virt_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+ struct device *cpu_dev;
+ int ret;
+
+ cpu_dev = get_cpu_device(policy->cpu);
+ if (!cpu_dev)
+ return -ENODEV;
+
+ ret = virt_cpufreq_get_freq_info(policy);
+ if (ret) {
+ dev_warn(cpu_dev, "failed to get cpufreq info\n");
+ return ret;
+ }
+
+ ret = virt_cpufreq_get_sharing_cpus(policy);
+ if (ret) {
+ dev_warn(cpu_dev, "failed to get sharing cpumask\n");
+ return ret;
+ }
+
+ /*
+ * To simplify and improve latency of handling frequency requests on
+ * the host side, this ensures that the vCPU thread triggering the MMIO
+ * abort is the same thread whose performance constraints (Ex. uclamp
+ * settings) need to be updated. This simplifies the VMM (Virtual
+ * Machine Manager) having to find the correct vCPU thread and/or
+ * facing permission issues when configuring other threads.
+ */
+ policy->dvfs_possible_from_any_cpu = false;
+ policy->fast_switch_possible = true;
+
+ /*
+ * Using the default SCALE_FREQ_SOURCE_CPUFREQ is insufficient since
+ * the actual physical CPU frequency may not match requested frequency
+ * from the vCPU thread due to frequency update latencies or other
+ * inputs to the physical CPU frequency selection. This additional FIE
+ * source allows for more accurate freq_scale updates and only takes
+ * effect if another FIE source such as AMUs have not been registered.
+ */
+ topology_set_scale_freq_source(&virt_sfd, policy->cpus);
+
+ return 0;
+}
+
+static void virt_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+ topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_VIRT, policy->related_cpus);
+ kfree(policy->freq_table);
+}
+
+static int virt_cpufreq_online(struct cpufreq_policy *policy)
+{
+ /* Nothing to restore. */
+ return 0;
+}
+
+static int virt_cpufreq_offline(struct cpufreq_policy *policy)
+{
+ /* Dummy offline() to avoid exit() being called and freeing resources. */
+ return 0;
+}
+
+static int virt_cpufreq_verify_policy(struct cpufreq_policy_data *policy)
+{
+ if (policy->freq_table)
+ return cpufreq_frequency_table_verify(policy, policy->freq_table);
+
+ cpufreq_verify_within_cpu_limits(policy);
+ return 0;
+}
+
+static struct cpufreq_driver cpufreq_virt_driver = {
+ .name = "virt-cpufreq",
+ .init = virt_cpufreq_cpu_init,
+ .exit = virt_cpufreq_cpu_exit,
+ .online = virt_cpufreq_online,
+ .offline = virt_cpufreq_offline,
+ .verify = virt_cpufreq_verify_policy,
+ .target = virt_cpufreq_target,
+ .fast_switch = virt_cpufreq_fast_switch,
+ .attr = cpufreq_generic_attr,
+};
+
+static int virt_cpufreq_driver_probe(struct platform_device *pdev)
+{
+ u32 num_perftbl_entries;
+ int ret, cpu;
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ for_each_possible_cpu(cpu) {
+ num_perftbl_entries = readl_relaxed(base + cpu * PER_CPU_OFFSET +
+ REG_PERFTBL_LEN_OFFSET);
+
+ if (!num_perftbl_entries || num_perftbl_entries > PERFTBL_MAX_ENTRIES)
+ return -ENODEV;
+
+ per_cpu(perftbl_num_entries, cpu) = num_perftbl_entries;
+ }
+
+ ret = cpufreq_register_driver(&cpufreq_virt_driver);
+ if (ret) {
+ dev_err(&pdev->dev, "Virtual CPUFreq driver failed to register: %d\n", ret);
+ return ret;
+ }
+
+ dev_dbg(&pdev->dev, "Virtual CPUFreq driver initialized\n");
+ return 0;
+}
+
+static void virt_cpufreq_driver_remove(struct platform_device *pdev)
+{
+ cpufreq_unregister_driver(&cpufreq_virt_driver);
+}
+
+static const struct of_device_id virt_cpufreq_match[] = {
+ { .compatible = "qemu,virtual-cpufreq", .data = NULL},
+ {}
+};
+MODULE_DEVICE_TABLE(of, virt_cpufreq_match);
+
+static struct platform_driver virt_cpufreq_driver = {
+ .probe = virt_cpufreq_driver_probe,
+ .remove = virt_cpufreq_driver_remove,
+ .driver = {
+ .name = "virt-cpufreq",
+ .of_match_table = virt_cpufreq_match,
+ },
+};
+
+static int __init virt_cpufreq_init(void)
+{
+ return platform_driver_register(&virt_cpufreq_driver);
+}
+postcore_initcall(virt_cpufreq_init);
+
+static void __exit virt_cpufreq_exit(void)
+{
+ platform_driver_unregister(&virt_cpufreq_driver);
+}
+module_exit(virt_cpufreq_exit);
+
+MODULE_DESCRIPTION("Virtual cpufreq driver");
+MODULE_LICENSE("GPL");