From 0f5c890e9b9754d9aa5bf6ae2fc00cae65780d23 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 5 May 2014 08:33:49 -0500 Subject: PM / OPP: Remove cpufreq wrapper dependency on internal data organization CPUFREQ custom functions for OPP (Operating Performance Points) currently exist inside the OPP library. These custom functions currently depend on internal data structures to pick up OPP information to create the cpufreq table. For example, the cpufreq table is created precisely in the same order of how OPP entries are stored inside the list implementation. This kind of tight interdependency is purely artificial since the same functionality can be achieved using the generic OPP functions meant to do the same. This interdependency also limits the independent modification of cpufreq and OPP library. So use the generic dev_pm_opp_find_freq_ceil function that achieves the table organization as we currently use. As a result of this, we dont need to use the internal device_opp structure anymore, and we hence we can switch over to rcu lock instead of the mutex holding the internal list lock. This breaking of dependency on internal data structure imposes no change to usage of these. NOTE: This change is a precursor to moving this cpufreq specific logic out of the generic library into cpufreq. Cc: Kevin Hilman Signed-off-by: Nishanth Menon Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp.c | 55 ++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 27 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 25538675d59e..38b43bb20878 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -617,53 +617,54 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_disable); * the table if any of the mentioned functions have been invoked in the interim. * * Locking: The internal device_opp and opp structures are RCU protected. 
- * To simplify the logic, we pretend we are updater and hold relevant mutex here - * Callers should ensure that this function is *NOT* called under RCU protection - * or in contexts where mutex locking cannot be used. + * Since we just use the regular accessor functions to access the internal data + * structures, we use RCU read lock inside this function. As a result, users of + * this function DONOT need to use explicit locks for invoking. */ int dev_pm_opp_init_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table) { - struct device_opp *dev_opp; struct dev_pm_opp *opp; - struct cpufreq_frequency_table *freq_table; - int i = 0; + struct cpufreq_frequency_table *freq_table = NULL; + int i, max_opps, ret = 0; + unsigned long rate; - /* Pretend as if I am an updater */ - mutex_lock(&dev_opp_list_lock); + rcu_read_lock(); - dev_opp = find_device_opp(dev); - if (IS_ERR(dev_opp)) { - int r = PTR_ERR(dev_opp); - mutex_unlock(&dev_opp_list_lock); - dev_err(dev, "%s: Device OPP not found (%d)\n", __func__, r); - return r; + max_opps = dev_pm_opp_get_opp_count(dev); + if (max_opps <= 0) { + ret = max_opps ? 
max_opps : -ENODATA; + goto out; } - freq_table = kzalloc(sizeof(struct cpufreq_frequency_table) * - (dev_pm_opp_get_opp_count(dev) + 1), GFP_KERNEL); + freq_table = kzalloc(sizeof(*freq_table) * (max_opps + 1), GFP_KERNEL); if (!freq_table) { - mutex_unlock(&dev_opp_list_lock); - dev_warn(dev, "%s: Unable to allocate frequency table\n", - __func__); - return -ENOMEM; + ret = -ENOMEM; + goto out; } - list_for_each_entry(opp, &dev_opp->opp_list, node) { - if (opp->available) { - freq_table[i].driver_data = i; - freq_table[i].frequency = opp->rate / 1000; - i++; + for (i = 0, rate = 0; i < max_opps; i++, rate++) { + /* find next rate */ + opp = dev_pm_opp_find_freq_ceil(dev, &rate); + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + goto out; } + freq_table[i].driver_data = i; + freq_table[i].frequency = rate / 1000; } - mutex_unlock(&dev_opp_list_lock); freq_table[i].driver_data = i; freq_table[i].frequency = CPUFREQ_TABLE_END; *table = &freq_table[0]; - return 0; +out: + rcu_read_unlock(); + if (ret) + kfree(freq_table); + + return ret; } EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table); -- cgit v1.2.3-70-g09d2 From a0dd7b79657bd6644b914d16ce7f23468c44a7b4 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 5 May 2014 08:33:50 -0500 Subject: PM / OPP: Move cpufreq specific OPP functions out of generic OPP library CPUFreq specific helper functions for OPP (Operating Performance Points) now use generic OPP functions that allow CPUFreq to be be moved back into CPUFreq framework. This allows for independent modifications or future enhancements as needed isolated to just CPUFreq framework alone. Here, we just move relevant code and documentation to make this part of CPUFreq infrastructure. Cc: Kevin Hilman Signed-off-by: Nishanth Menon Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- Documentation/cpu-freq/core.txt | 29 +++++++++++ Documentation/power/opp.txt | 40 ++------------- drivers/base/power/opp.c | 92 --------------------------------- drivers/cpufreq/Makefile | 2 + drivers/cpufreq/cpufreq_opp.c | 110 ++++++++++++++++++++++++++++++++++++++++ include/linux/cpufreq.h | 21 ++++++++ include/linux/pm_opp.h | 20 -------- 7 files changed, 167 insertions(+), 147 deletions(-) create mode 100644 drivers/cpufreq/cpufreq_opp.c (limited to 'drivers/base') diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt index 0060d76b445f..70933eadc308 100644 --- a/Documentation/cpu-freq/core.txt +++ b/Documentation/cpu-freq/core.txt @@ -20,6 +20,7 @@ Contents: --------- 1. CPUFreq core and interfaces 2. CPUFreq notifiers +3. CPUFreq Table Generation with Operating Performance Point (OPP) 1. General Information ======================= @@ -92,3 +93,31 @@ values: cpu - number of the affected CPU old - old frequency new - new frequency + +3. CPUFreq Table Generation with Operating Performance Point (OPP) +================================================================== +For details about OPP, see Documentation/power/opp.txt + +dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with + cpufreq_frequency_table_cpuinfo which is provided with the list of + frequencies that are available for operation. This function provides + a ready to use conversion routine to translate the OPP layer's internal + information about the available frequencies into a format readily + providable to cpufreq. + + WARNING: Do not use this function in interrupt context. + + Example: + soc_pm_init() + { + /* Do things */ + r = dev_pm_opp_init_cpufreq_table(dev, &freq_table); + if (!r) + cpufreq_frequency_table_cpuinfo(policy, freq_table); + /* Do other things */ + } + + NOTE: This function is available only if CONFIG_CPU_FREQ is enabled in + addition to CONFIG_PM_OPP. 
+ +dev_pm_opp_free_cpufreq_table - Free up the table allocated by dev_pm_opp_init_cpufreq_table diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt index b8a907dc0169..a9adad828cdc 100644 --- a/Documentation/power/opp.txt +++ b/Documentation/power/opp.txt @@ -10,8 +10,7 @@ Contents 3. OPP Search Functions 4. OPP Availability Control Functions 5. OPP Data Retrieval Functions -6. Cpufreq Table Generation -7. Data Structures +6. Data Structures 1. Introduction =============== @@ -72,7 +71,6 @@ operations until that OPP could be re-enabled if possible. OPP library facilitates this concept in it's implementation. The following operational functions operate only on available opps: opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq, dev_pm_opp_get_opp_count -and dev_pm_opp_init_cpufreq_table dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer which can then be used for dev_pm_opp_enable/disable functions to make an opp available as required. @@ -96,10 +94,9 @@ using RCU read locks. The opp_find_freq_{exact,ceil,floor}, opp_get_{voltage, freq, opp_count} fall into this category. opp_{add,enable,disable} are updaters which use mutex and implement it's own -RCU locking mechanisms. dev_pm_opp_init_cpufreq_table acts as an updater and uses -mutex to implment RCU updater strategy. These functions should *NOT* be called -under RCU locks and other contexts that prevent blocking functions in RCU or -mutex operations from working. +RCU locking mechanisms. These functions should *NOT* be called under RCU locks +and other contexts that prevent blocking functions in RCU or mutex operations +from working. 2. Initial OPP List Registration ================================ @@ -311,34 +308,7 @@ dev_pm_opp_get_opp_count - Retrieve the number of available opps for a device /* Do other things */ } -6. 
Cpufreq Table Generation -=========================== -dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with - cpufreq_frequency_table_cpuinfo which is provided with the list of - frequencies that are available for operation. This function provides - a ready to use conversion routine to translate the OPP layer's internal - information about the available frequencies into a format readily - providable to cpufreq. - - WARNING: Do not use this function in interrupt context. - - Example: - soc_pm_init() - { - /* Do things */ - r = dev_pm_opp_init_cpufreq_table(dev, &freq_table); - if (!r) - cpufreq_frequency_table_cpuinfo(policy, freq_table); - /* Do other things */ - } - - NOTE: This function is available only if CONFIG_CPU_FREQ is enabled in - addition to CONFIG_PM as power management feature is required to - dynamically scale voltage and frequency in a system. - -dev_pm_opp_free_cpufreq_table - Free up the table allocated by dev_pm_opp_init_cpufreq_table - -7. Data Structures +6. Data Structures ================== Typically an SoC contains multiple voltage domains which are variable. Each domain is represented by a device pointer. The relationship to OPP can be diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 38b43bb20878..d9e376a6d19d 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -596,97 +595,6 @@ int dev_pm_opp_disable(struct device *dev, unsigned long freq) } EXPORT_SYMBOL_GPL(dev_pm_opp_disable); -#ifdef CONFIG_CPU_FREQ -/** - * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a device - * @dev: device for which we do this operation - * @table: Cpufreq table returned back to caller - * - * Generate a cpufreq table for a provided device- this assumes that the - * opp list is already initialized and ready for usage. - * - * This function allocates required memory for the cpufreq table. 
It is - * expected that the caller does the required maintenance such as freeing - * the table as required. - * - * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM - * if no memory available for the operation (table is not populated), returns 0 - * if successful and table is populated. - * - * WARNING: It is important for the callers to ensure refreshing their copy of - * the table if any of the mentioned functions have been invoked in the interim. - * - * Locking: The internal device_opp and opp structures are RCU protected. - * Since we just use the regular accessor functions to access the internal data - * structures, we use RCU read lock inside this function. As a result, users of - * this function DONOT need to use explicit locks for invoking. - */ -int dev_pm_opp_init_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table) -{ - struct dev_pm_opp *opp; - struct cpufreq_frequency_table *freq_table = NULL; - int i, max_opps, ret = 0; - unsigned long rate; - - rcu_read_lock(); - - max_opps = dev_pm_opp_get_opp_count(dev); - if (max_opps <= 0) { - ret = max_opps ? 
max_opps : -ENODATA; - goto out; - } - - freq_table = kzalloc(sizeof(*freq_table) * (max_opps + 1), GFP_KERNEL); - if (!freq_table) { - ret = -ENOMEM; - goto out; - } - - for (i = 0, rate = 0; i < max_opps; i++, rate++) { - /* find next rate */ - opp = dev_pm_opp_find_freq_ceil(dev, &rate); - if (IS_ERR(opp)) { - ret = PTR_ERR(opp); - goto out; - } - freq_table[i].driver_data = i; - freq_table[i].frequency = rate / 1000; - } - - freq_table[i].driver_data = i; - freq_table[i].frequency = CPUFREQ_TABLE_END; - - *table = &freq_table[0]; - -out: - rcu_read_unlock(); - if (ret) - kfree(freq_table); - - return ret; -} -EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table); - -/** - * dev_pm_opp_free_cpufreq_table() - free the cpufreq table - * @dev: device for which we do this operation - * @table: table to free - * - * Free up the table allocated by dev_pm_opp_init_cpufreq_table - */ -void dev_pm_opp_free_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table) -{ - if (!table) - return; - - kfree(*table); - *table = NULL; -} -EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table); -#endif /* CONFIG_CPU_FREQ */ - /** * dev_pm_opp_get_notifier() - find notifier_head of the device with opp * @dev: device pointer used to lookup device OPPs. diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 0dbb963c1aef..738c8b7b17dc 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -1,5 +1,7 @@ # CPUfreq core obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o +obj-$(CONFIG_PM_OPP) += cpufreq_opp.o + # CPUfreq stats obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o diff --git a/drivers/cpufreq/cpufreq_opp.c b/drivers/cpufreq/cpufreq_opp.c new file mode 100644 index 000000000000..c0c6f4a4eccf --- /dev/null +++ b/drivers/cpufreq/cpufreq_opp.c @@ -0,0 +1,110 @@ +/* + * Generic OPP helper interface for CPUFreq drivers + * + * Copyright (C) 2009-2014 Texas Instruments Incorporated. 
+ * Nishanth Menon + * Romit Dasgupta + * Kevin Hilman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a device + * @dev: device for which we do this operation + * @table: Cpufreq table returned back to caller + * + * Generate a cpufreq table for a provided device- this assumes that the + * opp list is already initialized and ready for usage. + * + * This function allocates required memory for the cpufreq table. It is + * expected that the caller does the required maintenance such as freeing + * the table as required. + * + * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM + * if no memory available for the operation (table is not populated), returns 0 + * if successful and table is populated. + * + * WARNING: It is important for the callers to ensure refreshing their copy of + * the table if any of the mentioned functions have been invoked in the interim. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Since we just use the regular accessor functions to access the internal data + * structures, we use RCU read lock inside this function. As a result, users of + * this function DONOT need to use explicit locks for invoking. + */ +int dev_pm_opp_init_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table **table) +{ + struct dev_pm_opp *opp; + struct cpufreq_frequency_table *freq_table = NULL; + int i, max_opps, ret = 0; + unsigned long rate; + + rcu_read_lock(); + + max_opps = dev_pm_opp_get_opp_count(dev); + if (max_opps <= 0) { + ret = max_opps ? 
max_opps : -ENODATA; + goto out; + } + + freq_table = kzalloc(sizeof(*freq_table) * (max_opps + 1), GFP_KERNEL); + if (!freq_table) { + ret = -ENOMEM; + goto out; + } + + for (i = 0, rate = 0; i < max_opps; i++, rate++) { + /* find next rate */ + opp = dev_pm_opp_find_freq_ceil(dev, &rate); + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + goto out; + } + freq_table[i].driver_data = i; + freq_table[i].frequency = rate / 1000; + } + + freq_table[i].driver_data = i; + freq_table[i].frequency = CPUFREQ_TABLE_END; + + *table = &freq_table[0]; + +out: + rcu_read_unlock(); + if (ret) + kfree(freq_table); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table); + +/** + * dev_pm_opp_free_cpufreq_table() - free the cpufreq table + * @dev: device for which we do this operation + * @table: table to free + * + * Free up the table allocated by dev_pm_opp_init_cpufreq_table + */ +void dev_pm_opp_free_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table **table) +{ + if (!table) + return; + + kfree(*table); + *table = NULL; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index f3822f836e14..9d803b529ac2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -469,6 +469,27 @@ struct cpufreq_frequency_table { * order */ }; +#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) +int dev_pm_opp_init_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table **table); +void dev_pm_opp_free_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table **table); +#else +static inline int dev_pm_opp_init_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table + **table) +{ + return -EINVAL; +} + +static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table + **table) +{ +} +#endif + + bool cpufreq_next_valid(struct cpufreq_frequency_table **pos); /* diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 
5151b0059585..0330217abfad 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -15,7 +15,6 @@ #define __LINUX_OPP_H__ #include -#include #include struct dev_pm_opp; @@ -117,23 +116,4 @@ static inline int of_init_opp_table(struct device *dev) } #endif -#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) -int dev_pm_opp_init_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table); -void dev_pm_opp_free_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table); -#else -static inline int dev_pm_opp_init_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table) -{ - return -EINVAL; -} - -static inline -void dev_pm_opp_free_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table) -{ -} -#endif /* CONFIG_CPU_FREQ */ - #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3-70-g09d2 From aae4518b3124b29f8dc81c829c704fd2df72e98b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 16 May 2014 02:46:50 +0200 Subject: PM / sleep: Mechanism to avoid resuming runtime-suspended devices unnecessarily Currently, some subsystems (e.g. PCI and the ACPI PM domain) have to resume all runtime-suspended devices during system suspend, mostly because those devices may need to be reprogrammed due to different wakeup settings for system sleep and for runtime PM. For some devices, though, it's OK to remain in runtime suspend throughout a complete system suspend/resume cycle (if the device was in runtime suspend at the start of the cycle). We would like to do this whenever possible, to avoid the overhead of extra power-up and power-down events. However, problems may arise because the device's descendants may require it to be at full power at various points during the cycle. Therefore the most straightforward way to do this safely is if the device and all its descendants can remain runtime suspended until the complete stage of system resume. 
To this end, introduce a new device PM flag, power.direct_complete, and modify the PM core to use that flag as follows. If the ->prepare() callback of a device returns a positive number, the PM core will regard that as an indication that it may leave the device runtime-suspended. It will then check if the system power transition in progress is a suspend (and not hibernation in particular) and if the device is, indeed, runtime-suspended. In that case, the PM core will set the device's power.direct_complete flag. Otherwise it will clear power.direct_complete for the device and it also will later clear it for the device's parent (if there's one). Next, the PM core will not invoke the ->suspend(), ->suspend_late(), ->suspend_noirq(), ->resume_noirq(), ->resume_early(), or ->resume() callbacks for all devices having power.direct_complete set. It will invoke their ->complete() callbacks, however, and those callbacks are then responsible for resuming the devices as appropriate, if necessary. For example, in some cases they may need to queue up runtime resume requests for the devices using pm_request_resume(). Changelog partly based on Alan Stern's description of the idea (http://marc.info/?l=linux-pm&m=139940466625569&w=2). Signed-off-by: Rafael J. 
Wysocki Acked-by: Alan Stern --- drivers/base/power/main.c | 66 +++++++++++++++++++++++++++++++++++----------- include/linux/pm.h | 36 +++++++++++++++++++------ include/linux/pm_runtime.h | 6 +++++ 3 files changed, 85 insertions(+), 23 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 86d5e4fb5b98..343ffad59377 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -479,7 +479,7 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn TRACE_DEVICE(dev); TRACE_RESUME(0); - if (dev->power.syscore) + if (dev->power.syscore || dev->power.direct_complete) goto Out; if (!dev->power.is_noirq_suspended) @@ -605,7 +605,7 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn TRACE_DEVICE(dev); TRACE_RESUME(0); - if (dev->power.syscore) + if (dev->power.syscore || dev->power.direct_complete) goto Out; if (!dev->power.is_late_suspended) @@ -735,6 +735,12 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; + if (dev->power.direct_complete) { + /* Match the pm_runtime_disable() in __device_suspend(). 
*/ + pm_runtime_enable(dev); + goto Complete; + } + dpm_wait(dev->parent, async); dpm_watchdog_set(&wd, dev); device_lock(dev); @@ -1007,7 +1013,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a goto Complete; } - if (dev->power.syscore) + if (dev->power.syscore || dev->power.direct_complete) goto Complete; dpm_wait_for_children(dev, async); @@ -1146,7 +1152,7 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as goto Complete; } - if (dev->power.syscore) + if (dev->power.syscore || dev->power.direct_complete) goto Complete; dpm_wait_for_children(dev, async); @@ -1332,6 +1338,17 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; + if (dev->power.direct_complete) { + if (pm_runtime_status_suspended(dev)) { + pm_runtime_disable(dev); + if (pm_runtime_suspended_if_enabled(dev)) + goto Complete; + + pm_runtime_enable(dev); + } + dev->power.direct_complete = false; + } + dpm_watchdog_set(&wd, dev); device_lock(dev); @@ -1382,10 +1399,19 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) End: if (!error) { + struct device *parent = dev->parent; + dev->power.is_suspended = true; - if (dev->power.wakeup_path - && dev->parent && !dev->parent->power.ignore_children) - dev->parent->power.wakeup_path = true; + if (parent) { + spin_lock_irq(&parent->power.lock); + + dev->parent->power.direct_complete = false; + if (dev->power.wakeup_path + && !dev->parent->power.ignore_children) + dev->parent->power.wakeup_path = true; + + spin_unlock_irq(&parent->power.lock); + } } device_unlock(dev); @@ -1487,7 +1513,7 @@ static int device_prepare(struct device *dev, pm_message_t state) { int (*callback)(struct device *) = NULL; char *info = NULL; - int error = 0; + int ret = 0; if (dev->power.syscore) return 0; @@ -1523,17 +1549,27 @@ static int device_prepare(struct device *dev, pm_message_t state) callback = 
dev->driver->pm->prepare; } - if (callback) { - error = callback(dev); - suspend_report_result(callback, error); - } + if (callback) + ret = callback(dev); device_unlock(dev); - if (error) + if (ret < 0) { + suspend_report_result(callback, ret); pm_runtime_put(dev); - - return error; + return ret; + } + /* + * A positive return value from ->prepare() means "this device appears + * to be runtime-suspended and its state is fine, so if it really is + * runtime-suspended, you can leave it in that state provided that you + * will do the same thing with all of its descendants". This only + * applies to suspend transitions, however. + */ + spin_lock_irq(&dev->power.lock); + dev->power.direct_complete = ret > 0 && state.event == PM_EVENT_SUSPEND; + spin_unlock_irq(&dev->power.lock); + return 0; } /** diff --git a/include/linux/pm.h b/include/linux/pm.h index d915d0345fa1..72c0fe098a27 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -93,13 +93,23 @@ typedef struct pm_message { * been registered) to recover from the race condition. * This method is executed for all kinds of suspend transitions and is * followed by one of the suspend callbacks: @suspend(), @freeze(), or - * @poweroff(). The PM core executes subsystem-level @prepare() for all - * devices before starting to invoke suspend callbacks for any of them, so - * generally devices may be assumed to be functional or to respond to - * runtime resume requests while @prepare() is being executed. However, - * device drivers may NOT assume anything about the availability of user - * space at that time and it is NOT valid to request firmware from within - * @prepare() (it's too late to do that). It also is NOT valid to allocate + * @poweroff(). If the transition is a suspend to memory or standby (that + * is, not related to hibernation), the return value of @prepare() may be + * used to indicate to the PM core to leave the device in runtime suspend + * if applicable. 
Namely, if @prepare() returns a positive number, the PM + * core will understand that as a declaration that the device appears to be + * runtime-suspended and it may be left in that state during the entire + * transition and during the subsequent resume if all of its descendants + * are left in runtime suspend too. If that happens, @complete() will be + * executed directly after @prepare() and it must ensure the proper + * functioning of the device after the system resume. + * The PM core executes subsystem-level @prepare() for all devices before + * starting to invoke suspend callbacks for any of them, so generally + * devices may be assumed to be functional or to respond to runtime resume + * requests while @prepare() is being executed. However, device drivers + * may NOT assume anything about the availability of user space at that + * time and it is NOT valid to request firmware from within @prepare() + * (it's too late to do that). It also is NOT valid to allocate * substantial amounts of memory from @prepare() in the GFP_KERNEL mode. * [To work around these limitations, drivers may register suspend and * hibernation notifiers to be executed before the freezing of tasks.] @@ -112,7 +122,16 @@ typedef struct pm_message { * of the other devices that the PM core has unsuccessfully attempted to * suspend earlier). * The PM core executes subsystem-level @complete() after it has executed - * the appropriate resume callbacks for all devices. + * the appropriate resume callbacks for all devices. If the corresponding + * @prepare() at the beginning of the suspend transition returned a + * positive number and the device was left in runtime suspend (without + * executing any suspend and resume callbacks for it), @complete() will be + * the only callback executed for the device during resume. In that case, + * @complete() must be prepared to do whatever is necessary to ensure the + * proper functioning of the device after the system resume. 
To this end, + * @complete() can check the power.direct_complete flag of the device to + * learn whether (unset) or not (set) the previous suspend and resume + * callbacks have been executed for it. * * @suspend: Executed before putting the system into a sleep state in which the * contents of main memory are preserved. The exact action to perform @@ -546,6 +565,7 @@ struct dev_pm_info { bool is_late_suspended:1; bool ignore_children:1; bool early_init:1; /* Owned by the PM core */ + bool direct_complete:1; /* Owned by the PM core */ spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 2a5897a4afbc..43fd6716f662 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -101,6 +101,11 @@ static inline bool pm_runtime_status_suspended(struct device *dev) return dev->power.runtime_status == RPM_SUSPENDED; } +static inline bool pm_runtime_suspended_if_enabled(struct device *dev) +{ + return pm_runtime_status_suspended(dev) && dev->power.disable_depth == 1; +} + static inline bool pm_runtime_enabled(struct device *dev) { return !dev->power.disable_depth; @@ -150,6 +155,7 @@ static inline void device_set_run_wake(struct device *dev, bool enable) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } +static inline bool pm_runtime_suspended_if_enabled(struct device *dev) { return false; } static inline bool pm_runtime_enabled(struct device *dev) { return false; } static inline void pm_runtime_no_callbacks(struct device *dev) {} -- cgit v1.2.3-70-g09d2 From 086abb58590a4df73e8a6ed71fd418826937cd46 Mon Sep 17 00:00:00 2001 From: Chander Kashyap Date: Fri, 16 May 2014 16:21:17 +0530 Subject: PM / OPP: fix incorrect OPP count handling in of_init_opp_table In of_init_opp_table function, if a 
failure to add an OPP is detected, the count of OPPs, yet to be added is not updated. Fix this by decrementing this count on failure as well. Signed-off-by: Chander Kashyap Signed-off-by: Inderpal Singh Acked-by: Viresh Kumar Cc: 3.7+ # 3.7+ Acked-by: Nishanth Menon Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 25538675d59e..c539d70b97ab 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -734,11 +734,9 @@ int of_init_opp_table(struct device *dev) unsigned long freq = be32_to_cpup(val++) * 1000; unsigned long volt = be32_to_cpup(val++); - if (dev_pm_opp_add(dev, freq, volt)) { + if (dev_pm_opp_add(dev, freq, volt)) dev_warn(dev, "%s: Failed to add OPP %ld\n", __func__, freq); - continue; - } nr -= 2; } -- cgit v1.2.3-70-g09d2 From 64ce854578de82e9e16280298562721ced971668 Mon Sep 17 00:00:00 2001 From: Chander Kashyap Date: Thu, 22 May 2014 10:36:26 +0530 Subject: PM / OPP: discard duplicate OPPs We don't have any protection against addition of duplicate OPPs currently and in case some code tries to add them, it will end up corrupting OPP tables. We need to handle some duplication cases separately as returning error might not be the right thing always. The new list of return values for dev_pm_opp_add() are: 0: On success OR Duplicate OPPs (both freq and volt are same) and opp->available -EEXIST: Freq are same and volt are different OR Duplicate OPPs (both freq and volt are same) and !opp->available -ENOMEM: Memory allocation failure Acked-by: Nishanth Menon Signed-off-by: Chander Kashyap Signed-off-by: Inderpal Singh Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/opp.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index c539d70b97ab..39412c15db70 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -394,6 +394,13 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); * to keep the integrity of the internal data structures. Callers should ensure * that this function is *NOT* called under RCU protection or in contexts where * mutex cannot be locked. + * + * Return: + * 0: On success OR + * Duplicate OPPs (both freq and volt are same) and opp->available + * -EEXIST: Freq are same and volt are different OR + * Duplicate OPPs (both freq and volt are same) and !opp->available + * -ENOMEM: Memory allocation failure */ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) { @@ -443,15 +450,31 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) new_opp->u_volt = u_volt; new_opp->available = true; - /* Insert new OPP in order of increasing frequency */ + /* + * Insert new OPP in order of increasing frequency + * and discard if already present + */ head = &dev_opp->opp_list; list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) { - if (new_opp->rate < opp->rate) + if (new_opp->rate <= opp->rate) break; else head = &opp->node; } + /* Duplicate OPPs ? */ + if (new_opp->rate == opp->rate) { + int ret = opp->available && new_opp->u_volt == opp->u_volt ? + 0 : -EEXIST; + + dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. 
New: freq: %lu, volt: %lu, enabled: %d\n", + __func__, opp->rate, opp->u_volt, opp->available, + new_opp->rate, new_opp->u_volt, new_opp->available); + mutex_unlock(&dev_opp_list_lock); + kfree(new_opp); + return ret; + } + list_add_rcu(&new_opp->node, head); mutex_unlock(&dev_opp_list_lock); -- cgit v1.2.3-70-g09d2 From 0c5ff0ef80c2561ef20721299ecfc39c5a42f694 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 28 May 2014 15:23:35 +0800 Subject: PM / sleep: unregister wakeup source when disabling device wakeup When enabling a device' wakeup capability, a wakeup source is created for the device automatically. But the wakeup source is not unregistered when disabling the device' wakeup capability. This results in zombie wakeup sources, after devices/drivers are unregistered. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/base') diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 2d56f4113ae7..eb1bd2ecad8b 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -318,10 +318,16 @@ int device_init_wakeup(struct device *dev, bool enable) { int ret = 0; + if (!dev) + return -EINVAL; + if (enable) { device_set_wakeup_capable(dev, true); ret = device_wakeup_enable(dev); } else { + if (dev->power.can_wakeup) + device_wakeup_disable(dev); + device_set_wakeup_capable(dev, false); } -- cgit v1.2.3-70-g09d2