提交 589e18a9 编写于 作者: R Rafael J. Wysocki

Merge branch 'pm-cpufreq'

* pm-cpufreq:
  cpufreq: cpufreq-cpu0: remove dependency on THERMAL and REGULATOR
  cpufreq: tegra: update comment for clarity
  cpufreq: intel_pstate: Remove duplicate CPU ID check
  cpufreq: Mark CPU0 driver with CPUFREQ_NEED_INITIAL_FREQ_CHECK flag
  cpufreq: governor: remove copy_prev_load from 'struct cpu_dbs_common_info'
  cpufreq: governor: Be friendly towards latency-sensitive bursty workloads
  cpufreq: ppc-corenet-cpu-freq: do_div use quotient
  Revert "cpufreq: Enable big.LITTLE cpufreq driver on arm64"
  cpufreq: Tegra: implement intermediate frequency callbacks
  cpufreq: add support for intermediate (stable) frequencies
......@@ -26,6 +26,7 @@ Contents:
1.4 target/target_index or setpolicy?
1.5 target/target_index
1.6 setpolicy
1.7 get_intermediate and target_intermediate
2. Frequency Table Helpers
......@@ -79,6 +80,10 @@ cpufreq_driver.attr - A pointer to a NULL-terminated list of
"struct freq_attr" which allow to
export values to sysfs.
cpufreq_driver.get_intermediate
and target_intermediate Used to switch to stable frequency while
changing CPU frequency.
1.2 Per-CPU Initialization
--------------------------
......@@ -151,7 +156,7 @@ Some cpufreq-capable processors switch the frequency between certain
limits on their own. These shall use the ->setpolicy call
1.4. target/target_index
1.5. target/target_index
-------------
The target_index call has two arguments: struct cpufreq_policy *policy,
......@@ -160,6 +165,9 @@ and unsigned int index (into the exposed frequency table).
The CPUfreq driver must set the new frequency when called here. The
actual frequency must be determined by freq_table[index].frequency.
It should always restore to earlier frequency (i.e. policy->restore_freq) in
case of errors, even if we switched to intermediate frequency earlier.
Deprecated:
----------
The target call has three arguments: struct cpufreq_policy *policy,
......@@ -179,7 +187,7 @@ Here again the frequency table helper might assist you - see section 2
for details.
1.5 setpolicy
1.6 setpolicy
---------------
The setpolicy call only takes a struct cpufreq_policy *policy as
......@@ -190,6 +198,23 @@ setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a
powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check
the reference implementation in drivers/cpufreq/longrun.c
1.7 get_intermediate and target_intermediate
--------------------------------------------
Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset.
get_intermediate should return a stable intermediate frequency platform wants to
switch to, and target_intermediate() should set CPU to to that frequency, before
jumping to the frequency corresponding to 'index'. Core will take care of
sending notifications and driver doesn't have to handle them in
target_intermediate() or target_index().
Drivers can return '0' from get_intermediate() in case they don't wish to switch
to intermediate frequency for some target frequency. In that case core will
directly call ->target_index().
NOTE: ->target_index() should restore to policy->restore_freq in case of
failures as core would send notifications for that.
2. Frequency Table Helpers
......
......@@ -185,7 +185,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
config GENERIC_CPUFREQ_CPU0
tristate "Generic CPU0 cpufreq driver"
depends on HAVE_CLK && REGULATOR && OF && THERMAL && CPU_THERMAL
depends on HAVE_CLK && OF
select PM_OPP
help
This adds a generic cpufreq driver for CPU0 frequency management.
......
......@@ -5,8 +5,7 @@
# big LITTLE core layer and glue drivers
config ARM_BIG_LITTLE_CPUFREQ
tristate "Generic ARM big LITTLE CPUfreq driver"
depends on (BIG_LITTLE && ARM_CPU_TOPOLOGY) || (ARM64 && SMP)
depends on HAVE_CLK
depends on ARM && BIG_LITTLE && ARM_CPU_TOPOLOGY && HAVE_CLK
select PM_OPP
help
This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
......
......@@ -104,7 +104,7 @@ static int cpu0_cpufreq_init(struct cpufreq_policy *policy)
}
static struct cpufreq_driver cpu0_cpufreq_driver = {
.flags = CPUFREQ_STICKY,
.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = cpu0_set_target,
.get = cpufreq_generic_get,
......
......@@ -1816,20 +1816,55 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
* GOVERNORS *
*********************************************************************/
/* Must set freqs->new to intermediate frequency */
static int __target_intermediate(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs, int index)
{
int ret;
freqs->new = cpufreq_driver->get_intermediate(policy, index);
/* We don't need to switch to intermediate freq */
if (!freqs->new)
return 0;
pr_debug("%s: cpu: %d, switching to intermediate freq: oldfreq: %u, intermediate freq: %u\n",
__func__, policy->cpu, freqs->old, freqs->new);
cpufreq_freq_transition_begin(policy, freqs);
ret = cpufreq_driver->target_intermediate(policy, index);
cpufreq_freq_transition_end(policy, freqs, ret);
if (ret)
pr_err("%s: Failed to change to intermediate frequency: %d\n",
__func__, ret);
return ret;
}
static int __target_index(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *freq_table, int index)
{
struct cpufreq_freqs freqs;
struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0};
unsigned int intermediate_freq = 0;
int retval = -EINVAL;
bool notify;
notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION);
if (notify) {
freqs.old = policy->cur;
freqs.new = freq_table[index].frequency;
freqs.flags = 0;
/* Handle switching to intermediate frequency */
if (cpufreq_driver->get_intermediate) {
retval = __target_intermediate(policy, &freqs, index);
if (retval)
return retval;
intermediate_freq = freqs.new;
/* Set old freq to intermediate */
if (intermediate_freq)
freqs.old = freqs.new;
}
freqs.new = freq_table[index].frequency;
pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n",
__func__, policy->cpu, freqs.old, freqs.new);
......@@ -1841,9 +1876,23 @@ static int __target_index(struct cpufreq_policy *policy,
pr_err("%s: Failed to change cpu frequency: %d\n", __func__,
retval);
if (notify)
if (notify) {
cpufreq_freq_transition_end(policy, &freqs, retval);
/*
* Failed after setting to intermediate freq? Driver should have
* reverted back to initial frequency and so should we. Check
* here for intermediate_freq instead of get_intermediate, in
* case we have't switched to intermediate freq at all.
*/
if (unlikely(retval && intermediate_freq)) {
freqs.old = intermediate_freq;
freqs.new = policy->restore_freq;
cpufreq_freq_transition_begin(policy, &freqs);
cpufreq_freq_transition_end(policy, &freqs, 0);
}
}
return retval;
}
......@@ -1875,6 +1924,9 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
if (target_freq == policy->cur)
return 0;
/* Save last value to restore later on errors */
policy->restore_freq = policy->cur;
if (cpufreq_driver->target)
retval = cpufreq_driver->target(policy, target_freq, relation);
else if (cpufreq_driver->target_index) {
......@@ -2361,7 +2413,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
!(driver_data->setpolicy || driver_data->target_index ||
driver_data->target) ||
(driver_data->setpolicy && (driver_data->target_index ||
driver_data->target)))
driver_data->target)) ||
(!!driver_data->get_intermediate != !!driver_data->target_intermediate))
return -EINVAL;
pr_debug("trying to register driver %s\n", driver_data->name);
......
......@@ -36,14 +36,29 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
struct cpufreq_policy *policy;
unsigned int sampling_rate;
unsigned int max_load = 0;
unsigned int ignore_nice;
unsigned int j;
if (dbs_data->cdata->governor == GOV_ONDEMAND)
if (dbs_data->cdata->governor == GOV_ONDEMAND) {
struct od_cpu_dbs_info_s *od_dbs_info =
dbs_data->cdata->get_cpu_dbs_info_s(cpu);
/*
* Sometimes, the ondemand governor uses an additional
* multiplier to give long delays. So apply this multiplier to
* the 'sampling_rate', so as to keep the wake-up-from-idle
* detection logic a bit conservative.
*/
sampling_rate = od_tuners->sampling_rate;
sampling_rate *= od_dbs_info->rate_mult;
ignore_nice = od_tuners->ignore_nice_load;
else
} else {
sampling_rate = cs_tuners->sampling_rate;
ignore_nice = cs_tuners->ignore_nice_load;
}
policy = cdbs->cur_policy;
......@@ -96,7 +111,46 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
if (unlikely(!wall_time || wall_time < idle_time))
continue;
/*
* If the CPU had gone completely idle, and a task just woke up
* on this CPU now, it would be unfair to calculate 'load' the
* usual way for this elapsed time-window, because it will show
* near-zero load, irrespective of how CPU intensive that task
* actually is. This is undesirable for latency-sensitive bursty
* workloads.
*
* To avoid this, we reuse the 'load' from the previous
* time-window and give this task a chance to start with a
* reasonably high CPU frequency. (However, we shouldn't over-do
* this copy, lest we get stuck at a high load (high frequency)
* for too long, even when the current system load has actually
* dropped down. So we perform the copy only once, upon the
* first wake-up from idle.)
*
* Detecting this situation is easy: the governor's deferrable
* timer would not have fired during CPU-idle periods. Hence
* an unusually large 'wall_time' (as compared to the sampling
* rate) indicates this scenario.
*
* prev_load can be zero in two cases and we must recalculate it
* for both cases:
* - during long idle intervals
* - explicitly set to zero
*/
if (unlikely(wall_time > (2 * sampling_rate) &&
j_cdbs->prev_load)) {
load = j_cdbs->prev_load;
/*
* Perform a destructive copy, to ensure that we copy
* the previous load only once, upon the first wake-up
* from idle.
*/
j_cdbs->prev_load = 0;
} else {
load = 100 * (wall_time - idle_time) / wall_time;
j_cdbs->prev_load = load;
}
if (load > max_load)
max_load = load;
......@@ -318,11 +372,18 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
for_each_cpu(j, policy->cpus) {
struct cpu_dbs_common_info *j_cdbs =
dbs_data->cdata->get_cpu_cdbs(j);
unsigned int prev_load;
j_cdbs->cpu = j;
j_cdbs->cur_policy = policy;
j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
&j_cdbs->prev_cpu_wall, io_busy);
prev_load = (unsigned int)
(j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle);
j_cdbs->prev_load = 100 * prev_load /
(unsigned int) j_cdbs->prev_cpu_wall;
if (ignore_nice)
j_cdbs->prev_cpu_nice =
kcpustat_cpu(j).cpustat[CPUTIME_NICE];
......
......@@ -134,6 +134,13 @@ struct cpu_dbs_common_info {
u64 prev_cpu_idle;
u64 prev_cpu_wall;
u64 prev_cpu_nice;
/*
* Used to keep track of load in the previous interval. However, when
* explicitly set to zero, it is used as a flag to ensure that we copy
* the previous load to the current interval only once, upon the first
* wake-up from idle.
*/
unsigned int prev_load;
struct cpufreq_policy *cur_policy;
struct delayed_work work;
/*
......
......@@ -691,14 +691,8 @@ MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
static int intel_pstate_init_cpu(unsigned int cpunum)
{
const struct x86_cpu_id *id;
struct cpudata *cpu;
id = x86_match_cpu(intel_pstate_cpu_ids);
if (!id)
return -ENODEV;
all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
if (!all_cpu_data[cpunum])
return -ENOMEM;
......
......@@ -138,7 +138,7 @@ static int corenet_cpufreq_cpu_init(struct cpufreq_policy *policy)
struct cpufreq_frequency_table *table;
struct cpu_data *data;
unsigned int cpu = policy->cpu;
u64 transition_latency_hz;
u64 u64temp;
np = of_get_cpu_node(cpu, NULL);
if (!np)
......@@ -206,9 +206,10 @@ static int corenet_cpufreq_cpu_init(struct cpufreq_policy *policy)
for_each_cpu(i, per_cpu(cpu_mask, cpu))
per_cpu(cpu_data, i) = data;
transition_latency_hz = 12ULL * NSEC_PER_SEC;
policy->cpuinfo.transition_latency =
do_div(transition_latency_hz, fsl_get_sys_freq());
/* Minimum transition latency is 12 platform clocks */
u64temp = 12ULL * NSEC_PER_SEC;
do_div(u64temp, fsl_get_sys_freq());
policy->cpuinfo.transition_latency = u64temp + 1;
of_node_put(np);
......
......@@ -45,46 +45,54 @@ static struct clk *cpu_clk;
static struct clk *pll_x_clk;
static struct clk *pll_p_clk;
static struct clk *emc_clk;
static bool pll_x_prepared;
static int tegra_cpu_clk_set_rate(unsigned long rate)
static unsigned int tegra_get_intermediate(struct cpufreq_policy *policy,
unsigned int index)
{
unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000;
/*
* Don't switch to intermediate freq if:
* - we are already at it, i.e. policy->cur == ifreq
* - index corresponds to ifreq
*/
if ((freq_table[index].frequency == ifreq) || (policy->cur == ifreq))
return 0;
return ifreq;
}
static int tegra_target_intermediate(struct cpufreq_policy *policy,
unsigned int index)
{
int ret;
/*
* Take an extra reference to the main pll so it doesn't turn
* off when we move the cpu off of it
* off when we move the cpu off of it as enabling it again while we
* switch to it from tegra_target() would take additional time.
*
* When target-freq is equal to intermediate freq we don't need to
* switch to an intermediate freq and so this routine isn't called.
* Also, we wouldn't be using pll_x anymore and must not take extra
* reference to it, as it can be disabled now to save some power.
*/
clk_prepare_enable(pll_x_clk);
ret = clk_set_parent(cpu_clk, pll_p_clk);
if (ret) {
pr_err("Failed to switch cpu to clock pll_p\n");
goto out;
}
if (rate == clk_get_rate(pll_p_clk))
goto out;
ret = clk_set_rate(pll_x_clk, rate);
if (ret) {
pr_err("Failed to change pll_x to %lu\n", rate);
goto out;
}
ret = clk_set_parent(cpu_clk, pll_x_clk);
if (ret) {
pr_err("Failed to switch cpu to clock pll_x\n");
goto out;
}
out:
if (ret)
clk_disable_unprepare(pll_x_clk);
else
pll_x_prepared = true;
return ret;
}
static int tegra_target(struct cpufreq_policy *policy, unsigned int index)
{
unsigned long rate = freq_table[index].frequency;
unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000;
int ret = 0;
/*
......@@ -98,10 +106,30 @@ static int tegra_target(struct cpufreq_policy *policy, unsigned int index)
else
clk_set_rate(emc_clk, 100000000); /* emc 50Mhz */
ret = tegra_cpu_clk_set_rate(rate * 1000);
/*
* target freq == pll_p, don't need to take extra reference to pll_x_clk
* as it isn't used anymore.
*/
if (rate == ifreq)
return clk_set_parent(cpu_clk, pll_p_clk);
ret = clk_set_rate(pll_x_clk, rate * 1000);
/* Restore to earlier frequency on error, i.e. pll_x */
if (ret)
pr_err("cpu-tegra: Failed to set cpu frequency to %lu kHz\n",
rate);
pr_err("Failed to change pll_x to %lu\n", rate);
ret = clk_set_parent(cpu_clk, pll_x_clk);
/* This shouldn't fail while changing or restoring */
WARN_ON(ret);
/*
* Drop count to pll_x clock only if we switched to intermediate freq
* earlier while transitioning to a target frequency.
*/
if (pll_x_prepared) {
clk_disable_unprepare(pll_x_clk);
pll_x_prepared = false;
}
return ret;
}
......@@ -139,6 +167,8 @@ static int tegra_cpu_exit(struct cpufreq_policy *policy)
static struct cpufreq_driver tegra_cpufreq_driver = {
.flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK,
.verify = cpufreq_generic_frequency_table_verify,
.get_intermediate = tegra_get_intermediate,
.target_intermediate = tegra_target_intermediate,
.target_index = tegra_target,
.get = cpufreq_generic_get,
.init = tegra_cpu_init,
......
......@@ -75,6 +75,7 @@ struct cpufreq_policy {
unsigned int max; /* in kHz */
unsigned int cur; /* in kHz, only needed if cpufreq
* governors are used */
unsigned int restore_freq; /* = policy->cur before transition */
unsigned int suspend_freq; /* freq to set during suspend */
unsigned int policy; /* see above */
......@@ -221,11 +222,35 @@ struct cpufreq_driver {
/* define one out of two */
int (*setpolicy) (struct cpufreq_policy *policy);
/*
* On failure, should always restore frequency to policy->restore_freq
* (i.e. old freq).
*/
int (*target) (struct cpufreq_policy *policy, /* Deprecated */
unsigned int target_freq,
unsigned int relation);
int (*target_index) (struct cpufreq_policy *policy,
unsigned int index);
/*
* Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION
* unset.
*
* get_intermediate should return a stable intermediate frequency
* platform wants to switch to and target_intermediate() should set CPU
* to to that frequency, before jumping to the frequency corresponding
* to 'index'. Core will take care of sending notifications and driver
* doesn't have to handle them in target_intermediate() or
* target_index().
*
* Drivers can return '0' from get_intermediate() in case they don't
* wish to switch to intermediate frequency for some target frequency.
* In that case core will directly call ->target_index().
*/
unsigned int (*get_intermediate)(struct cpufreq_policy *policy,
unsigned int index);
int (*target_intermediate)(struct cpufreq_policy *policy,
unsigned int index);
/* should be defined, if possible */
unsigned int (*get) (unsigned int cpu);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册