Commit 4ed39004 authored by: Rafael J. Wysocki

Merge branch 'pm-cpufreq'

* pm-cpufreq: (94 commits)
  intel_pstate: Do not skip samples partially
  intel_pstate: Remove freq calculation from intel_pstate_calc_busy()
  intel_pstate: Move intel_pstate_calc_busy() into get_target_pstate_use_performance()
  intel_pstate: Optimize calculation for max/min_perf_adj
  intel_pstate: Remove extra conversions in pid calculation
  cpufreq: Move scheduler-related code to the sched directory
  Revert "cpufreq: postfix policy directory with the first CPU in related_cpus"
  cpufreq: Reduce cpufreq_update_util() overhead a bit
  cpufreq: Select IRQ_WORK if CPU_FREQ_GOV_COMMON is set
  cpufreq: Remove 'policy->governor_enabled'
  cpufreq: Rename __cpufreq_governor() to cpufreq_governor()
  cpufreq: Relocate handle_update() to kill its declaration
  cpufreq: governor: Drop unnecessary checks from show() and store()
  cpufreq: governor: Fix race in dbs_update_util_handler()
  cpufreq: governor: Make gov_set_update_util() static
  cpufreq: governor: Narrow down the dbs_data_mutex coverage
  cpufreq: governor: Make dbs_data_mutex static
  cpufreq: governor: Relocate definitions of tuners structures
  cpufreq: governor: Move per-CPU data to the common code
  cpufreq: governor: Make governor private data per-policy
  ...
@@ -25,7 +25,7 @@ callback, so cpufreq core can't request a transition to a specific frequency.
 The driver provides minimum and maximum frequency limits and callbacks to set a
 policy. The policy in cpufreq sysfs is referred to as the "scaling governor".
 The cpufreq core can request the driver to operate in any of the two policies:
-"performance: and "powersave". The driver decides which frequency to use based
+"performance" and "powersave". The driver decides which frequency to use based
 on the above policy selection considering minimum and maximum frequency limits.
 The Intel P-State driver falls under the latter category, which implements the
......
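The documentation hunk above describes setpolicy-style drivers such as intel_pstate. As a purely illustrative sketch (the example_* names are hypothetical and not part of this series), such a driver's policy callback amounts to mapping the two policies onto its own frequency limits:

```c
#include <linux/cpufreq.h>

/* Hypothetical hardware write helper, for illustration only. */
static int example_write_freq(unsigned int khz)
{
	return 0;
}

/*
 * A setpolicy-style driver picks the frequency itself: the core only tells
 * it which policy ("performance" or "powersave") applies and which
 * minimum/maximum limits are in effect.
 */
static int example_set_policy(struct cpufreq_policy *policy)
{
	unsigned int khz = (policy->policy == CPUFREQ_POLICY_PERFORMANCE) ?
				policy->max : policy->min;

	return example_write_freq(khz);
}
```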
@@ -19,6 +19,7 @@ config CPU_FREQ
 if CPU_FREQ

 config CPU_FREQ_GOV_COMMON
+	select IRQ_WORK
 	bool

 config CPU_FREQ_BOOST_SW
......
@@ -70,6 +70,8 @@ struct acpi_cpufreq_data {
 	unsigned int cpu_feature;
 	unsigned int acpi_perf_cpu;
 	cpumask_var_t freqdomain_cpus;
+	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
+	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
 };

 /* acpi_perf_data is a pointer to percpu data. */
@@ -243,125 +245,119 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
 	}
 }

-struct msr_addr {
-	u32 reg;
-};
+u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
+{
+	u32 val, dummy;
+
+	rdmsr(MSR_IA32_PERF_CTL, val, dummy);
+	return val;
+}
+
+void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
+{
+	u32 lo, hi;
+
+	rdmsr(MSR_IA32_PERF_CTL, lo, hi);
+	lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE);
+	wrmsr(MSR_IA32_PERF_CTL, lo, hi);
+}
+
+u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
+{
+	u32 val, dummy;
+
+	rdmsr(MSR_AMD_PERF_CTL, val, dummy);
+	return val;
+}

-struct io_addr {
-	u16 port;
-	u8 bit_width;
-};
+void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
+{
+	wrmsr(MSR_AMD_PERF_CTL, val, 0);
+}
+
+u32 cpu_freq_read_io(struct acpi_pct_register *reg)
+{
+	u32 val;
+
+	acpi_os_read_port(reg->address, &val, reg->bit_width);
+	return val;
+}
+
+void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
+{
+	acpi_os_write_port(reg->address, val, reg->bit_width);
+}

 struct drv_cmd {
-	unsigned int type;
-	const struct cpumask *mask;
-	union {
-		struct msr_addr msr;
-		struct io_addr io;
-	} addr;
+	struct acpi_pct_register *reg;
 	u32 val;
+	union {
+		void (*write)(struct acpi_pct_register *reg, u32 val);
+		u32 (*read)(struct acpi_pct_register *reg);
+	} func;
 };

 /* Called via smp_call_function_single(), on the target CPU */
 static void do_drv_read(void *_cmd)
 {
 	struct drv_cmd *cmd = _cmd;
-	u32 h;

-	switch (cmd->type) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-	case SYSTEM_AMD_MSR_CAPABLE:
-		rdmsr(cmd->addr.msr.reg, cmd->val, h);
-		break;
-	case SYSTEM_IO_CAPABLE:
-		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
-				&cmd->val,
-				(u32)cmd->addr.io.bit_width);
-		break;
-	default:
-		break;
-	}
+	cmd->val = cmd->func.read(cmd->reg);
+}
+
+static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask)
+{
+	struct acpi_processor_performance *perf = to_perf_data(data);
+	struct drv_cmd cmd = {
+		.reg = &perf->control_register,
+		.func.read = data->cpu_freq_read,
+	};
+	int err;
+
+	err = smp_call_function_any(mask, do_drv_read, &cmd, 1);
+	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
+
+	return cmd.val;
 }

 /* Called via smp_call_function_many(), on the target CPUs */
 static void do_drv_write(void *_cmd)
 {
 	struct drv_cmd *cmd = _cmd;
-	u32 lo, hi;

-	switch (cmd->type) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		rdmsr(cmd->addr.msr.reg, lo, hi);
-		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
-		wrmsr(cmd->addr.msr.reg, lo, hi);
-		break;
-	case SYSTEM_AMD_MSR_CAPABLE:
-		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
-		break;
-	case SYSTEM_IO_CAPABLE:
-		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
-				cmd->val,
-				(u32)cmd->addr.io.bit_width);
-		break;
-	default:
-		break;
-	}
-}
-
-static void drv_read(struct drv_cmd *cmd)
-{
-	int err;
-	cmd->val = 0;
-
-	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
-	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
+	cmd->func.write(cmd->reg, cmd->val);
 }

-static void drv_write(struct drv_cmd *cmd)
+static void drv_write(struct acpi_cpufreq_data *data,
+		      const struct cpumask *mask, u32 val)
 {
+	struct acpi_processor_performance *perf = to_perf_data(data);
+	struct drv_cmd cmd = {
+		.reg = &perf->control_register,
+		.val = val,
+		.func.write = data->cpu_freq_write,
+	};
 	int this_cpu;

 	this_cpu = get_cpu();
-	if (cpumask_test_cpu(this_cpu, cmd->mask))
-		do_drv_write(cmd);
+	if (cpumask_test_cpu(this_cpu, mask))
+		do_drv_write(&cmd);

-	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
+	smp_call_function_many(mask, do_drv_write, &cmd, 1);
 	put_cpu();
 }

-static u32
-get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
+static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
 {
-	struct acpi_processor_performance *perf;
-	struct drv_cmd cmd;
+	u32 val;

 	if (unlikely(cpumask_empty(mask)))
 		return 0;

-	switch (data->cpu_feature) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
-		break;
-	case SYSTEM_AMD_MSR_CAPABLE:
-		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
-		break;
-	case SYSTEM_IO_CAPABLE:
-		cmd.type = SYSTEM_IO_CAPABLE;
-		perf = to_perf_data(data);
-		cmd.addr.io.port = perf->control_register.address;
-		cmd.addr.io.bit_width = perf->control_register.bit_width;
-		break;
-	default:
-		return 0;
-	}
-
-	cmd.mask = mask;
-	drv_read(&cmd);
+	val = drv_read(data, mask);

-	pr_debug("get_cur_val = %u\n", cmd.val);
+	pr_debug("get_cur_val = %u\n", val);

-	return cmd.val;
+	return val;
 }

 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
@@ -416,7 +412,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 {
 	struct acpi_cpufreq_data *data = policy->driver_data;
 	struct acpi_processor_performance *perf;
-	struct drv_cmd cmd;
+	const struct cpumask *mask;
 	unsigned int next_perf_state = 0; /* Index into perf table */
 	int result = 0;

@@ -434,42 +430,21 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		} else {
 			pr_debug("Already at target state (P%d)\n",
 				next_perf_state);
-			goto out;
+			return 0;
 		}
 	}

-	switch (data->cpu_feature) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
-		cmd.val = (u32) perf->states[next_perf_state].control;
-		break;
-	case SYSTEM_AMD_MSR_CAPABLE:
-		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
-		cmd.val = (u32) perf->states[next_perf_state].control;
-		break;
-	case SYSTEM_IO_CAPABLE:
-		cmd.type = SYSTEM_IO_CAPABLE;
-		cmd.addr.io.port = perf->control_register.address;
-		cmd.addr.io.bit_width = perf->control_register.bit_width;
-		cmd.val = (u32) perf->states[next_perf_state].control;
-		break;
-	default:
-		result = -ENODEV;
-		goto out;
-	}
-
-	/* cpufreq holds the hotplug lock, so we are safe from here on */
-	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
-		cmd.mask = policy->cpus;
-	else
-		cmd.mask = cpumask_of(policy->cpu);
+	/*
+	 * The core won't allow CPUs to go away until the governor has been
+	 * stopped, so we can rely on the stability of policy->cpus.
+	 */
+	mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ?
+		cpumask_of(policy->cpu) : policy->cpus;

-	drv_write(&cmd);
+	drv_write(data, mask, perf->states[next_perf_state].control);

 	if (acpi_pstate_strict) {
-		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
-					data)) {
+		if (!check_freqs(mask, data->freq_table[index].frequency,
+				 data)) {
 			pr_debug("acpi_cpufreq_target failed (%d)\n",
 				policy->cpu);

@@ -480,7 +455,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	if (!result)
 		perf->state = next_perf_state;

-out:
 	return result;
 }
@@ -740,15 +714,21 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		}
 		pr_debug("SYSTEM IO addr space\n");
 		data->cpu_feature = SYSTEM_IO_CAPABLE;
+		data->cpu_freq_read = cpu_freq_read_io;
+		data->cpu_freq_write = cpu_freq_write_io;
 		break;
 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
 		pr_debug("HARDWARE addr space\n");
 		if (check_est_cpu(cpu)) {
 			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
+			data->cpu_freq_read = cpu_freq_read_intel;
+			data->cpu_freq_write = cpu_freq_write_intel;
 			break;
 		}
 		if (check_amd_hwpstate_cpu(cpu)) {
 			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
+			data->cpu_freq_read = cpu_freq_read_amd;
+			data->cpu_freq_write = cpu_freq_write_amd;
 			break;
 		}
 		result = -ENODEV;
......
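The acpi-cpufreq change above replaces per-access switch statements with read/write callbacks that are chosen once at init time and dispatched through a small command struct. A stand-alone, hypothetical C sketch of that dispatch pattern (fake_msr and the ops names are invented for illustration, not taken from the driver):

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the MSR access primitives. */
static uint32_t fake_msr = 0x1600;
static uint32_t read_msr(void)          { return fake_msr; }
static void     write_msr(uint32_t val) { fake_msr = val; }

struct freq_ops {
	uint32_t (*read)(void);
	void     (*write)(uint32_t val);
};

/* Chosen once at init, analogous to data->cpu_freq_read/cpu_freq_write. */
static const struct freq_ops msr_ops = { .read = read_msr, .write = write_msr };

int main(void)
{
	const struct freq_ops *ops = &msr_ops;	/* instead of switch (cpu_feature) */

	ops->write(0x1800);
	printf("current control value: 0x%x\n", ops->read());
	return 0;
}
```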
@@ -21,7 +21,7 @@
 #include <asm/msr.h>
 #include <asm/cpufeature.h>

-#include "cpufreq_governor.h"
+#include "cpufreq_ondemand.h"

 #define MSR_AMD64_FREQ_SENSITIVITY_ACTUAL	0xc0010080
 #define MSR_AMD64_FREQ_SENSITIVITY_REFERENCE	0xc0010081

@@ -45,10 +45,10 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
 	long d_actual, d_reference;
 	struct msr actual, reference;
 	struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu);
-	struct dbs_data *od_data = policy->governor_data;
+	struct policy_dbs_info *policy_dbs = policy->governor_data;
+	struct dbs_data *od_data = policy_dbs->dbs_data;
 	struct od_dbs_tuners *od_tuners = od_data->tuners;
-	struct od_cpu_dbs_info_s *od_info =
-		od_data->cdata->get_cpu_dbs_info_s(policy->cpu);
+	struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs);

 	if (!od_info->freq_table)
 		return freq_next;
......
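The governor rework that amd_freq_sensitivity now follows keeps governor-specific state in a per-policy wrapper around a common policy_dbs_info and recovers the wrapper with a container_of()-based helper (to_dbs_info()). A self-contained, hypothetical sketch of that accessor pattern (names invented for illustration):

```c
#include <stddef.h>
#include <stdio.h>

/* Common, governor-independent part (stands in for struct policy_dbs_info). */
struct common_info {
	int sample_rate;
};

/* Governor-specific wrapper embedding the common part by value. */
struct od_info {
	struct common_info common;
	int powersave_bias;
};

/* Same idea as the kernel's container_of()-based to_dbs_info() helpers. */
static struct od_info *to_od_info(struct common_info *c)
{
	return (struct od_info *)((char *)c - offsetof(struct od_info, common));
}

int main(void)
{
	struct od_info od = { .common = { .sample_rate = 10 }, .powersave_bias = 1 };
	struct common_info *c = &od.common;	/* what the generic code passes around */

	printf("bias = %d\n", to_od_info(c)->powersave_bias);
	return 0;
}
```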
@@ -31,9 +31,8 @@

 struct private_data {
 	struct device *cpu_dev;
-	struct regulator *cpu_reg;
 	struct thermal_cooling_device *cdev;
-	unsigned int voltage_tolerance; /* in percentage */
+	const char *reg_name;
 };

 static struct freq_attr *cpufreq_dt_attr[] = {
...@@ -44,175 +43,128 @@ static struct freq_attr *cpufreq_dt_attr[] = { ...@@ -44,175 +43,128 @@ static struct freq_attr *cpufreq_dt_attr[] = {
static int set_target(struct cpufreq_policy *policy, unsigned int index) static int set_target(struct cpufreq_policy *policy, unsigned int index)
{ {
struct dev_pm_opp *opp;
struct cpufreq_frequency_table *freq_table = policy->freq_table;
struct clk *cpu_clk = policy->clk;
struct private_data *priv = policy->driver_data; struct private_data *priv = policy->driver_data;
struct device *cpu_dev = priv->cpu_dev;
struct regulator *cpu_reg = priv->cpu_reg;
unsigned long volt = 0, tol = 0;
int volt_old = 0;
unsigned int old_freq, new_freq;
long freq_Hz, freq_exact;
int ret;
freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
if (freq_Hz <= 0)
freq_Hz = freq_table[index].frequency * 1000;
freq_exact = freq_Hz; return dev_pm_opp_set_rate(priv->cpu_dev,
new_freq = freq_Hz / 1000; policy->freq_table[index].frequency * 1000);
old_freq = clk_get_rate(cpu_clk) / 1000; }
if (!IS_ERR(cpu_reg)) { /*
unsigned long opp_freq; * An earlier version of opp-v1 bindings used to name the regulator
* "cpu0-supply", we still need to handle that for backwards compatibility.
*/
static const char *find_supply_name(struct device *dev)
{
struct device_node *np;
struct property *pp;
int cpu = dev->id;
const char *name = NULL;
rcu_read_lock(); np = of_node_get(dev->of_node);
opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_Hz);
if (IS_ERR(opp)) {
rcu_read_unlock();
dev_err(cpu_dev, "failed to find OPP for %ld\n",
freq_Hz);
return PTR_ERR(opp);
}
volt = dev_pm_opp_get_voltage(opp);
opp_freq = dev_pm_opp_get_freq(opp);
rcu_read_unlock();
tol = volt * priv->voltage_tolerance / 100;
volt_old = regulator_get_voltage(cpu_reg);
dev_dbg(cpu_dev, "Found OPP: %ld kHz, %ld uV\n",
opp_freq / 1000, volt);
}
dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n", /* This must be valid for sure */
old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1, if (WARN_ON(!np))
new_freq / 1000, volt ? volt / 1000 : -1); return NULL;
/* scaling up? scale voltage before frequency */ /* Try "cpu0" for older DTs */
if (!IS_ERR(cpu_reg) && new_freq > old_freq) { if (!cpu) {
ret = regulator_set_voltage_tol(cpu_reg, volt, tol); pp = of_find_property(np, "cpu0-supply", NULL);
if (ret) { if (pp) {
dev_err(cpu_dev, "failed to scale voltage up: %d\n", name = "cpu0";
ret); goto node_put;
return ret;
} }
} }
ret = clk_set_rate(cpu_clk, freq_exact); pp = of_find_property(np, "cpu-supply", NULL);
if (ret) { if (pp) {
dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); name = "cpu";
if (!IS_ERR(cpu_reg) && volt_old > 0) goto node_put;
regulator_set_voltage_tol(cpu_reg, volt_old, tol);
return ret;
} }
/* scaling down? scale voltage after frequency */ dev_dbg(dev, "no regulator for cpu%d\n", cpu);
if (!IS_ERR(cpu_reg) && new_freq < old_freq) { node_put:
ret = regulator_set_voltage_tol(cpu_reg, volt, tol); of_node_put(np);
if (ret) { return name;
dev_err(cpu_dev, "failed to scale voltage down: %d\n",
ret);
clk_set_rate(cpu_clk, old_freq * 1000);
}
}
return ret;
} }
static int allocate_resources(int cpu, struct device **cdev, static int resources_available(void)
struct regulator **creg, struct clk **cclk)
{ {
struct device *cpu_dev; struct device *cpu_dev;
struct regulator *cpu_reg; struct regulator *cpu_reg;
struct clk *cpu_clk; struct clk *cpu_clk;
int ret = 0; int ret = 0;
char *reg_cpu0 = "cpu0", *reg_cpu = "cpu", *reg; const char *name;
cpu_dev = get_cpu_device(cpu); cpu_dev = get_cpu_device(0);
if (!cpu_dev) { if (!cpu_dev) {
pr_err("failed to get cpu%d device\n", cpu); pr_err("failed to get cpu0 device\n");
return -ENODEV; return -ENODEV;
} }
/* Try "cpu0" for older DTs */ cpu_clk = clk_get(cpu_dev, NULL);
if (!cpu) ret = PTR_ERR_OR_ZERO(cpu_clk);
reg = reg_cpu0;
else
reg = reg_cpu;
try_again:
cpu_reg = regulator_get_optional(cpu_dev, reg);
ret = PTR_ERR_OR_ZERO(cpu_reg);
if (ret) { if (ret) {
/* /*
* If cpu's regulator supply node is present, but regulator is * If cpu's clk node is present, but clock is not yet
* not yet registered, we should try defering probe. * registered, we should try defering probe.
*/ */
if (ret == -EPROBE_DEFER) { if (ret == -EPROBE_DEFER)
dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n", dev_dbg(cpu_dev, "clock not ready, retry\n");
cpu); else
return ret; dev_err(cpu_dev, "failed to get clock: %d\n", ret);
}
/* Try with "cpu-supply" */
if (reg == reg_cpu0) {
reg = reg_cpu;
goto try_again;
}
dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret); return ret;
} }
cpu_clk = clk_get(cpu_dev, NULL); clk_put(cpu_clk);
ret = PTR_ERR_OR_ZERO(cpu_clk);
if (ret) {
/* put regulator */
if (!IS_ERR(cpu_reg))
regulator_put(cpu_reg);
name = find_supply_name(cpu_dev);
/* Platform doesn't require regulator */
if (!name)
return 0;
cpu_reg = regulator_get_optional(cpu_dev, name);
ret = PTR_ERR_OR_ZERO(cpu_reg);
if (ret) {
/* /*
* If cpu's clk node is present, but clock is not yet * If cpu's regulator supply node is present, but regulator is
* registered, we should try defering probe. * not yet registered, we should try defering probe.
*/ */
if (ret == -EPROBE_DEFER) if (ret == -EPROBE_DEFER)
dev_dbg(cpu_dev, "cpu%d clock not ready, retry\n", cpu); dev_dbg(cpu_dev, "cpu0 regulator not ready, retry\n");
else else
dev_err(cpu_dev, "failed to get cpu%d clock: %d\n", cpu, dev_dbg(cpu_dev, "no regulator for cpu0: %d\n", ret);
ret);
} else { return ret;
*cdev = cpu_dev;
*creg = cpu_reg;
*cclk = cpu_clk;
} }
return ret; regulator_put(cpu_reg);
return 0;
} }
static int cpufreq_init(struct cpufreq_policy *policy) static int cpufreq_init(struct cpufreq_policy *policy)
{ {
struct cpufreq_frequency_table *freq_table; struct cpufreq_frequency_table *freq_table;
struct device_node *np;
struct private_data *priv; struct private_data *priv;
struct device *cpu_dev; struct device *cpu_dev;
struct regulator *cpu_reg;
struct clk *cpu_clk; struct clk *cpu_clk;
struct dev_pm_opp *suspend_opp; struct dev_pm_opp *suspend_opp;
unsigned long min_uV = ~0, max_uV = 0;
unsigned int transition_latency; unsigned int transition_latency;
bool need_update = false; bool opp_v1 = false;
const char *name;
int ret; int ret;
ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk); cpu_dev = get_cpu_device(policy->cpu);
if (ret) { if (!cpu_dev) {
pr_err("%s: Failed to allocate resources: %d\n", __func__, ret); pr_err("failed to get cpu%d device\n", policy->cpu);
return ret; return -ENODEV;
} }
np = of_node_get(cpu_dev->of_node); cpu_clk = clk_get(cpu_dev, NULL);
if (!np) { if (IS_ERR(cpu_clk)) {
dev_err(cpu_dev, "failed to find cpu%d node\n", policy->cpu); ret = PTR_ERR(cpu_clk);
ret = -ENOENT; dev_err(cpu_dev, "%s: failed to get clk: %d\n", __func__, ret);
goto out_put_reg_clk; return ret;
} }
/* Get OPP-sharing information from "operating-points-v2" bindings */ /* Get OPP-sharing information from "operating-points-v2" bindings */
...@@ -223,9 +175,23 @@ static int cpufreq_init(struct cpufreq_policy *policy) ...@@ -223,9 +175,23 @@ static int cpufreq_init(struct cpufreq_policy *policy)
* finding shared-OPPs for backward compatibility. * finding shared-OPPs for backward compatibility.
*/ */
if (ret == -ENOENT) if (ret == -ENOENT)
need_update = true; opp_v1 = true;
else else
goto out_node_put; goto out_put_clk;
}
/*
* OPP layer will be taking care of regulators now, but it needs to know
* the name of the regulator first.
*/
name = find_supply_name(cpu_dev);
if (name) {
ret = dev_pm_opp_set_regulator(cpu_dev, name);
if (ret) {
dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n",
policy->cpu, ret);
goto out_put_clk;
}
} }
/* /*
...@@ -246,12 +212,12 @@ static int cpufreq_init(struct cpufreq_policy *policy) ...@@ -246,12 +212,12 @@ static int cpufreq_init(struct cpufreq_policy *policy)
*/ */
ret = dev_pm_opp_get_opp_count(cpu_dev); ret = dev_pm_opp_get_opp_count(cpu_dev);
if (ret <= 0) { if (ret <= 0) {
pr_debug("OPP table is not ready, deferring probe\n"); dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n");
ret = -EPROBE_DEFER; ret = -EPROBE_DEFER;
goto out_free_opp; goto out_free_opp;
} }
if (need_update) { if (opp_v1) {
struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data(); struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data();
if (!pd || !pd->independent_clocks) if (!pd || !pd->independent_clocks)
...@@ -265,10 +231,6 @@ static int cpufreq_init(struct cpufreq_policy *policy) ...@@ -265,10 +231,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
if (ret) if (ret)
dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
__func__, ret); __func__, ret);
of_property_read_u32(np, "clock-latency", &transition_latency);
} else {
transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev);
} }
priv = kzalloc(sizeof(*priv), GFP_KERNEL); priv = kzalloc(sizeof(*priv), GFP_KERNEL);
...@@ -277,62 +239,16 @@ static int cpufreq_init(struct cpufreq_policy *policy) ...@@ -277,62 +239,16 @@ static int cpufreq_init(struct cpufreq_policy *policy)
goto out_free_opp; goto out_free_opp;
} }
of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); priv->reg_name = name;
if (!transition_latency)
transition_latency = CPUFREQ_ETERNAL;
if (!IS_ERR(cpu_reg)) {
unsigned long opp_freq = 0;
/*
* Disable any OPPs where the connected regulator isn't able to
* provide the specified voltage and record minimum and maximum
* voltage levels.
*/
while (1) {
struct dev_pm_opp *opp;
unsigned long opp_uV, tol_uV;
rcu_read_lock();
opp = dev_pm_opp_find_freq_ceil(cpu_dev, &opp_freq);
if (IS_ERR(opp)) {
rcu_read_unlock();
break;
}
opp_uV = dev_pm_opp_get_voltage(opp);
rcu_read_unlock();
tol_uV = opp_uV * priv->voltage_tolerance / 100;
if (regulator_is_supported_voltage(cpu_reg,
opp_uV - tol_uV,
opp_uV + tol_uV)) {
if (opp_uV < min_uV)
min_uV = opp_uV;
if (opp_uV > max_uV)
max_uV = opp_uV;
} else {
dev_pm_opp_disable(cpu_dev, opp_freq);
}
opp_freq++;
}
ret = regulator_set_voltage_time(cpu_reg, min_uV, max_uV);
if (ret > 0)
transition_latency += ret * 1000;
}
ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
if (ret) { if (ret) {
pr_err("failed to init cpufreq table: %d\n", ret); dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
goto out_free_priv; goto out_free_priv;
} }
priv->cpu_dev = cpu_dev; priv->cpu_dev = cpu_dev;
priv->cpu_reg = cpu_reg;
policy->driver_data = priv; policy->driver_data = priv;
policy->clk = cpu_clk; policy->clk = cpu_clk;
rcu_read_lock(); rcu_read_lock();
...@@ -357,9 +273,11 @@ static int cpufreq_init(struct cpufreq_policy *policy) ...@@ -357,9 +273,11 @@ static int cpufreq_init(struct cpufreq_policy *policy)
cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs;
} }
policy->cpuinfo.transition_latency = transition_latency; transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev);
if (!transition_latency)
transition_latency = CPUFREQ_ETERNAL;
of_node_put(np); policy->cpuinfo.transition_latency = transition_latency;
return 0; return 0;
...@@ -369,12 +287,10 @@ static int cpufreq_init(struct cpufreq_policy *policy) ...@@ -369,12 +287,10 @@ static int cpufreq_init(struct cpufreq_policy *policy)
kfree(priv); kfree(priv);
out_free_opp: out_free_opp:
dev_pm_opp_of_cpumask_remove_table(policy->cpus); dev_pm_opp_of_cpumask_remove_table(policy->cpus);
out_node_put: if (name)
of_node_put(np); dev_pm_opp_put_regulator(cpu_dev);
out_put_reg_clk: out_put_clk:
clk_put(cpu_clk); clk_put(cpu_clk);
if (!IS_ERR(cpu_reg))
regulator_put(cpu_reg);
return ret; return ret;
} }
...@@ -386,9 +302,10 @@ static int cpufreq_exit(struct cpufreq_policy *policy) ...@@ -386,9 +302,10 @@ static int cpufreq_exit(struct cpufreq_policy *policy)
cpufreq_cooling_unregister(priv->cdev); cpufreq_cooling_unregister(priv->cdev);
dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
if (priv->reg_name)
dev_pm_opp_put_regulator(priv->cpu_dev);
clk_put(policy->clk); clk_put(policy->clk);
if (!IS_ERR(priv->cpu_reg))
regulator_put(priv->cpu_reg);
kfree(priv); kfree(priv);
return 0; return 0;
...@@ -441,9 +358,6 @@ static struct cpufreq_driver dt_cpufreq_driver = { ...@@ -441,9 +358,6 @@ static struct cpufreq_driver dt_cpufreq_driver = {
static int dt_cpufreq_probe(struct platform_device *pdev) static int dt_cpufreq_probe(struct platform_device *pdev)
{ {
struct device *cpu_dev;
struct regulator *cpu_reg;
struct clk *cpu_clk;
int ret; int ret;
/* /*
...@@ -453,19 +367,15 @@ static int dt_cpufreq_probe(struct platform_device *pdev) ...@@ -453,19 +367,15 @@ static int dt_cpufreq_probe(struct platform_device *pdev)
* *
* FIXME: Is checking this only for CPU0 sufficient ? * FIXME: Is checking this only for CPU0 sufficient ?
*/ */
ret = allocate_resources(0, &cpu_dev, &cpu_reg, &cpu_clk); ret = resources_available();
if (ret) if (ret)
return ret; return ret;
clk_put(cpu_clk);
if (!IS_ERR(cpu_reg))
regulator_put(cpu_reg);
dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev); dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev);
ret = cpufreq_register_driver(&dt_cpufreq_driver); ret = cpufreq_register_driver(&dt_cpufreq_driver);
if (ret) if (ret)
dev_err(cpu_dev, "failed register driver: %d\n", ret); dev_err(&pdev->dev, "failed register driver: %d\n", ret);
return ret; return ret;
} }
......
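In the cpufreq-dt rewrite above, clock and regulator handling move into the OPP core: find_supply_name() picks the supply name, dev_pm_opp_set_regulator() registers it, and set_target() reduces to a single dev_pm_opp_set_rate() call. A minimal sketch of that flow (example_enable_opp_scaling() is a hypothetical helper, error handling trimmed, and the 1.2 GHz target is chosen arbitrarily):

```c
#include <linux/pm_opp.h>

/*
 * Hypothetical init-time sketch: once the supply name is registered with the
 * OPP core, one dev_pm_opp_set_rate() call adjusts both the clock and the
 * regulator, which is what lets set_target() shrink to one line.
 */
static int example_enable_opp_scaling(struct device *cpu_dev)
{
	int ret;

	ret = dev_pm_opp_set_regulator(cpu_dev, "cpu");	/* from find_supply_name() */
	if (ret)
		return ret;

	/* Jump to 1.2 GHz; the OPP core orders the voltage/frequency steps. */
	return dev_pm_opp_set_rate(cpu_dev, 1200000000);
}
```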
@@ -38,48 +38,10 @@ static inline bool policy_is_inactive(struct cpufreq_policy *policy)
 	return cpumask_empty(policy->cpus);
 }

-static bool suitable_policy(struct cpufreq_policy *policy, bool active)
-{
-	return active == !policy_is_inactive(policy);
-}
-
-/* Finds Next Acive/Inactive policy */
-static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy,
-					  bool active)
-{
-	do {
-		/* No more policies in the list */
-		if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
-			return NULL;
-
-		policy = list_next_entry(policy, policy_list);
-	} while (!suitable_policy(policy, active));
-
-	return policy;
-}
-
-static struct cpufreq_policy *first_policy(bool active)
-{
-	struct cpufreq_policy *policy;
-
-	/* No policies in the list */
-	if (list_empty(&cpufreq_policy_list))
-		return NULL;
-
-	policy = list_first_entry(&cpufreq_policy_list, typeof(*policy),
-				  policy_list);
-
-	if (!suitable_policy(policy, active))
-		policy = next_policy(policy, active);
-
-	return policy;
-}
-
 /* Macros to iterate over CPU policies */
-#define for_each_suitable_policy(__policy, __active)	\
-	for (__policy = first_policy(__active);		\
-	     __policy;					\
-	     __policy = next_policy(__policy, __active))
+#define for_each_suitable_policy(__policy, __active)			 \
+	list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \
+		if ((__active) == !policy_is_inactive(__policy))

 #define for_each_active_policy(__policy)		\
 	for_each_suitable_policy(__policy, true)
@@ -102,7 +64,6 @@ static LIST_HEAD(cpufreq_governor_list);
 static struct cpufreq_driver *cpufreq_driver;
 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
 static DEFINE_RWLOCK(cpufreq_driver_lock);
-DEFINE_MUTEX(cpufreq_governor_lock);

 /* Flag to suspend/resume CPUFreq governors */
 static bool cpufreq_suspended;

@@ -113,10 +74,8 @@
 }

 /* internal prototypes */
-static int __cpufreq_governor(struct cpufreq_policy *policy,
-		unsigned int event);
+static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
 static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
-static void handle_update(struct work_struct *work);

 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -818,12 +777,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 	ssize_t ret;

 	down_read(&policy->rwsem);
-
-	if (fattr->show)
-		ret = fattr->show(policy, buf);
-	else
-		ret = -EIO;
-
+	ret = fattr->show(policy, buf);
 	up_read(&policy->rwsem);

 	return ret;

@@ -838,18 +792,12 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,

 	get_online_cpus();

-	if (!cpu_online(policy->cpu))
-		goto unlock;
-
-	down_write(&policy->rwsem);
-
-	if (fattr->store)
+	if (cpu_online(policy->cpu)) {
+		down_write(&policy->rwsem);
 		ret = fattr->store(policy, buf, count);
-	else
-		ret = -EIO;
-
-	up_write(&policy->rwsem);
+		up_write(&policy->rwsem);
+	}

-unlock:
 	put_online_cpus();

 	return ret;
@@ -959,6 +907,11 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
 	return cpufreq_add_dev_symlink(policy);
 }

+__weak struct cpufreq_governor *cpufreq_default_governor(void)
+{
+	return NULL;
+}
+
 static int cpufreq_init_policy(struct cpufreq_policy *policy)
 {
 	struct cpufreq_governor *gov = NULL;

@@ -968,11 +921,14 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
 	/* Update governor of new_policy to the governor used before hotplug */
 	gov = find_governor(policy->last_governor);
-	if (gov)
+	if (gov) {
 		pr_debug("Restoring governor %s for cpu %d\n",
 				policy->governor->name, policy->cpu);
-	else
-		gov = CPUFREQ_DEFAULT_GOVERNOR;
+	} else {
+		gov = cpufreq_default_governor();
+		if (!gov)
+			return -ENODATA;
+	}

 	new_policy.governor = gov;
...@@ -996,36 +952,45 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp ...@@ -996,36 +952,45 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp
if (cpumask_test_cpu(cpu, policy->cpus)) if (cpumask_test_cpu(cpu, policy->cpus))
return 0; return 0;
down_write(&policy->rwsem);
if (has_target()) { if (has_target()) {
ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
if (ret) { if (ret) {
pr_err("%s: Failed to stop governor\n", __func__); pr_err("%s: Failed to stop governor\n", __func__);
return ret; goto unlock;
} }
} }
down_write(&policy->rwsem);
cpumask_set_cpu(cpu, policy->cpus); cpumask_set_cpu(cpu, policy->cpus);
up_write(&policy->rwsem);
if (has_target()) { if (has_target()) {
ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
if (!ret) if (!ret)
ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
if (ret) { if (ret)
pr_err("%s: Failed to start governor\n", __func__); pr_err("%s: Failed to start governor\n", __func__);
return ret;
}
} }
return 0; unlock:
up_write(&policy->rwsem);
return ret;
}
static void handle_update(struct work_struct *work)
{
struct cpufreq_policy *policy =
container_of(work, struct cpufreq_policy, update);
unsigned int cpu = policy->cpu;
pr_debug("handle_update for cpu %u called\n", cpu);
cpufreq_update_policy(cpu);
} }
static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
{ {
struct device *dev = get_cpu_device(cpu); struct device *dev = get_cpu_device(cpu);
struct cpufreq_policy *policy; struct cpufreq_policy *policy;
int ret;
if (WARN_ON(!dev)) if (WARN_ON(!dev))
return NULL; return NULL;
...@@ -1043,7 +1008,13 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) ...@@ -1043,7 +1008,13 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL))
goto err_free_rcpumask; goto err_free_rcpumask;
kobject_init(&policy->kobj, &ktype_cpufreq); ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
cpufreq_global_kobject, "policy%u", cpu);
if (ret) {
pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret);
goto err_free_real_cpus;
}
INIT_LIST_HEAD(&policy->policy_list); INIT_LIST_HEAD(&policy->policy_list);
init_rwsem(&policy->rwsem); init_rwsem(&policy->rwsem);
spin_lock_init(&policy->transition_lock); spin_lock_init(&policy->transition_lock);
...@@ -1054,6 +1025,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) ...@@ -1054,6 +1025,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
policy->cpu = cpu; policy->cpu = cpu;
return policy; return policy;
err_free_real_cpus:
free_cpumask_var(policy->real_cpus);
err_free_rcpumask: err_free_rcpumask:
free_cpumask_var(policy->related_cpus); free_cpumask_var(policy->related_cpus);
err_free_cpumask: err_free_cpumask:
...@@ -1158,16 +1131,6 @@ static int cpufreq_online(unsigned int cpu) ...@@ -1158,16 +1131,6 @@ static int cpufreq_online(unsigned int cpu)
cpumask_copy(policy->related_cpus, policy->cpus); cpumask_copy(policy->related_cpus, policy->cpus);
/* Remember CPUs present at the policy creation time. */ /* Remember CPUs present at the policy creation time. */
cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask);
/* Name and add the kobject */
ret = kobject_add(&policy->kobj, cpufreq_global_kobject,
"policy%u",
cpumask_first(policy->related_cpus));
if (ret) {
pr_err("%s: failed to add policy->kobj: %d\n", __func__,
ret);
goto out_exit_policy;
}
} }
/* /*
...@@ -1309,9 +1272,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) ...@@ -1309,9 +1272,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
return ret; return ret;
} }
static void cpufreq_offline_prepare(unsigned int cpu) static void cpufreq_offline(unsigned int cpu)
{ {
struct cpufreq_policy *policy; struct cpufreq_policy *policy;
int ret;
pr_debug("%s: unregistering CPU %u\n", __func__, cpu); pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
...@@ -1321,13 +1285,13 @@ static void cpufreq_offline_prepare(unsigned int cpu) ...@@ -1321,13 +1285,13 @@ static void cpufreq_offline_prepare(unsigned int cpu)
return; return;
} }
down_write(&policy->rwsem);
if (has_target()) { if (has_target()) {
int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
if (ret) if (ret)
pr_err("%s: Failed to stop governor\n", __func__); pr_err("%s: Failed to stop governor\n", __func__);
} }
down_write(&policy->rwsem);
cpumask_clear_cpu(cpu, policy->cpus); cpumask_clear_cpu(cpu, policy->cpus);
if (policy_is_inactive(policy)) { if (policy_is_inactive(policy)) {
...@@ -1340,39 +1304,27 @@ static void cpufreq_offline_prepare(unsigned int cpu) ...@@ -1340,39 +1304,27 @@ static void cpufreq_offline_prepare(unsigned int cpu)
/* Nominate new CPU */ /* Nominate new CPU */
policy->cpu = cpumask_any(policy->cpus); policy->cpu = cpumask_any(policy->cpus);
} }
up_write(&policy->rwsem);
/* Start governor again for active policy */ /* Start governor again for active policy */
if (!policy_is_inactive(policy)) { if (!policy_is_inactive(policy)) {
if (has_target()) { if (has_target()) {
int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
if (!ret) if (!ret)
ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
if (ret) if (ret)
pr_err("%s: Failed to start governor\n", __func__); pr_err("%s: Failed to start governor\n", __func__);
} }
} else if (cpufreq_driver->stop_cpu) {
cpufreq_driver->stop_cpu(policy);
}
}
static void cpufreq_offline_finish(unsigned int cpu) goto unlock;
{
struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
if (!policy) {
pr_debug("%s: No cpu_data found\n", __func__);
return;
} }
/* Only proceed for inactive policies */ if (cpufreq_driver->stop_cpu)
if (!policy_is_inactive(policy)) cpufreq_driver->stop_cpu(policy);
return;
/* If cpu is last user of policy, free policy */ /* If cpu is last user of policy, free policy */
if (has_target()) { if (has_target()) {
int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
if (ret) if (ret)
pr_err("%s: Failed to exit governor\n", __func__); pr_err("%s: Failed to exit governor\n", __func__);
} }
...@@ -1386,6 +1338,9 @@ static void cpufreq_offline_finish(unsigned int cpu) ...@@ -1386,6 +1338,9 @@ static void cpufreq_offline_finish(unsigned int cpu)
cpufreq_driver->exit(policy); cpufreq_driver->exit(policy);
policy->freq_table = NULL; policy->freq_table = NULL;
} }
unlock:
up_write(&policy->rwsem);
} }
/** /**
...@@ -1401,10 +1356,8 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) ...@@ -1401,10 +1356,8 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
if (!policy) if (!policy)
return; return;
if (cpu_online(cpu)) { if (cpu_online(cpu))
cpufreq_offline_prepare(cpu); cpufreq_offline(cpu);
cpufreq_offline_finish(cpu);
}
cpumask_clear_cpu(cpu, policy->real_cpus); cpumask_clear_cpu(cpu, policy->real_cpus);
remove_cpu_dev_symlink(policy, cpu); remove_cpu_dev_symlink(policy, cpu);
...@@ -1413,15 +1366,6 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) ...@@ -1413,15 +1366,6 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
cpufreq_policy_free(policy, true); cpufreq_policy_free(policy, true);
} }
static void handle_update(struct work_struct *work)
{
struct cpufreq_policy *policy =
container_of(work, struct cpufreq_policy, update);
unsigned int cpu = policy->cpu;
pr_debug("handle_update for cpu %u called\n", cpu);
cpufreq_update_policy(cpu);
}
/** /**
* cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're
* in deep trouble. * in deep trouble.
...@@ -1584,6 +1528,7 @@ EXPORT_SYMBOL(cpufreq_generic_suspend); ...@@ -1584,6 +1528,7 @@ EXPORT_SYMBOL(cpufreq_generic_suspend);
void cpufreq_suspend(void) void cpufreq_suspend(void)
{ {
struct cpufreq_policy *policy; struct cpufreq_policy *policy;
int ret;
if (!cpufreq_driver) if (!cpufreq_driver)
return; return;
...@@ -1594,7 +1539,11 @@ void cpufreq_suspend(void) ...@@ -1594,7 +1539,11 @@ void cpufreq_suspend(void)
pr_debug("%s: Suspending Governors\n", __func__); pr_debug("%s: Suspending Governors\n", __func__);
for_each_active_policy(policy) { for_each_active_policy(policy) {
if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP)) down_write(&policy->rwsem);
ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
up_write(&policy->rwsem);
if (ret)
pr_err("%s: Failed to stop governor for policy: %p\n", pr_err("%s: Failed to stop governor for policy: %p\n",
__func__, policy); __func__, policy);
else if (cpufreq_driver->suspend else if (cpufreq_driver->suspend
...@@ -1616,6 +1565,7 @@ void cpufreq_suspend(void) ...@@ -1616,6 +1565,7 @@ void cpufreq_suspend(void)
void cpufreq_resume(void) void cpufreq_resume(void)
{ {
struct cpufreq_policy *policy; struct cpufreq_policy *policy;
int ret;
if (!cpufreq_driver) if (!cpufreq_driver)
return; return;
...@@ -1628,13 +1578,20 @@ void cpufreq_resume(void) ...@@ -1628,13 +1578,20 @@ void cpufreq_resume(void)
pr_debug("%s: Resuming Governors\n", __func__); pr_debug("%s: Resuming Governors\n", __func__);
for_each_active_policy(policy) { for_each_active_policy(policy) {
if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) {
pr_err("%s: Failed to resume driver: %p\n", __func__, pr_err("%s: Failed to resume driver: %p\n", __func__,
policy); policy);
else if (__cpufreq_governor(policy, CPUFREQ_GOV_START) } else {
|| __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS)) down_write(&policy->rwsem);
pr_err("%s: Failed to start governor for policy: %p\n", ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
__func__, policy); if (!ret)
cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
up_write(&policy->rwsem);
if (ret)
pr_err("%s: Failed to start governor for policy: %p\n",
__func__, policy);
}
} }
/* /*
...@@ -1846,7 +1803,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, ...@@ -1846,7 +1803,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
unsigned int relation) unsigned int relation)
{ {
unsigned int old_target_freq = target_freq; unsigned int old_target_freq = target_freq;
int retval = -EINVAL; struct cpufreq_frequency_table *freq_table;
int index, retval;
if (cpufreq_disabled()) if (cpufreq_disabled())
return -ENODEV; return -ENODEV;
...@@ -1873,34 +1831,28 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, ...@@ -1873,34 +1831,28 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
policy->restore_freq = policy->cur; policy->restore_freq = policy->cur;
if (cpufreq_driver->target) if (cpufreq_driver->target)
retval = cpufreq_driver->target(policy, target_freq, relation); return cpufreq_driver->target(policy, target_freq, relation);
else if (cpufreq_driver->target_index) {
struct cpufreq_frequency_table *freq_table;
int index;
freq_table = cpufreq_frequency_get_table(policy->cpu);
if (unlikely(!freq_table)) {
pr_err("%s: Unable to find freq_table\n", __func__);
goto out;
}
retval = cpufreq_frequency_table_target(policy, freq_table, if (!cpufreq_driver->target_index)
target_freq, relation, &index); return -EINVAL;
if (unlikely(retval)) {
pr_err("%s: Unable to find matching freq\n", __func__);
goto out;
}
if (freq_table[index].frequency == policy->cur) { freq_table = cpufreq_frequency_get_table(policy->cpu);
retval = 0; if (unlikely(!freq_table)) {
goto out; pr_err("%s: Unable to find freq_table\n", __func__);
} return -EINVAL;
}
retval = __target_index(policy, freq_table, index); retval = cpufreq_frequency_table_target(policy, freq_table, target_freq,
relation, &index);
if (unlikely(retval)) {
pr_err("%s: Unable to find matching freq\n", __func__);
return retval;
} }
out: if (freq_table[index].frequency == policy->cur)
return retval; return 0;
return __target_index(policy, freq_table, index);
} }
EXPORT_SYMBOL_GPL(__cpufreq_driver_target); EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
...@@ -1920,20 +1872,14 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, ...@@ -1920,20 +1872,14 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
} }
EXPORT_SYMBOL_GPL(cpufreq_driver_target); EXPORT_SYMBOL_GPL(cpufreq_driver_target);
static int __cpufreq_governor(struct cpufreq_policy *policy, __weak struct cpufreq_governor *cpufreq_fallback_governor(void)
unsigned int event)
{ {
int ret; return NULL;
}
/* Only must be defined when default governor is known to have latency static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
restrictions, like e.g. conservative or ondemand. {
That this is the case is already ensured in Kconfig int ret;
*/
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
struct cpufreq_governor *gov = NULL;
#endif
/* Don't start any governor operations if we are entering suspend */ /* Don't start any governor operations if we are entering suspend */
if (cpufreq_suspended) if (cpufreq_suspended)
...@@ -1948,12 +1894,14 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, ...@@ -1948,12 +1894,14 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
if (policy->governor->max_transition_latency && if (policy->governor->max_transition_latency &&
policy->cpuinfo.transition_latency > policy->cpuinfo.transition_latency >
policy->governor->max_transition_latency) { policy->governor->max_transition_latency) {
if (!gov) struct cpufreq_governor *gov = cpufreq_fallback_governor();
return -EINVAL;
else { if (gov) {
pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n",
policy->governor->name, gov->name); policy->governor->name, gov->name);
policy->governor = gov; policy->governor = gov;
} else {
return -EINVAL;
} }
} }
...@@ -1963,21 +1911,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, ...@@ -1963,21 +1911,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event);
mutex_lock(&cpufreq_governor_lock);
if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
|| (!policy->governor_enabled
&& (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) {
mutex_unlock(&cpufreq_governor_lock);
return -EBUSY;
}
if (event == CPUFREQ_GOV_STOP)
policy->governor_enabled = false;
else if (event == CPUFREQ_GOV_START)
policy->governor_enabled = true;
mutex_unlock(&cpufreq_governor_lock);
ret = policy->governor->governor(policy, event); ret = policy->governor->governor(policy, event);
if (!ret) { if (!ret) {
...@@ -1985,14 +1918,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, ...@@ -1985,14 +1918,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
policy->governor->initialized++; policy->governor->initialized++;
else if (event == CPUFREQ_GOV_POLICY_EXIT) else if (event == CPUFREQ_GOV_POLICY_EXIT)
policy->governor->initialized--; policy->governor->initialized--;
} else {
/* Restore original values */
mutex_lock(&cpufreq_governor_lock);
if (event == CPUFREQ_GOV_STOP)
policy->governor_enabled = true;
else if (event == CPUFREQ_GOV_START)
policy->governor_enabled = false;
mutex_unlock(&cpufreq_governor_lock);
} }
if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) || if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) ||
...@@ -2147,7 +2072,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, ...@@ -2147,7 +2072,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
old_gov = policy->governor; old_gov = policy->governor;
/* end old governor */ /* end old governor */
if (old_gov) { if (old_gov) {
ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP);
if (ret) { if (ret) {
/* This can happen due to race with other operations */ /* This can happen due to race with other operations */
pr_debug("%s: Failed to Stop Governor: %s (%d)\n", pr_debug("%s: Failed to Stop Governor: %s (%d)\n",
...@@ -2155,10 +2080,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, ...@@ -2155,10 +2080,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
return ret; return ret;
} }
up_write(&policy->rwsem); ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
down_write(&policy->rwsem);
if (ret) { if (ret) {
pr_err("%s: Failed to Exit Governor: %s (%d)\n", pr_err("%s: Failed to Exit Governor: %s (%d)\n",
__func__, old_gov->name, ret); __func__, old_gov->name, ret);
...@@ -2168,32 +2090,30 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, ...@@ -2168,32 +2090,30 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
/* start new governor */ /* start new governor */
policy->governor = new_policy->governor; policy->governor = new_policy->governor;
ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
if (!ret) { if (!ret) {
ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
if (!ret) if (!ret)
goto out; goto out;
up_write(&policy->rwsem); cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
down_write(&policy->rwsem);
} }
/* new governor failed, so re-start old one */ /* new governor failed, so re-start old one */
pr_debug("starting governor %s failed\n", policy->governor->name); pr_debug("starting governor %s failed\n", policy->governor->name);
if (old_gov) { if (old_gov) {
policy->governor = old_gov; policy->governor = old_gov;
if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
policy->governor = NULL; policy->governor = NULL;
else else
__cpufreq_governor(policy, CPUFREQ_GOV_START); cpufreq_governor(policy, CPUFREQ_GOV_START);
} }
return ret; return ret;
out: out:
pr_debug("governor: change or update limits\n"); pr_debug("governor: change or update limits\n");
return __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
} }
/** /**
...@@ -2260,11 +2180,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, ...@@ -2260,11 +2180,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
break; break;
case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE:
cpufreq_offline_prepare(cpu); cpufreq_offline(cpu);
break;
case CPU_POST_DEAD:
cpufreq_offline_finish(cpu);
break; break;
case CPU_DOWN_FAILED: case CPU_DOWN_FAILED:
...@@ -2297,8 +2213,11 @@ static int cpufreq_boost_set_sw(int state) ...@@ -2297,8 +2213,11 @@ static int cpufreq_boost_set_sw(int state)
__func__); __func__);
break; break;
} }
down_write(&policy->rwsem);
policy->user_policy.max = policy->max; policy->user_policy.max = policy->max;
__cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
up_write(&policy->rwsem);
} }
} }
...@@ -2384,7 +2303,7 @@ EXPORT_SYMBOL_GPL(cpufreq_boost_enabled); ...@@ -2384,7 +2303,7 @@ EXPORT_SYMBOL_GPL(cpufreq_boost_enabled);
* submitted by the CPU Frequency driver. * submitted by the CPU Frequency driver.
* *
* Registers a CPU Frequency driver to this core code. This code * Registers a CPU Frequency driver to this core code. This code
* returns zero on success, -EBUSY when another driver got here first * returns zero on success, -EEXIST when another driver got here first
* (and isn't unregistered in the meantime). * (and isn't unregistered in the meantime).
* *
*/ */
......
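The cpufreq core now obtains the default and fallback governors through __weak functions (cpufreq_default_governor() and cpufreq_fallback_governor()) that a built-in governor overrides, instead of the CPUFREQ_DEFAULT_GOVERNOR macro and #ifdef blocks. A hypothetical user-space illustration of that weak-symbol override pattern (all names invented):

```c
/*
 * The core provides a weak default that returns NULL; whichever governor is
 * "built in" supplies a strong definition that takes precedence at link time.
 */
#include <stdio.h>

struct governor { const char *name; };

/* Weak default, analogous to cpufreq_default_governor() in cpufreq.c. */
__attribute__((weak)) struct governor *default_governor(void)
{
	return NULL;
}

/* A built-in governor would provide the strong override in its own file:
 *
 *	struct governor *default_governor(void)
 *	{
 *		static struct governor ondemand = { .name = "ondemand" };
 *		return &ondemand;
 *	}
 */

int main(void)
{
	struct governor *gov = default_governor();

	printf("default governor: %s\n", gov ? gov->name : "(none built in)");
	return 0;
}
```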
@@ -14,6 +14,22 @@

 #include <linux/slab.h>
 #include "cpufreq_governor.h"

+struct cs_policy_dbs_info {
+	struct policy_dbs_info policy_dbs;
+	unsigned int down_skip;
+	unsigned int requested_freq;
+};
+
+static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs)
+{
+	return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs);
+}
+
+struct cs_dbs_tuners {
+	unsigned int down_threshold;
+	unsigned int freq_step;
+};
+
 /* Conservative governor macros */
 #define DEF_FREQUENCY_UP_THRESHOLD		(80)
 #define DEF_FREQUENCY_DOWN_THRESHOLD		(20)
...@@ -21,21 +37,6 @@ ...@@ -21,21 +37,6 @@
#define DEF_SAMPLING_DOWN_FACTOR (1) #define DEF_SAMPLING_DOWN_FACTOR (1)
#define MAX_SAMPLING_DOWN_FACTOR (10) #define MAX_SAMPLING_DOWN_FACTOR (10)
static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info);
static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy,
unsigned int event);
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
static
#endif
struct cpufreq_governor cpufreq_gov_conservative = {
.name = "conservative",
.governor = cs_cpufreq_governor_dbs,
.max_transition_latency = TRANSITION_LATENCY_LIMIT,
.owner = THIS_MODULE,
};
static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
struct cpufreq_policy *policy) struct cpufreq_policy *policy)
{ {
...@@ -57,27 +58,28 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, ...@@ -57,27 +58,28 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
* Any frequency increase takes it to the maximum frequency. Frequency reduction * Any frequency increase takes it to the maximum frequency. Frequency reduction
* happens at minimum steps of 5% (default) of maximum frequency * happens at minimum steps of 5% (default) of maximum frequency
*/ */
static void cs_check_cpu(int cpu, unsigned int load) static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
{ {
struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); struct policy_dbs_info *policy_dbs = policy->governor_data;
struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
struct dbs_data *dbs_data = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data;
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int load = dbs_update(policy);
/* /*
* break out if we 'cannot' reduce the speed as the user might * break out if we 'cannot' reduce the speed as the user might
* want freq_step to be zero * want freq_step to be zero
*/ */
if (cs_tuners->freq_step == 0) if (cs_tuners->freq_step == 0)
return; goto out;
/* Check for frequency increase */ /* Check for frequency increase */
if (load > cs_tuners->up_threshold) { if (load > dbs_data->up_threshold) {
dbs_info->down_skip = 0; dbs_info->down_skip = 0;
/* if we are already at full speed then break out early */ /* if we are already at full speed then break out early */
if (dbs_info->requested_freq == policy->max) if (dbs_info->requested_freq == policy->max)
return; goto out;
dbs_info->requested_freq += get_freq_target(cs_tuners, policy); dbs_info->requested_freq += get_freq_target(cs_tuners, policy);
...@@ -86,12 +88,12 @@ static void cs_check_cpu(int cpu, unsigned int load) ...@@ -86,12 +88,12 @@ static void cs_check_cpu(int cpu, unsigned int load)
__cpufreq_driver_target(policy, dbs_info->requested_freq, __cpufreq_driver_target(policy, dbs_info->requested_freq,
CPUFREQ_RELATION_H); CPUFREQ_RELATION_H);
return; goto out;
} }
/* if sampling_down_factor is active break out early */ /* if sampling_down_factor is active break out early */
if (++dbs_info->down_skip < cs_tuners->sampling_down_factor) if (++dbs_info->down_skip < dbs_data->sampling_down_factor)
return; goto out;
dbs_info->down_skip = 0; dbs_info->down_skip = 0;
/* Check for frequency decrease */ /* Check for frequency decrease */
...@@ -101,7 +103,7 @@ static void cs_check_cpu(int cpu, unsigned int load) ...@@ -101,7 +103,7 @@ static void cs_check_cpu(int cpu, unsigned int load)
* if we cannot reduce the frequency anymore, break out early * if we cannot reduce the frequency anymore, break out early
*/ */
if (policy->cur == policy->min) if (policy->cur == policy->min)
return; goto out;
freq_target = get_freq_target(cs_tuners, policy); freq_target = get_freq_target(cs_tuners, policy);
if (dbs_info->requested_freq > freq_target) if (dbs_info->requested_freq > freq_target)
...@@ -111,58 +113,25 @@ static void cs_check_cpu(int cpu, unsigned int load) ...@@ -111,58 +113,25 @@ static void cs_check_cpu(int cpu, unsigned int load)
__cpufreq_driver_target(policy, dbs_info->requested_freq, __cpufreq_driver_target(policy, dbs_info->requested_freq,
CPUFREQ_RELATION_L); CPUFREQ_RELATION_L);
return;
} }
}
static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all)
{
struct dbs_data *dbs_data = policy->governor_data;
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
if (modify_all)
dbs_check_cpu(dbs_data, policy->cpu);
return delay_for_sampling_rate(cs_tuners->sampling_rate); out:
return dbs_data->sampling_rate;
} }
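Aside, not part of the patch: cs_dbs_timer() above steps requested_freq up or down by get_freq_target(), whose body lies outside this hunk. As a hedged sketch, the conservative governor's step is freq_step percent of policy->max with a small floor, so a step that rounds down to zero still moves the frequency. The standalone snippet only shows that arithmetic; the floor value is an assumption.

#include <stdio.h>

#define DEF_FREQUENCY_STEP (5)	/* assumed fallback when the percentage rounds to 0 */

/* freq_step_pct percent of the policy maximum, never zero */
static unsigned int freq_target_sketch(unsigned int freq_step_pct,
				       unsigned int policy_max_khz)
{
	unsigned int target = (freq_step_pct * policy_max_khz) / 100;

	return target ? target : DEF_FREQUENCY_STEP;
}

int main(void)
{
	/* 5% of a 2.4 GHz policy comes to 120000 kHz per step */
	printf("%u kHz\n", freq_target_sketch(5, 2400000));
	return 0;
}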
static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data) void *data);
{
struct cpufreq_freqs *freq = data;
struct cs_cpu_dbs_info_s *dbs_info =
&per_cpu(cs_cpu_dbs_info, freq->cpu);
struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu);
if (!policy)
return 0;
/* policy isn't governed by conservative governor */
if (policy->governor != &cpufreq_gov_conservative)
return 0;
/*
* we only care if our internally tracked freq moves outside the 'valid'
* ranges of frequency available to us otherwise we do not change it
*/
if (dbs_info->requested_freq > policy->max
|| dbs_info->requested_freq < policy->min)
dbs_info->requested_freq = freq->new;
return 0;
}
static struct notifier_block cs_cpufreq_notifier_block = { static struct notifier_block cs_cpufreq_notifier_block = {
.notifier_call = dbs_cpufreq_notifier, .notifier_call = dbs_cpufreq_notifier,
}; };
/************************** sysfs interface ************************/ /************************** sysfs interface ************************/
static struct common_dbs_data cs_dbs_cdata; static struct dbs_governor cs_dbs_gov;
static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
const char *buf, size_t count) const char *buf, size_t count)
{ {
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int input; unsigned int input;
int ret; int ret;
ret = sscanf(buf, "%u", &input); ret = sscanf(buf, "%u", &input);
...@@ -170,22 +139,7 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, ...@@ -170,22 +139,7 @@ static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
return -EINVAL; return -EINVAL;
cs_tuners->sampling_down_factor = input; dbs_data->sampling_down_factor = input;
return count;
}
static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
size_t count)
{
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
if (ret != 1)
return -EINVAL;
cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate);
return count; return count;
} }
...@@ -200,7 +154,7 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, ...@@ -200,7 +154,7 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold)
return -EINVAL; return -EINVAL;
cs_tuners->up_threshold = input; dbs_data->up_threshold = input;
return count; return count;
} }
...@@ -214,7 +168,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, ...@@ -214,7 +168,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf,
/* cannot be lower than 11 otherwise freq will not fall */ /* cannot be lower than 11 otherwise freq will not fall */
if (ret != 1 || input < 11 || input > 100 || if (ret != 1 || input < 11 || input > 100 ||
input >= cs_tuners->up_threshold) input >= dbs_data->up_threshold)
return -EINVAL; return -EINVAL;
cs_tuners->down_threshold = input; cs_tuners->down_threshold = input;
...@@ -224,8 +178,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, ...@@ -224,8 +178,7 @@ static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf,
static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
const char *buf, size_t count) const char *buf, size_t count)
{ {
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; unsigned int input;
unsigned int input, j;
int ret; int ret;
ret = sscanf(buf, "%u", &input); ret = sscanf(buf, "%u", &input);
...@@ -235,21 +188,14 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, ...@@ -235,21 +188,14 @@ static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
if (input > 1) if (input > 1)
input = 1; input = 1;
if (input == cs_tuners->ignore_nice_load) /* nothing to do */ if (input == dbs_data->ignore_nice_load) /* nothing to do */
return count; return count;
cs_tuners->ignore_nice_load = input; dbs_data->ignore_nice_load = input;
/* we need to re-evaluate prev_cpu_idle */ /* we need to re-evaluate prev_cpu_idle */
for_each_online_cpu(j) { gov_update_cpu_data(dbs_data);
struct cs_cpu_dbs_info_s *dbs_info;
dbs_info = &per_cpu(cs_cpu_dbs_info, j);
dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
&dbs_info->cdbs.prev_cpu_wall, 0);
if (cs_tuners->ignore_nice_load)
dbs_info->cdbs.prev_cpu_nice =
kcpustat_cpu(j).cpustat[CPUTIME_NICE];
}
return count; return count;
} }
...@@ -275,55 +221,47 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, ...@@ -275,55 +221,47 @@ static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf,
return count; return count;
} }
show_store_one(cs, sampling_rate); gov_show_one_common(sampling_rate);
show_store_one(cs, sampling_down_factor); gov_show_one_common(sampling_down_factor);
show_store_one(cs, up_threshold); gov_show_one_common(up_threshold);
show_store_one(cs, down_threshold); gov_show_one_common(ignore_nice_load);
show_store_one(cs, ignore_nice_load); gov_show_one_common(min_sampling_rate);
show_store_one(cs, freq_step); gov_show_one(cs, down_threshold);
declare_show_sampling_rate_min(cs); gov_show_one(cs, freq_step);
gov_sys_pol_attr_rw(sampling_rate); gov_attr_rw(sampling_rate);
gov_sys_pol_attr_rw(sampling_down_factor); gov_attr_rw(sampling_down_factor);
gov_sys_pol_attr_rw(up_threshold); gov_attr_rw(up_threshold);
gov_sys_pol_attr_rw(down_threshold); gov_attr_rw(ignore_nice_load);
gov_sys_pol_attr_rw(ignore_nice_load); gov_attr_ro(min_sampling_rate);
gov_sys_pol_attr_rw(freq_step); gov_attr_rw(down_threshold);
gov_sys_pol_attr_ro(sampling_rate_min); gov_attr_rw(freq_step);
static struct attribute *dbs_attributes_gov_sys[] = { static struct attribute *cs_attributes[] = {
&sampling_rate_min_gov_sys.attr, &min_sampling_rate.attr,
&sampling_rate_gov_sys.attr, &sampling_rate.attr,
&sampling_down_factor_gov_sys.attr, &sampling_down_factor.attr,
&up_threshold_gov_sys.attr, &up_threshold.attr,
&down_threshold_gov_sys.attr, &down_threshold.attr,
&ignore_nice_load_gov_sys.attr, &ignore_nice_load.attr,
&freq_step_gov_sys.attr, &freq_step.attr,
NULL NULL
}; };
static struct attribute_group cs_attr_group_gov_sys = { /************************** sysfs end ************************/
.attrs = dbs_attributes_gov_sys,
.name = "conservative",
};
static struct attribute *dbs_attributes_gov_pol[] = { static struct policy_dbs_info *cs_alloc(void)
&sampling_rate_min_gov_pol.attr, {
&sampling_rate_gov_pol.attr, struct cs_policy_dbs_info *dbs_info;
&sampling_down_factor_gov_pol.attr,
&up_threshold_gov_pol.attr,
&down_threshold_gov_pol.attr,
&ignore_nice_load_gov_pol.attr,
&freq_step_gov_pol.attr,
NULL
};
static struct attribute_group cs_attr_group_gov_pol = { dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL);
.attrs = dbs_attributes_gov_pol, return dbs_info ? &dbs_info->policy_dbs : NULL;
.name = "conservative", }
};
/************************** sysfs end ************************/ static void cs_free(struct policy_dbs_info *policy_dbs)
{
kfree(to_dbs_info(policy_dbs));
}
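cs_alloc() returns a pointer to the policy_dbs_info embedded in cs_policy_dbs_info, and to_dbs_info()/cs_free() recover the enclosing object through container_of(). A minimal user-space analogue of this embed-and-recover pattern, with stand-in types rather than the kernel definitions:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct policy_dbs_info { int placeholder; };	/* stand-in for the common struct */

struct cs_policy_dbs_info {
	struct policy_dbs_info policy_dbs;	/* embedded, not a pointer */
	unsigned int down_skip;
	unsigned int requested_freq;
};

static struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs)
{
	return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs);
}

int main(void)
{
	struct cs_policy_dbs_info *info = calloc(1, sizeof(*info));
	struct policy_dbs_info *common;

	if (!info)
		return 1;

	common = &info->policy_dbs;			/* what "alloc" hands to the core */
	to_dbs_info(common)->requested_freq = 1200000;	/* governor gets its data back */
	printf("%u\n", info->requested_freq);
	free(info);
	return 0;
}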
static int cs_init(struct dbs_data *dbs_data, bool notify) static int cs_init(struct dbs_data *dbs_data, bool notify)
{ {
...@@ -335,11 +273,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify) ...@@ -335,11 +273,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify)
return -ENOMEM; return -ENOMEM;
} }
tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD;
tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
tuners->ignore_nice_load = 0;
tuners->freq_step = DEF_FREQUENCY_STEP; tuners->freq_step = DEF_FREQUENCY_STEP;
dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
dbs_data->ignore_nice_load = 0;
dbs_data->tuners = tuners; dbs_data->tuners = tuners;
dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
...@@ -361,35 +299,66 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify) ...@@ -361,35 +299,66 @@ static void cs_exit(struct dbs_data *dbs_data, bool notify)
kfree(dbs_data->tuners); kfree(dbs_data->tuners);
} }
define_get_cpu_dbs_routines(cs_cpu_dbs_info); static void cs_start(struct cpufreq_policy *policy)
{
struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
dbs_info->down_skip = 0;
dbs_info->requested_freq = policy->cur;
}
static struct common_dbs_data cs_dbs_cdata = { static struct dbs_governor cs_dbs_gov = {
.governor = GOV_CONSERVATIVE, .gov = {
.attr_group_gov_sys = &cs_attr_group_gov_sys, .name = "conservative",
.attr_group_gov_pol = &cs_attr_group_gov_pol, .governor = cpufreq_governor_dbs,
.get_cpu_cdbs = get_cpu_cdbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT,
.get_cpu_dbs_info_s = get_cpu_dbs_info_s, .owner = THIS_MODULE,
},
.kobj_type = { .default_attrs = cs_attributes },
.gov_dbs_timer = cs_dbs_timer, .gov_dbs_timer = cs_dbs_timer,
.gov_check_cpu = cs_check_cpu, .alloc = cs_alloc,
.free = cs_free,
.init = cs_init, .init = cs_init,
.exit = cs_exit, .exit = cs_exit,
.mutex = __MUTEX_INITIALIZER(cs_dbs_cdata.mutex), .start = cs_start,
}; };
static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, #define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov)
unsigned int event)
static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{ {
return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); struct cpufreq_freqs *freq = data;
struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu);
struct cs_policy_dbs_info *dbs_info;
if (!policy)
return 0;
/* policy isn't governed by conservative governor */
if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE)
return 0;
dbs_info = to_dbs_info(policy->governor_data);
/*
* we only care if our internally tracked freq moves outside the 'valid'
* ranges of frequency available to us otherwise we do not change it
*/
if (dbs_info->requested_freq > policy->max
|| dbs_info->requested_freq < policy->min)
dbs_info->requested_freq = freq->new;
return 0;
} }
static int __init cpufreq_gov_dbs_init(void) static int __init cpufreq_gov_dbs_init(void)
{ {
return cpufreq_register_governor(&cpufreq_gov_conservative); return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE);
} }
static void __exit cpufreq_gov_dbs_exit(void) static void __exit cpufreq_gov_dbs_exit(void)
{ {
cpufreq_unregister_governor(&cpufreq_gov_conservative); cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE);
} }
MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>"); MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>");
...@@ -399,6 +368,11 @@ MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " ...@@ -399,6 +368,11 @@ MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
struct cpufreq_governor *cpufreq_default_governor(void)
{
return CPU_FREQ_GOV_CONSERVATIVE;
}
fs_initcall(cpufreq_gov_dbs_init); fs_initcall(cpufreq_gov_dbs_init);
#else #else
module_init(cpufreq_gov_dbs_init); module_init(cpufreq_gov_dbs_init);
......
This diff has been collapsed.
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#define _CPUFREQ_GOVERNOR_H #define _CPUFREQ_GOVERNOR_H
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/irq_work.h>
#include <linux/cpufreq.h> #include <linux/cpufreq.h>
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -41,96 +42,68 @@ ...@@ -41,96 +42,68 @@
enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
/* /*
* Macro for creating governors sysfs routines * Abbreviations:
* * dbs: used as a shortform for demand based switching It helps to keep variable
* - gov_sys: One governor instance per whole system * names smaller, simpler
* - gov_pol: One governor instance per policy * cdbs: common dbs
* od_*: On-demand governor
* cs_*: Conservative governor
*/ */
/* Create attributes */ /* Governor demand based switching data (per-policy or global). */
#define gov_sys_attr_ro(_name) \ struct dbs_data {
static struct global_attr _name##_gov_sys = \ int usage_count;
__ATTR(_name, 0444, show_##_name##_gov_sys, NULL) void *tuners;
unsigned int min_sampling_rate;
#define gov_sys_attr_rw(_name) \ unsigned int ignore_nice_load;
static struct global_attr _name##_gov_sys = \ unsigned int sampling_rate;
__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) unsigned int sampling_down_factor;
unsigned int up_threshold;
#define gov_pol_attr_ro(_name) \ unsigned int io_is_busy;
static struct freq_attr _name##_gov_pol = \
__ATTR(_name, 0444, show_##_name##_gov_pol, NULL)
#define gov_pol_attr_rw(_name) \
static struct freq_attr _name##_gov_pol = \
__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)
#define gov_sys_pol_attr_rw(_name) \ struct kobject kobj;
gov_sys_attr_rw(_name); \ struct list_head policy_dbs_list;
gov_pol_attr_rw(_name) /*
* Protect concurrent updates to governor tunables from sysfs,
* policy_dbs_list and usage_count.
*/
struct mutex mutex;
};
#define gov_sys_pol_attr_ro(_name) \ /* Governor's specific attributes */
gov_sys_attr_ro(_name); \ struct dbs_data;
gov_pol_attr_ro(_name) struct governor_attr {
struct attribute attr;
ssize_t (*show)(struct dbs_data *dbs_data, char *buf);
ssize_t (*store)(struct dbs_data *dbs_data, const char *buf,
size_t count);
};
/* Create show/store routines */ #define gov_show_one(_gov, file_name) \
#define show_one(_gov, file_name) \ static ssize_t show_##file_name \
static ssize_t show_##file_name##_gov_sys \ (struct dbs_data *dbs_data, char *buf) \
(struct kobject *kobj, struct attribute *attr, char *buf) \
{ \ { \
struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \
return sprintf(buf, "%u\n", tuners->file_name); \
} \
\
static ssize_t show_##file_name##_gov_pol \
(struct cpufreq_policy *policy, char *buf) \
{ \
struct dbs_data *dbs_data = policy->governor_data; \
struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \
return sprintf(buf, "%u\n", tuners->file_name); \ return sprintf(buf, "%u\n", tuners->file_name); \
} }
#define store_one(_gov, file_name) \ #define gov_show_one_common(file_name) \
static ssize_t store_##file_name##_gov_sys \ static ssize_t show_##file_name \
(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ (struct dbs_data *dbs_data, char *buf) \
{ \
struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \
return store_##file_name(dbs_data, buf, count); \
} \
\
static ssize_t store_##file_name##_gov_pol \
(struct cpufreq_policy *policy, const char *buf, size_t count) \
{ \ { \
struct dbs_data *dbs_data = policy->governor_data; \ return sprintf(buf, "%u\n", dbs_data->file_name); \
return store_##file_name(dbs_data, buf, count); \
} }
#define show_store_one(_gov, file_name) \ #define gov_attr_ro(_name) \
show_one(_gov, file_name); \ static struct governor_attr _name = \
store_one(_gov, file_name) __ATTR(_name, 0444, show_##_name, NULL)
/* create helper routines */ #define gov_attr_rw(_name) \
#define define_get_cpu_dbs_routines(_dbs_info) \ static struct governor_attr _name = \
static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ __ATTR(_name, 0644, show_##_name, store_##_name)
{ \
return &per_cpu(_dbs_info, cpu).cdbs; \
} \
\
static void *get_cpu_dbs_info_s(int cpu) \
{ \
return &per_cpu(_dbs_info, cpu); \
}
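The new gov_show_one_common()/gov_show_one()/gov_attr_*() macros generate one show helper and one attribute object per tunable by token pasting, reading either the common dbs_data fields or the governor's private tuners. A self-contained user-space approximation of the token-pasting side of this, with simplified signatures and none of the kernel sysfs machinery:

#include <stdio.h>

struct dbs_data {
	unsigned int sampling_rate;
	unsigned int up_threshold;
};

/* one macro invocation per tunable generates show_<field>() */
#define gov_show_one_common(file_name)					\
static int show_##file_name(struct dbs_data *dbs_data, char *buf,	\
			    size_t len)					\
{									\
	return snprintf(buf, len, "%u\n", dbs_data->file_name);	\
}

gov_show_one_common(sampling_rate)
gov_show_one_common(up_threshold)

int main(void)
{
	struct dbs_data d = { .sampling_rate = 10000, .up_threshold = 80 };
	char buf[32];

	show_sampling_rate(&d, buf, sizeof(buf));
	printf("%s", buf);
	show_up_threshold(&d, buf, sizeof(buf));
	printf("%s", buf);
	return 0;
}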
/*
* Abbreviations:
* dbs: used as a shortform for demand based switching It helps to keep variable
* names smaller, simpler
* cdbs: common dbs
* od_*: On-demand governor
* cs_*: Conservative governor
*/
/* Common to all CPUs of a policy */ /* Common to all CPUs of a policy */
struct cpu_common_dbs_info { struct policy_dbs_info {
struct cpufreq_policy *policy; struct cpufreq_policy *policy;
/* /*
* Per policy mutex that serializes load evaluation from limit-change * Per policy mutex that serializes load evaluation from limit-change
...@@ -138,11 +111,27 @@ struct cpu_common_dbs_info { ...@@ -138,11 +111,27 @@ struct cpu_common_dbs_info {
*/ */
struct mutex timer_mutex; struct mutex timer_mutex;
ktime_t time_stamp; u64 last_sample_time;
atomic_t skip_work; s64 sample_delay_ns;
atomic_t work_count;
struct irq_work irq_work;
struct work_struct work; struct work_struct work;
/* dbs_data may be shared between multiple policy objects */
struct dbs_data *dbs_data;
struct list_head list;
/* Multiplier for increasing sample delay temporarily. */
unsigned int rate_mult;
/* Status indicators */
bool is_shared; /* This object is used by multiple CPUs */
bool work_in_progress; /* Work is being queued up or in progress */
}; };
static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs,
unsigned int delay_us)
{
policy_dbs->sample_delay_ns = delay_us * NSEC_PER_USEC;
}
/* Per cpu structures */ /* Per cpu structures */
struct cpu_dbs_info { struct cpu_dbs_info {
u64 prev_cpu_idle; u64 prev_cpu_idle;
...@@ -155,54 +144,14 @@ struct cpu_dbs_info { ...@@ -155,54 +144,14 @@ struct cpu_dbs_info {
* wake-up from idle. * wake-up from idle.
*/ */
unsigned int prev_load; unsigned int prev_load;
struct timer_list timer; struct update_util_data update_util;
struct cpu_common_dbs_info *shared; struct policy_dbs_info *policy_dbs;
};
struct od_cpu_dbs_info_s {
struct cpu_dbs_info cdbs;
struct cpufreq_frequency_table *freq_table;
unsigned int freq_lo;
unsigned int freq_lo_jiffies;
unsigned int freq_hi_jiffies;
unsigned int rate_mult;
unsigned int sample_type:1;
};
struct cs_cpu_dbs_info_s {
struct cpu_dbs_info cdbs;
unsigned int down_skip;
unsigned int requested_freq;
};
/* Per policy Governors sysfs tunables */
struct od_dbs_tuners {
unsigned int ignore_nice_load;
unsigned int sampling_rate;
unsigned int sampling_down_factor;
unsigned int up_threshold;
unsigned int powersave_bias;
unsigned int io_is_busy;
};
struct cs_dbs_tuners {
unsigned int ignore_nice_load;
unsigned int sampling_rate;
unsigned int sampling_down_factor;
unsigned int up_threshold;
unsigned int down_threshold;
unsigned int freq_step;
}; };
/* Common Governor data across policies */ /* Common Governor data across policies */
struct dbs_data; struct dbs_governor {
struct common_dbs_data { struct cpufreq_governor gov;
/* Common across governors */ struct kobj_type kobj_type;
#define GOV_ONDEMAND 0
#define GOV_CONSERVATIVE 1
int governor;
struct attribute_group *attr_group_gov_sys; /* one governor - system */
struct attribute_group *attr_group_gov_pol; /* one governor - policy */
/* /*
* Common data for platforms that don't set * Common data for platforms that don't set
...@@ -210,74 +159,32 @@ struct common_dbs_data { ...@@ -210,74 +159,32 @@ struct common_dbs_data {
*/ */
struct dbs_data *gdbs_data; struct dbs_data *gdbs_data;
struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy);
void *(*get_cpu_dbs_info_s)(int cpu); struct policy_dbs_info *(*alloc)(void);
unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy, void (*free)(struct policy_dbs_info *policy_dbs);
bool modify_all);
void (*gov_check_cpu)(int cpu, unsigned int load);
int (*init)(struct dbs_data *dbs_data, bool notify); int (*init)(struct dbs_data *dbs_data, bool notify);
void (*exit)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify);
void (*start)(struct cpufreq_policy *policy);
/* Governor specific ops, see below */
void *gov_ops;
/*
* Protects governor's data (struct dbs_data and struct common_dbs_data)
*/
struct mutex mutex;
}; };
/* Governor Per policy data */ static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy)
struct dbs_data { {
struct common_dbs_data *cdata; return container_of(policy->governor, struct dbs_governor, gov);
unsigned int min_sampling_rate; }
int usage_count;
void *tuners;
};
/* Governor specific ops, will be passed to dbs_data->gov_ops */ /* Governor specific operations */
struct od_ops { struct od_ops {
void (*powersave_bias_init_cpu)(int cpu);
unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy,
unsigned int freq_next, unsigned int relation); unsigned int freq_next, unsigned int relation);
void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq);
}; };
static inline int delay_for_sampling_rate(unsigned int sampling_rate) unsigned int dbs_update(struct cpufreq_policy *policy);
{ int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event);
int delay = usecs_to_jiffies(sampling_rate);
/* We want all CPUs to do sampling nearly on same jiffy */
if (num_online_cpus() > 1)
delay -= jiffies % delay;
return delay;
}
#define declare_show_sampling_rate_min(_gov) \
static ssize_t show_sampling_rate_min_gov_sys \
(struct kobject *kobj, struct attribute *attr, char *buf) \
{ \
struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \
return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \
} \
\
static ssize_t show_sampling_rate_min_gov_pol \
(struct cpufreq_policy *policy, char *buf) \
{ \
struct dbs_data *dbs_data = policy->governor_data; \
return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \
}
extern struct mutex cpufreq_governor_lock;
void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay);
void gov_cancel_work(struct cpu_common_dbs_info *shared);
void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
int cpufreq_governor_dbs(struct cpufreq_policy *policy,
struct common_dbs_data *cdata, unsigned int event);
void od_register_powersave_bias_handler(unsigned int (*f) void od_register_powersave_bias_handler(unsigned int (*f)
(struct cpufreq_policy *, unsigned int, unsigned int), (struct cpufreq_policy *, unsigned int, unsigned int),
unsigned int powersave_bias); unsigned int powersave_bias);
void od_unregister_powersave_bias_handler(void); void od_unregister_powersave_bias_handler(void);
ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
size_t count);
void gov_update_cpu_data(struct dbs_data *dbs_data);
#endif /* _CPUFREQ_GOVERNOR_H */ #endif /* _CPUFREQ_GOVERNOR_H */
This diff has been collapsed.
/*
* Header file for CPUFreq ondemand governor and related code.
*
* Copyright (C) 2016, Intel Corporation
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "cpufreq_governor.h"
struct od_policy_dbs_info {
struct policy_dbs_info policy_dbs;
struct cpufreq_frequency_table *freq_table;
unsigned int freq_lo;
unsigned int freq_lo_delay_us;
unsigned int freq_hi_delay_us;
unsigned int sample_type:1;
};
static inline struct od_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs)
{
return container_of(policy_dbs, struct od_policy_dbs_info, policy_dbs);
}
struct od_dbs_tuners {
unsigned int powersave_bias;
};
...@@ -33,10 +33,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, ...@@ -33,10 +33,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy,
return 0; return 0;
} }
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE static struct cpufreq_governor cpufreq_gov_performance = {
static
#endif
struct cpufreq_governor cpufreq_gov_performance = {
.name = "performance", .name = "performance",
.governor = cpufreq_governor_performance, .governor = cpufreq_governor_performance,
.owner = THIS_MODULE, .owner = THIS_MODULE,
...@@ -52,6 +49,19 @@ static void __exit cpufreq_gov_performance_exit(void) ...@@ -52,6 +49,19 @@ static void __exit cpufreq_gov_performance_exit(void)
cpufreq_unregister_governor(&cpufreq_gov_performance); cpufreq_unregister_governor(&cpufreq_gov_performance);
} }
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
struct cpufreq_governor *cpufreq_default_governor(void)
{
return &cpufreq_gov_performance;
}
#endif
#ifndef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE
struct cpufreq_governor *cpufreq_fallback_governor(void)
{
return &cpufreq_gov_performance;
}
#endif
MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); MODULE_DESCRIPTION("CPUfreq policy governor 'performance'");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
......
...@@ -33,10 +33,7 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, ...@@ -33,10 +33,7 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy,
return 0; return 0;
} }
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE static struct cpufreq_governor cpufreq_gov_powersave = {
static
#endif
struct cpufreq_governor cpufreq_gov_powersave = {
.name = "powersave", .name = "powersave",
.governor = cpufreq_governor_powersave, .governor = cpufreq_governor_powersave,
.owner = THIS_MODULE, .owner = THIS_MODULE,
...@@ -57,6 +54,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); ...@@ -57,6 +54,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE
struct cpufreq_governor *cpufreq_default_governor(void)
{
return &cpufreq_gov_powersave;
}
fs_initcall(cpufreq_gov_powersave_init); fs_initcall(cpufreq_gov_powersave_init);
#else #else
module_init(cpufreq_gov_powersave_init); module_init(cpufreq_gov_powersave_init);
......
...@@ -89,10 +89,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, ...@@ -89,10 +89,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
return rc; return rc;
} }
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE static struct cpufreq_governor cpufreq_gov_userspace = {
static
#endif
struct cpufreq_governor cpufreq_gov_userspace = {
.name = "userspace", .name = "userspace",
.governor = cpufreq_governor_userspace, .governor = cpufreq_governor_userspace,
.store_setspeed = cpufreq_set, .store_setspeed = cpufreq_set,
...@@ -116,6 +113,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); ...@@ -116,6 +113,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE
struct cpufreq_governor *cpufreq_default_governor(void)
{
return &cpufreq_gov_userspace;
}
fs_initcall(cpufreq_gov_userspace_init); fs_initcall(cpufreq_gov_userspace_init);
#else #else
module_init(cpufreq_gov_userspace_init); module_init(cpufreq_gov_userspace_init);
......
...@@ -71,7 +71,7 @@ struct sample { ...@@ -71,7 +71,7 @@ struct sample {
u64 mperf; u64 mperf;
u64 tsc; u64 tsc;
int freq; int freq;
ktime_t time; u64 time;
}; };
struct pstate_data { struct pstate_data {
...@@ -103,13 +103,13 @@ struct _pid { ...@@ -103,13 +103,13 @@ struct _pid {
struct cpudata { struct cpudata {
int cpu; int cpu;
struct timer_list timer; struct update_util_data update_util;
struct pstate_data pstate; struct pstate_data pstate;
struct vid_data vid; struct vid_data vid;
struct _pid pid; struct _pid pid;
ktime_t last_sample_time; u64 last_sample_time;
u64 prev_aperf; u64 prev_aperf;
u64 prev_mperf; u64 prev_mperf;
u64 prev_tsc; u64 prev_tsc;
...@@ -120,6 +120,7 @@ struct cpudata { ...@@ -120,6 +120,7 @@ struct cpudata {
static struct cpudata **all_cpu_data; static struct cpudata **all_cpu_data;
struct pstate_adjust_policy { struct pstate_adjust_policy {
int sample_rate_ms; int sample_rate_ms;
s64 sample_rate_ns;
int deadband; int deadband;
int setpoint; int setpoint;
int p_gain_pct; int p_gain_pct;
...@@ -197,8 +198,8 @@ static struct perf_limits *limits = &powersave_limits; ...@@ -197,8 +198,8 @@ static struct perf_limits *limits = &powersave_limits;
static inline void pid_reset(struct _pid *pid, int setpoint, int busy, static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
int deadband, int integral) { int deadband, int integral) {
pid->setpoint = setpoint; pid->setpoint = int_tofp(setpoint);
pid->deadband = deadband; pid->deadband = int_tofp(deadband);
pid->integral = int_tofp(integral); pid->integral = int_tofp(integral);
pid->last_err = int_tofp(setpoint) - int_tofp(busy); pid->last_err = int_tofp(setpoint) - int_tofp(busy);
} }
...@@ -224,9 +225,9 @@ static signed int pid_calc(struct _pid *pid, int32_t busy) ...@@ -224,9 +225,9 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
int32_t pterm, dterm, fp_error; int32_t pterm, dterm, fp_error;
int32_t integral_limit; int32_t integral_limit;
fp_error = int_tofp(pid->setpoint) - busy; fp_error = pid->setpoint - busy;
if (abs(fp_error) <= int_tofp(pid->deadband)) if (abs(fp_error) <= pid->deadband)
return 0; return 0;
pterm = mul_fp(pid->p_gain, fp_error); pterm = mul_fp(pid->p_gain, fp_error);
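The two hunks above move the int_tofp() conversions of setpoint and deadband from pid_calc() into pid_reset(), so the hot path only subtracts and compares fixed-point values. intel_pstate's fixed-point helpers use an 8-bit fractional part; FRAC_BITS is not visible in this diff, so treat that value as an assumption. A standalone sketch of the arithmetic:

#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS 8				/* assumed to match the driver's convention */
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

static int32_t mul_fp(int32_t x, int32_t y)
{
	return (int32_t)(((int64_t)x * (int64_t)y) >> FRAC_BITS);
}

int main(void)
{
	int32_t setpoint = (int32_t)int_tofp(97);	/* converted once, as in pid_reset() */
	int32_t busy = (int32_t)int_tofp(60);		/* already fixed point when pid_calc() runs */
	int32_t p_gain = (int32_t)(int_tofp(20) / 100);	/* 20% proportional gain */

	int32_t fp_error = setpoint - busy;		/* no per-sample int_tofp() needed */
	int32_t pterm = mul_fp(p_gain, fp_error);

	printf("pterm ~= %d\n", (int)fp_toint(pterm));	/* about 7 */
	return 0;
}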
...@@ -286,7 +287,7 @@ static inline void update_turbo_state(void) ...@@ -286,7 +287,7 @@ static inline void update_turbo_state(void)
cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
} }
static void intel_pstate_hwp_set(void) static void intel_pstate_hwp_set(const struct cpumask *cpumask)
{ {
int min, hw_min, max, hw_max, cpu, range, adj_range; int min, hw_min, max, hw_max, cpu, range, adj_range;
u64 value, cap; u64 value, cap;
...@@ -296,9 +297,7 @@ static void intel_pstate_hwp_set(void) ...@@ -296,9 +297,7 @@ static void intel_pstate_hwp_set(void)
hw_max = HWP_HIGHEST_PERF(cap); hw_max = HWP_HIGHEST_PERF(cap);
range = hw_max - hw_min; range = hw_max - hw_min;
get_online_cpus(); for_each_cpu(cpu, cpumask) {
for_each_online_cpu(cpu) {
rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
adj_range = limits->min_perf_pct * range / 100; adj_range = limits->min_perf_pct * range / 100;
min = hw_min + adj_range; min = hw_min + adj_range;
...@@ -317,7 +316,12 @@ static void intel_pstate_hwp_set(void) ...@@ -317,7 +316,12 @@ static void intel_pstate_hwp_set(void)
value |= HWP_MAX_PERF(max); value |= HWP_MAX_PERF(max);
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
} }
}
static void intel_pstate_hwp_set_online_cpus(void)
{
get_online_cpus();
intel_pstate_hwp_set(cpu_online_mask);
put_online_cpus(); put_online_cpus();
} }
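For orientation, the loop in intel_pstate_hwp_set() projects the percent limits onto the hardware range read from MSR_HWP_CAPABILITIES: adj_range = pct * (hw_max - hw_min) / 100, then the bound is hw_min + adj_range. The max side is computed symmetrically to the min side shown in the hunk; that symmetry and the numbers below are assumptions used only to illustrate the arithmetic.

#include <stdio.h>

int main(void)
{
	int hw_min = 8, hw_max = 36;		/* invented HWP capability values */
	int min_perf_pct = 25, max_perf_pct = 100;
	int range = hw_max - hw_min;		/* 28 */

	int min = hw_min + min_perf_pct * range / 100;	/* 8 + 7  = 15 */
	int max = hw_min + max_perf_pct * range / 100;	/* 8 + 28 = 36 */

	printf("HWP request window: min=%d max=%d\n", min, max);
	return 0;
}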
...@@ -439,7 +443,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, ...@@ -439,7 +443,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
limits->no_turbo = clamp_t(int, input, 0, 1); limits->no_turbo = clamp_t(int, input, 0, 1);
if (hwp_active) if (hwp_active)
intel_pstate_hwp_set(); intel_pstate_hwp_set_online_cpus();
return count; return count;
} }
...@@ -465,7 +469,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, ...@@ -465,7 +469,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
int_tofp(100)); int_tofp(100));
if (hwp_active) if (hwp_active)
intel_pstate_hwp_set(); intel_pstate_hwp_set_online_cpus();
return count; return count;
} }
...@@ -490,7 +494,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, ...@@ -490,7 +494,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
int_tofp(100)); int_tofp(100));
if (hwp_active) if (hwp_active)
intel_pstate_hwp_set(); intel_pstate_hwp_set_online_cpus();
return count; return count;
} }
...@@ -531,6 +535,9 @@ static void __init intel_pstate_sysfs_expose_params(void) ...@@ -531,6 +535,9 @@ static void __init intel_pstate_sysfs_expose_params(void)
static void intel_pstate_hwp_enable(struct cpudata *cpudata) static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{ {
/* First disable HWP notification interrupt as we don't process them */
wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
} }
...@@ -712,7 +719,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate) ...@@ -712,7 +719,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
if (limits->no_turbo && !limits->turbo_disabled) if (limits->no_turbo && !limits->turbo_disabled)
val |= (u64)1 << 32; val |= (u64)1 << 32;
wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); wrmsrl(MSR_IA32_PERF_CTL, val);
} }
static int knl_get_turbo_pstate(void) static int knl_get_turbo_pstate(void)
...@@ -824,11 +831,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) ...@@ -824,11 +831,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
* policy, or by cpu specific default values determined through * policy, or by cpu specific default values determined through
* experimentation. * experimentation.
*/ */
max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf)); max_perf_adj = fp_toint(max_perf * limits->max_perf);
*max = clamp_t(int, max_perf_adj, *max = clamp_t(int, max_perf_adj,
cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf)); min_perf = fp_toint(max_perf * limits->min_perf);
*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
} }
...@@ -874,16 +881,10 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) ...@@ -874,16 +881,10 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)
core_pct = int_tofp(sample->aperf) * int_tofp(100); core_pct = int_tofp(sample->aperf) * int_tofp(100);
core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
sample->freq = fp_toint(
mul_fp(int_tofp(
cpu->pstate.max_pstate_physical *
cpu->pstate.scaling / 100),
core_pct));
sample->core_pct_busy = (int32_t)core_pct; sample->core_pct_busy = (int32_t)core_pct;
} }
static inline void intel_pstate_sample(struct cpudata *cpu) static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
{ {
u64 aperf, mperf; u64 aperf, mperf;
unsigned long flags; unsigned long flags;
...@@ -893,14 +894,14 @@ static inline void intel_pstate_sample(struct cpudata *cpu) ...@@ -893,14 +894,14 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf); rdmsrl(MSR_IA32_MPERF, mperf);
tsc = rdtsc(); tsc = rdtsc();
if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) { if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
local_irq_restore(flags); local_irq_restore(flags);
return; return false;
} }
local_irq_restore(flags); local_irq_restore(flags);
cpu->last_sample_time = cpu->sample.time; cpu->last_sample_time = cpu->sample.time;
cpu->sample.time = ktime_get(); cpu->sample.time = time;
cpu->sample.aperf = aperf; cpu->sample.aperf = aperf;
cpu->sample.mperf = mperf; cpu->sample.mperf = mperf;
cpu->sample.tsc = tsc; cpu->sample.tsc = tsc;
...@@ -908,27 +909,16 @@ static inline void intel_pstate_sample(struct cpudata *cpu) ...@@ -908,27 +909,16 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
cpu->sample.mperf -= cpu->prev_mperf; cpu->sample.mperf -= cpu->prev_mperf;
cpu->sample.tsc -= cpu->prev_tsc; cpu->sample.tsc -= cpu->prev_tsc;
intel_pstate_calc_busy(cpu);
cpu->prev_aperf = aperf; cpu->prev_aperf = aperf;
cpu->prev_mperf = mperf; cpu->prev_mperf = mperf;
cpu->prev_tsc = tsc; cpu->prev_tsc = tsc;
return true;
} }
static inline void intel_hwp_set_sample_time(struct cpudata *cpu) static inline int32_t get_avg_frequency(struct cpudata *cpu)
{
int delay;
delay = msecs_to_jiffies(50);
mod_timer_pinned(&cpu->timer, jiffies + delay);
}
static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
{ {
int delay; return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf *
cpu->pstate.scaling, cpu->sample.mperf);
delay = msecs_to_jiffies(pid_params.sample_rate_ms);
mod_timer_pinned(&cpu->timer, jiffies + delay);
} }
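get_avg_frequency() replaces the cached sample->freq with an on-demand computation: average kHz = max_pstate_physical * scaling * APERF delta / MPERF delta. A quick standalone check with illustrative numbers, where an APERF/MPERF ratio of 0.5 against a 2.4 GHz physical maximum gives 1.2 GHz:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t max_pstate_physical = 24;		/* assumed: 24 * 100 MHz = 2.4 GHz */
	uint64_t scaling = 100000;			/* kHz per P-state unit */
	uint64_t aperf = 1000000, mperf = 2000000;	/* counter deltas since the last sample */

	uint64_t avg_khz = max_pstate_physical * scaling * aperf / mperf;
	printf("%llu kHz\n", (unsigned long long)avg_khz);	/* 1200000 */
	return 0;
}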
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
...@@ -954,7 +944,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) ...@@ -954,7 +944,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
mperf = cpu->sample.mperf + delta_iowait_mperf; mperf = cpu->sample.mperf + delta_iowait_mperf;
cpu->prev_cummulative_iowait = cummulative_iowait; cpu->prev_cummulative_iowait = cummulative_iowait;
/* /*
* The load can be estimated as the ratio of the mperf counter * The load can be estimated as the ratio of the mperf counter
* running at a constant frequency during active periods * running at a constant frequency during active periods
...@@ -970,8 +959,9 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) ...@@ -970,8 +959,9 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
{ {
int32_t core_busy, max_pstate, current_pstate, sample_ratio; int32_t core_busy, max_pstate, current_pstate, sample_ratio;
s64 duration_us; u64 duration_ns;
u32 sample_time;
intel_pstate_calc_busy(cpu);
/* /*
* core_busy is the ratio of actual performance to max * core_busy is the ratio of actual performance to max
...@@ -990,18 +980,16 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) ...@@ -990,18 +980,16 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
/* /*
* Since we have a deferred timer, it will not fire unless * Since our utilization update callback will not run unless we are
* we are in C0. So, determine if the actual elapsed time * in C0, check if the actual elapsed time is significantly greater (3x)
* is significantly greater (3x) than our sample interval. If it * than our sample interval. If it is, then we were idle for a long
* is, then we were idle for a long enough period of time * enough period of time to adjust our busyness.
* to adjust our busyness.
*/ */
sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; duration_ns = cpu->sample.time - cpu->last_sample_time;
duration_us = ktime_us_delta(cpu->sample.time, if ((s64)duration_ns > pid_params.sample_rate_ns * 3
cpu->last_sample_time); && cpu->last_sample_time > 0) {
if (duration_us > sample_time * 3) { sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
sample_ratio = div_fp(int_tofp(sample_time), int_tofp(duration_ns));
int_tofp(duration_us));
core_busy = mul_fp(core_busy, sample_ratio); core_busy = mul_fp(core_busy, sample_ratio);
} }
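The 3x check above preserves the semantics of the old deferrable timer: if the callback runs long after the previous sample, the CPU was mostly idle in between, so core_busy is scaled by sample_rate / duration. With a 10 ms sample rate and a 40 ms gap the factor is 0.25; the snippet spells that out in plain arithmetic, whereas the kernel does it in fixed point.

#include <stdio.h>

int main(void)
{
	unsigned long long sample_rate_ns = 10ULL * 1000 * 1000;	/* assumed 10 ms */
	unsigned long long duration_ns = 40ULL * 1000 * 1000;		/* time since the last sample */

	if (duration_ns > 3 * sample_rate_ns) {
		double sample_ratio = (double)sample_rate_ns / (double)duration_ns;
		printf("scale core_busy by %.2f\n", sample_ratio);	/* 0.25 */
	}
	return 0;
}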
...@@ -1028,26 +1016,21 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) ...@@ -1028,26 +1016,21 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
sample->mperf, sample->mperf,
sample->aperf, sample->aperf,
sample->tsc, sample->tsc,
sample->freq); get_avg_frequency(cpu));
} }
static void intel_hwp_timer_func(unsigned long __data) static void intel_pstate_update_util(struct update_util_data *data, u64 time,
unsigned long util, unsigned long max)
{ {
struct cpudata *cpu = (struct cpudata *) __data; struct cpudata *cpu = container_of(data, struct cpudata, update_util);
u64 delta_ns = time - cpu->sample.time;
intel_pstate_sample(cpu); if ((s64)delta_ns >= pid_params.sample_rate_ns) {
intel_hwp_set_sample_time(cpu); bool sample_taken = intel_pstate_sample(cpu, time);
}
static void intel_pstate_timer_func(unsigned long __data) if (sample_taken && !hwp_active)
{ intel_pstate_adjust_busy_pstate(cpu);
struct cpudata *cpu = (struct cpudata *) __data; }
intel_pstate_sample(cpu);
intel_pstate_adjust_busy_pstate(cpu);
intel_pstate_set_sample_time(cpu);
} }
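intel_pstate_update_util() replaces the per-CPU timers: the scheduler's utilization hook fires far more often than the driver wants to sample, so the callback gates itself on sample_rate_ns and only adjusts the P-state when a sample was actually taken and HWP is not in control. A minimal, self-contained sketch of that gating, with the interval and timestamps invented:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SAMPLE_RATE_NS (10ULL * 1000 * 1000)	/* assumed 10 ms */

struct cpu_sim {
	uint64_t last_sample_time;
};

/* returns true only when enough time has passed to take a new sample */
static bool maybe_sample(struct cpu_sim *cpu, uint64_t now_ns)
{
	if (now_ns - cpu->last_sample_time < SAMPLE_RATE_NS)
		return false;			/* called too soon, do nothing */

	cpu->last_sample_time = now_ns;		/* sample here */
	return true;
}

int main(void)
{
	struct cpu_sim cpu = { 0 };

	for (uint64_t t = 0; t <= 25ULL * 1000 * 1000; t += 5ULL * 1000 * 1000)
		printf("t=%llu ms sampled=%d\n",
		       (unsigned long long)(t / 1000000), maybe_sample(&cpu, t));
	return 0;
}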
#define ICPU(model, policy) \ #define ICPU(model, policy) \
...@@ -1095,24 +1078,19 @@ static int intel_pstate_init_cpu(unsigned int cpunum) ...@@ -1095,24 +1078,19 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
cpu->cpu = cpunum; cpu->cpu = cpunum;
if (hwp_active) if (hwp_active) {
intel_pstate_hwp_enable(cpu); intel_pstate_hwp_enable(cpu);
pid_params.sample_rate_ms = 50;
pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
}
intel_pstate_get_cpu_pstates(cpu); intel_pstate_get_cpu_pstates(cpu);
init_timer_deferrable(&cpu->timer);
cpu->timer.data = (unsigned long)cpu;
cpu->timer.expires = jiffies + HZ/100;
if (!hwp_active)
cpu->timer.function = intel_pstate_timer_func;
else
cpu->timer.function = intel_hwp_timer_func;
intel_pstate_busy_pid_reset(cpu); intel_pstate_busy_pid_reset(cpu);
intel_pstate_sample(cpu); intel_pstate_sample(cpu, 0);
add_timer_on(&cpu->timer, cpunum); cpu->update_util.func = intel_pstate_update_util;
cpufreq_set_update_util_data(cpunum, &cpu->update_util);
pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
...@@ -1128,7 +1106,7 @@ static unsigned int intel_pstate_get(unsigned int cpu_num) ...@@ -1128,7 +1106,7 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)
if (!cpu) if (!cpu)
return 0; return 0;
sample = &cpu->sample; sample = &cpu->sample;
return sample->freq; return get_avg_frequency(cpu);
} }
static int intel_pstate_set_policy(struct cpufreq_policy *policy) static int intel_pstate_set_policy(struct cpufreq_policy *policy)
...@@ -1141,7 +1119,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) ...@@ -1141,7 +1119,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
pr_debug("intel_pstate: set performance\n"); pr_debug("intel_pstate: set performance\n");
limits = &performance_limits; limits = &performance_limits;
if (hwp_active) if (hwp_active)
intel_pstate_hwp_set(); intel_pstate_hwp_set(policy->cpus);
return 0; return 0;
} }
...@@ -1173,7 +1151,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) ...@@ -1173,7 +1151,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
int_tofp(100)); int_tofp(100));
if (hwp_active) if (hwp_active)
intel_pstate_hwp_set(); intel_pstate_hwp_set(policy->cpus);
return 0; return 0;
} }
...@@ -1196,7 +1174,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) ...@@ -1196,7 +1174,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
del_timer_sync(&all_cpu_data[cpu_num]->timer); cpufreq_set_update_util_data(cpu_num, NULL);
synchronize_sched();
if (hwp_active) if (hwp_active)
return; return;
...@@ -1260,6 +1240,7 @@ static int intel_pstate_msrs_not_valid(void) ...@@ -1260,6 +1240,7 @@ static int intel_pstate_msrs_not_valid(void)
static void copy_pid_params(struct pstate_adjust_policy *policy) static void copy_pid_params(struct pstate_adjust_policy *policy)
{ {
pid_params.sample_rate_ms = policy->sample_rate_ms; pid_params.sample_rate_ms = policy->sample_rate_ms;
pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
pid_params.p_gain_pct = policy->p_gain_pct; pid_params.p_gain_pct = policy->p_gain_pct;
pid_params.i_gain_pct = policy->i_gain_pct; pid_params.i_gain_pct = policy->i_gain_pct;
pid_params.d_gain_pct = policy->d_gain_pct; pid_params.d_gain_pct = policy->d_gain_pct;
...@@ -1397,6 +1378,11 @@ static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; } ...@@ -1397,6 +1378,11 @@ static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; } static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
#endif /* CONFIG_ACPI */ #endif /* CONFIG_ACPI */
static const struct x86_cpu_id hwp_support_ids[] __initconst = {
{ X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP },
{}
};
static int __init intel_pstate_init(void) static int __init intel_pstate_init(void)
{ {
int cpu, rc = 0; int cpu, rc = 0;
...@@ -1406,17 +1392,16 @@ static int __init intel_pstate_init(void) ...@@ -1406,17 +1392,16 @@ static int __init intel_pstate_init(void)
if (no_load) if (no_load)
return -ENODEV; return -ENODEV;
if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
copy_cpu_funcs(&core_params.funcs);
hwp_active++;
goto hwp_cpu_matched;
}
id = x86_match_cpu(intel_pstate_cpu_ids); id = x86_match_cpu(intel_pstate_cpu_ids);
if (!id) if (!id)
return -ENODEV; return -ENODEV;
/*
* The Intel pstate driver will be ignored if the platform
* firmware has its own power management modes.
*/
if (intel_pstate_platform_pwr_mgmt_exists())
return -ENODEV;
cpu_def = (struct cpu_defaults *)id->driver_data; cpu_def = (struct cpu_defaults *)id->driver_data;
copy_pid_params(&cpu_def->pid_policy); copy_pid_params(&cpu_def->pid_policy);
...@@ -1425,17 +1410,20 @@ static int __init intel_pstate_init(void) ...@@ -1425,17 +1410,20 @@ static int __init intel_pstate_init(void)
if (intel_pstate_msrs_not_valid()) if (intel_pstate_msrs_not_valid())
return -ENODEV; return -ENODEV;
hwp_cpu_matched:
/*
* The Intel pstate driver will be ignored if the platform
* firmware has its own power management modes.
*/
if (intel_pstate_platform_pwr_mgmt_exists())
return -ENODEV;
pr_info("Intel P-state driver initializing.\n"); pr_info("Intel P-state driver initializing.\n");
all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
if (!all_cpu_data) if (!all_cpu_data)
return -ENOMEM; return -ENOMEM;
if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
pr_info("intel_pstate: HWP enabled\n");
hwp_active++;
}
if (!hwp_active && hwp_only) if (!hwp_active && hwp_only)
goto out; goto out;
...@@ -1446,12 +1434,16 @@ static int __init intel_pstate_init(void) ...@@ -1446,12 +1434,16 @@ static int __init intel_pstate_init(void)
intel_pstate_debug_expose_params(); intel_pstate_debug_expose_params();
intel_pstate_sysfs_expose_params(); intel_pstate_sysfs_expose_params();
if (hwp_active)
pr_info("intel_pstate: HWP enabled\n");
return rc; return rc;
out: out:
get_online_cpus(); get_online_cpus();
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
if (all_cpu_data[cpu]) { if (all_cpu_data[cpu]) {
del_timer_sync(&all_cpu_data[cpu]->timer); cpufreq_set_update_util_data(cpu, NULL);
synchronize_sched();
kfree(all_cpu_data[cpu]); kfree(all_cpu_data[cpu]);
} }
} }
......
...@@ -28,6 +28,8 @@ ...@@ -28,6 +28,8 @@
#include <linux/of.h> #include <linux/of.h>
#include <linux/reboot.h> #include <linux/reboot.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/cpu.h>
#include <trace/events/power.h>
#include <asm/cputhreads.h> #include <asm/cputhreads.h>
#include <asm/firmware.h> #include <asm/firmware.h>
...@@ -42,13 +44,24 @@ ...@@ -42,13 +44,24 @@
static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
static bool rebooting, throttled, occ_reset; static bool rebooting, throttled, occ_reset;
static unsigned int *core_to_chip_map;
static const char * const throttle_reason[] = {
"No throttling",
"Power Cap",
"Processor Over Temperature",
"Power Supply Failure",
"Over Current",
"OCC Reset"
};
static struct chip { static struct chip {
unsigned int id; unsigned int id;
bool throttled; bool throttled;
bool restore;
u8 throttle_reason;
cpumask_t mask; cpumask_t mask;
struct work_struct throttle; struct work_struct throttle;
bool restore;
} *chips; } *chips;
static int nr_chips; static int nr_chips;
...@@ -312,13 +325,14 @@ static inline unsigned int get_nominal_index(void) ...@@ -312,13 +325,14 @@ static inline unsigned int get_nominal_index(void)
static void powernv_cpufreq_throttle_check(void *data) static void powernv_cpufreq_throttle_check(void *data)
{ {
unsigned int cpu = smp_processor_id(); unsigned int cpu = smp_processor_id();
unsigned int chip_id = core_to_chip_map[cpu_core_index_of_thread(cpu)];
unsigned long pmsr; unsigned long pmsr;
int pmsr_pmax, i; int pmsr_pmax, i;
pmsr = get_pmspr(SPRN_PMSR); pmsr = get_pmspr(SPRN_PMSR);
for (i = 0; i < nr_chips; i++) for (i = 0; i < nr_chips; i++)
if (chips[i].id == cpu_to_chip_id(cpu)) if (chips[i].id == chip_id)
break; break;
/* Check for Pmax Capping */ /* Check for Pmax Capping */
...@@ -328,17 +342,17 @@ static void powernv_cpufreq_throttle_check(void *data) ...@@ -328,17 +342,17 @@ static void powernv_cpufreq_throttle_check(void *data)
goto next; goto next;
chips[i].throttled = true; chips[i].throttled = true;
if (pmsr_pmax < powernv_pstate_info.nominal) if (pmsr_pmax < powernv_pstate_info.nominal)
pr_crit("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n",
cpu, chips[i].id, pmsr_pmax, cpu, chips[i].id, pmsr_pmax,
powernv_pstate_info.nominal); powernv_pstate_info.nominal);
else trace_powernv_throttle(chips[i].id,
pr_info("CPU %d on Chip %u has Pmax reduced below turbo frequency (%d < %d)\n", throttle_reason[chips[i].throttle_reason],
cpu, chips[i].id, pmsr_pmax, pmsr_pmax);
powernv_pstate_info.max);
} else if (chips[i].throttled) { } else if (chips[i].throttled) {
chips[i].throttled = false; chips[i].throttled = false;
pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu, trace_powernv_throttle(chips[i].id,
chips[i].id, pmsr_pmax); throttle_reason[chips[i].throttle_reason],
pmsr_pmax);
} }
/* Check if Psafe_mode_active is set in PMSR. */ /* Check if Psafe_mode_active is set in PMSR. */
...@@ -356,7 +370,7 @@ static void powernv_cpufreq_throttle_check(void *data) ...@@ -356,7 +370,7 @@ static void powernv_cpufreq_throttle_check(void *data)
if (throttled) { if (throttled) {
pr_info("PMSR = %16lx\n", pmsr); pr_info("PMSR = %16lx\n", pmsr);
pr_crit("CPU Frequency could be throttled\n"); pr_warn("CPU Frequency could be throttled\n");
} }
} }
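The throttle check now reads the chip id from core_to_chip_map, indexed by the running CPU's core number, instead of calling cpu_to_chip_id() on every invocation; the map is filled once in init_chip_info() further down. A tiny sketch of the precomputed-lookup idea, with the core count and mapping values invented:

#include <stdio.h>

#define NR_CORES 8

/* filled once at init time: which chip each core sits on */
static const unsigned int core_to_chip_map[NR_CORES] = { 0, 0, 0, 0, 8, 8, 8, 8 };

static unsigned int chip_of_core(unsigned int core)
{
	return core_to_chip_map[core];	/* O(1) array read, no firmware walk */
}

int main(void)
{
	printf("core 5 is on chip %u\n", chip_of_core(5));	/* chip 8 */
	return 0;
}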
...@@ -423,18 +437,19 @@ void powernv_cpufreq_work_fn(struct work_struct *work) ...@@ -423,18 +437,19 @@ void powernv_cpufreq_work_fn(struct work_struct *work)
{ {
struct chip *chip = container_of(work, struct chip, throttle); struct chip *chip = container_of(work, struct chip, throttle);
unsigned int cpu; unsigned int cpu;
cpumask_var_t mask; cpumask_t mask;
smp_call_function_any(&chip->mask, get_online_cpus();
cpumask_and(&mask, &chip->mask, cpu_online_mask);
smp_call_function_any(&mask,
powernv_cpufreq_throttle_check, NULL, 0); powernv_cpufreq_throttle_check, NULL, 0);
if (!chip->restore) if (!chip->restore)
return; goto out;
chip->restore = false; chip->restore = false;
cpumask_copy(mask, &chip->mask); for_each_cpu(cpu, &mask) {
for_each_cpu_and(cpu, mask, cpu_online_mask) { int index;
int index, tcpu;
struct cpufreq_policy policy; struct cpufreq_policy policy;
cpufreq_get_policy(&policy, cpu); cpufreq_get_policy(&policy, cpu);
...@@ -442,20 +457,12 @@ void powernv_cpufreq_work_fn(struct work_struct *work) ...@@ -442,20 +457,12 @@ void powernv_cpufreq_work_fn(struct work_struct *work)
policy.cur, policy.cur,
CPUFREQ_RELATION_C, &index); CPUFREQ_RELATION_C, &index);
powernv_cpufreq_target_index(&policy, index); powernv_cpufreq_target_index(&policy, index);
for_each_cpu(tcpu, policy.cpus) cpumask_andnot(&mask, &mask, policy.cpus);
cpumask_clear_cpu(tcpu, mask);
} }
out:
put_online_cpus();
} }
static char throttle_reason[][30] = {
"No throttling",
"Power Cap",
"Processor Over Temperature",
"Power Supply Failure",
"Over Current",
"OCC Reset"
};
static int powernv_cpufreq_occ_msg(struct notifier_block *nb, static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
unsigned long msg_type, void *_msg) unsigned long msg_type, void *_msg)
{ {
...@@ -481,7 +488,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, ...@@ -481,7 +488,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
*/ */
if (!throttled) { if (!throttled) {
throttled = true; throttled = true;
pr_crit("CPU frequency is throttled for duration\n"); pr_warn("CPU frequency is throttled for duration\n");
} }
break; break;
...@@ -505,23 +512,18 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, ...@@ -505,23 +512,18 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
return 0; return 0;
} }
if (omsg.throttle_status && for (i = 0; i < nr_chips; i++)
if (chips[i].id == omsg.chip)
break;
if (omsg.throttle_status >= 0 &&
omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS)
pr_info("OCC: Chip %u Pmax reduced due to %s\n", chips[i].throttle_reason = omsg.throttle_status;
(unsigned int)omsg.chip,
throttle_reason[omsg.throttle_status]);
else if (!omsg.throttle_status)
pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip,
throttle_reason[omsg.throttle_status]);
else
return 0;
for (i = 0; i < nr_chips; i++) if (!omsg.throttle_status)
if (chips[i].id == omsg.chip) { chips[i].restore = true;
if (!omsg.throttle_status)
chips[i].restore = true; schedule_work(&chips[i].throttle);
schedule_work(&chips[i].throttle);
}
} }
return 0; return 0;
} }
@@ -556,29 +558,54 @@ static int init_chip_info(void)
     unsigned int chip[256];
     unsigned int cpu, i;
     unsigned int prev_chip_id = UINT_MAX;
+    cpumask_t cpu_mask;
+    int ret = -ENOMEM;

-    for_each_possible_cpu(cpu) {
+    core_to_chip_map = kcalloc(cpu_nr_cores(), sizeof(unsigned int),
+                               GFP_KERNEL);
+    if (!core_to_chip_map)
+        goto out;
+
+    cpumask_copy(&cpu_mask, cpu_possible_mask);
+    for_each_cpu(cpu, &cpu_mask) {
         unsigned int id = cpu_to_chip_id(cpu);

         if (prev_chip_id != id) {
             prev_chip_id = id;
             chip[nr_chips++] = id;
         }
+        core_to_chip_map[cpu_core_index_of_thread(cpu)] = id;
+        cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu));
     }

-    chips = kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL);
+    chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
     if (!chips)
-        return -ENOMEM;
+        goto free_chip_map;

     for (i = 0; i < nr_chips; i++) {
         chips[i].id = chip[i];
-        chips[i].throttled = false;
         cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
         INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
-        chips[i].restore = false;
     }

     return 0;
+
+free_chip_map:
+    kfree(core_to_chip_map);
+out:
+    return ret;
+}
+
+static inline void clean_chip_info(void)
+{
+    kfree(chips);
+    kfree(core_to_chip_map);
+}
+
+static inline void unregister_all_notifiers(void)
+{
+    opal_message_notifier_unregister(OPAL_MSG_OCC,
+                                     &powernv_cpufreq_opal_nb);
+    unregister_reboot_notifier(&powernv_cpufreq_reboot_nb);
 }

 static int __init powernv_cpufreq_init(void)
@@ -591,28 +618,35 @@ static int __init powernv_cpufreq_init(void)

     /* Discover pstates from device tree and init */
     rc = init_powernv_pstates();
-    if (rc) {
-        pr_info("powernv-cpufreq disabled. System does not support PState control\n");
-        return rc;
-    }
+    if (rc)
+        goto out;

     /* Populate chip info */
     rc = init_chip_info();
     if (rc)
-        return rc;
+        goto out;

     register_reboot_notifier(&powernv_cpufreq_reboot_nb);
     opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb);
-    return cpufreq_register_driver(&powernv_cpufreq_driver);
+
+    rc = cpufreq_register_driver(&powernv_cpufreq_driver);
+    if (!rc)
+        return 0;
+
+    pr_info("Failed to register the cpufreq driver (%d)\n", rc);
+    unregister_all_notifiers();
+    clean_chip_info();
+out:
+    pr_info("Platform driver disabled. System does not support PState control\n");
+    return rc;
 }
 module_init(powernv_cpufreq_init);

 static void __exit powernv_cpufreq_exit(void)
 {
-    unregister_reboot_notifier(&powernv_cpufreq_reboot_nb);
-    opal_message_notifier_unregister(OPAL_MSG_OCC,
-                                     &powernv_cpufreq_opal_nb);
     cpufreq_unregister_driver(&powernv_cpufreq_driver);
+    unregister_all_notifiers();
+    clean_chip_info();
 }
 module_exit(powernv_cpufreq_exit);
...
@@ -80,7 +80,6 @@ struct cpufreq_policy {
     unsigned int            last_policy; /* policy before unplug */
     struct cpufreq_governor *governor; /* see below */
     void                    *governor_data;
-    bool                    governor_enabled; /* governor start/stop flag */
     char                    last_governor[CPUFREQ_NAME_LEN]; /* last governor used */

     struct work_struct      update; /* if update_policy() needs to be
@@ -100,10 +99,6 @@ struct cpufreq_policy {
  * - Any routine that will write to the policy structure and/or may take away
  *   the policy altogether (eg. CPU hotplug), will hold this lock in write
  *   mode before doing so.
- *
- * Additional rules:
- * - Lock should not be held across
- *     __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);
  */
     struct rw_semaphore     rwsem;
@@ -464,29 +459,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 int cpufreq_register_governor(struct cpufreq_governor *governor);
 void cpufreq_unregister_governor(struct cpufreq_governor *governor);

-/* CPUFREQ DEFAULT GOVERNOR */
-/*
- * Performance governor is fallback governor if any other gov failed to auto
- * load due latency restrictions
- */
-#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
-extern struct cpufreq_governor cpufreq_gov_performance;
-#endif
-#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
-#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_performance)
-#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE)
-extern struct cpufreq_governor cpufreq_gov_powersave;
-#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_powersave)
-#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE)
-extern struct cpufreq_governor cpufreq_gov_userspace;
-#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_userspace)
-#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND)
-extern struct cpufreq_governor cpufreq_gov_ondemand;
-#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_ondemand)
-#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE)
-extern struct cpufreq_governor cpufreq_gov_conservative;
-#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative)
-#endif
+struct cpufreq_governor *cpufreq_default_governor(void);
+struct cpufreq_governor *cpufreq_fallback_governor(void);

 /*********************************************************************
  *                     FREQUENCY TABLE HELPERS                       *
@@ -525,16 +499,6 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev,
 }
 #endif

-static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos)
-{
-    while ((*pos)->frequency != CPUFREQ_TABLE_END)
-        if ((*pos)->frequency != CPUFREQ_ENTRY_INVALID)
-            return true;
-        else
-            (*pos)++;
-    return false;
-}
-
 /*
  * cpufreq_for_each_entry - iterate over a cpufreq_frequency_table
  * @pos: the cpufreq_frequency_table * to use as a loop cursor.
@@ -551,8 +515,11 @@ static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos)
  * @table: the cpufreq_frequency_table * to iterate over.
  */
 #define cpufreq_for_each_valid_entry(pos, table)                        \
-    for (pos = table; cpufreq_next_valid(&pos); pos++)
+    for (pos = table; pos->frequency != CPUFREQ_TABLE_END; pos++)       \
+        if (pos->frequency == CPUFREQ_ENTRY_INVALID)                    \
+            continue;                                                   \
+        else

 int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
                                     struct cpufreq_frequency_table *table);
...
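A note on the cpufreq_for_each_valid_entry() rewrite above: the cpufreq_next_valid() helper is gone and the macro now open-codes the skip of CPUFREQ_ENTRY_INVALID rows, ending in a dangling "else" so that the caller's statement becomes the loop body. A minimal usage sketch under that reading (the counting helper is hypothetical, not part of this series):

#include <linux/cpufreq.h>

/* Hypothetical helper: count the usable rows of a driver's frequency table. */
static unsigned int count_valid_freqs(struct cpufreq_frequency_table *table)
{
    struct cpufreq_frequency_table *pos;
    unsigned int count = 0;

    /* The macro's for/if/else skeleton skips CPUFREQ_ENTRY_INVALID rows,
     * so this body only ever sees usable frequencies. */
    cpufreq_for_each_valid_entry(pos, table)
        count++;

    return count;
}

Because the "continue" lives inside the macro's own "if", existing callers keep writing the same single-statement or braced loop body they did before.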
@@ -3207,4 +3207,13 @@ static inline unsigned long rlimit_max(unsigned int limit)
     return task_rlimit_max(current, limit);
 }

+#ifdef CONFIG_CPU_FREQ
+struct update_util_data {
+    void (*func)(struct update_util_data *data,
+                 u64 time, unsigned long util, unsigned long max);
+};
+
+void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
+#endif /* CONFIG_CPU_FREQ */
+
 #endif
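The interface added to linux/sched.h is deliberately tiny: a cpufreq governor embeds struct update_util_data in its own per-CPU state, fills in ->func, and publishes it with cpufreq_set_update_util_data(). A sketch under that assumption (all "my_gov_*" names are illustrative, not part of this series):

#include <linux/cpufreq.h>
#include <linux/percpu.h>
#include <linux/sched.h>

struct my_gov_cpu {
    struct update_util_data update_util;   /* must stay valid while published */
    u64 last_sample_time;
};

static DEFINE_PER_CPU(struct my_gov_cpu, my_gov_cpu_data);

/* Runs from the scheduler in an RCU-sched read-side section; must not sleep. */
static void my_gov_update(struct update_util_data *data, u64 time,
                          unsigned long util, unsigned long max)
{
    struct my_gov_cpu *g = container_of(data, struct my_gov_cpu, update_util);

    g->last_sample_time = time;
    /* ... decide here whether a frequency change is worth requesting ... */
}

static void my_gov_start_cpu(int cpu)
{
    struct my_gov_cpu *g = &per_cpu(my_gov_cpu_data, cpu);

    g->update_util.func = my_gov_update;
    cpufreq_set_update_util_data(cpu, &g->update_util);
}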
@@ -38,6 +38,28 @@ DEFINE_EVENT(cpu, cpu_idle,

     TP_ARGS(state, cpu_id)
 );

+TRACE_EVENT(powernv_throttle,
+
+    TP_PROTO(int chip_id, const char *reason, int pmax),
+
+    TP_ARGS(chip_id, reason, pmax),
+
+    TP_STRUCT__entry(
+        __field(int, chip_id)
+        __string(reason, reason)
+        __field(int, pmax)
+    ),
+
+    TP_fast_assign(
+        __entry->chip_id = chip_id;
+        __assign_str(reason, reason);
+        __entry->pmax = pmax;
+    ),
+
+    TP_printk("Chip %d Pmax %d %s", __entry->chip_id,
+              __entry->pmax, __get_str(reason))
+);
 TRACE_EVENT(pstate_sample,

     TP_PROTO(u32 core_busy,
...
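For the powernv_throttle tracepoint added above, TRACE_EVENT() generates a trace_powernv_throttle(chip_id, reason, pmax) call for the driver side. A hedged sketch of emitting it with the throttle_reason[] strings from the powernv-cpufreq hunks (the helper and the pmax argument are illustrative, not lifted from this series):

#include <trace/events/power.h>

/* Illustrative only: report one chip's throttle state through the tracepoint. */
static void report_throttle(int chip_id, unsigned int reason_idx, int pmax)
{
    /* reason_idx indexes the throttle_reason[] table defined in the driver. */
    trace_powernv_throttle(chip_id, throttle_reason[reason_idx], pmax);
}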
@@ -19,3 +19,4 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
+obj-$(CONFIG_CPU_FREQ) += cpufreq.o
/*
* Scheduler code and data structures related to cpufreq.
*
* Copyright (C) 2016, Intel Corporation
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "sched.h"
DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
/**
* cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer.
* @cpu: The CPU to set the pointer for.
* @data: New pointer value.
*
* Set and publish the update_util_data pointer for the given CPU. That pointer
* points to a struct update_util_data object containing a callback function
* to call from cpufreq_update_util(). That function will be called from an RCU
* read-side critical section, so it must not sleep.
*
* Callers must use RCU-sched callbacks to free any memory that might be
* accessed via the old update_util_data pointer or invoke synchronize_sched()
* right after this function to avoid use-after-free.
*/
void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
{
if (WARN_ON(data && !data->func))
return;
rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
}
EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);
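The kernel-doc above puts the RCU burden on the caller: after clearing the pointer, nothing reachable through the old update_util_data may be reused or freed until an RCU-sched grace period has passed. A teardown sketch following that rule, continuing the hypothetical governor from the linux/sched.h note earlier:

static void my_gov_stop_cpu(int cpu)
{
    /* Stop new callbacks from being invoked for this CPU. */
    cpufreq_set_update_util_data(cpu, NULL);

    /*
     * Wait for callbacks that may already be running in scheduler
     * context before the per-CPU data is reused or freed.
     */
    synchronize_sched();
}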
@@ -726,6 +726,10 @@ static void update_curr_dl(struct rq *rq)
     if (!dl_task(curr) || !on_dl_rq(dl_se))
         return;

+    /* Kick cpufreq (see the comment in linux/cpufreq.h). */
+    if (cpu_of(rq) == smp_processor_id())
+        cpufreq_trigger_update(rq_clock(rq));
+
     /*
      * Consumed budget is computed considering the time as
      * observed by schedulable tasks (excluding time spent
...
@@ -2824,7 +2824,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
 {
     struct cfs_rq *cfs_rq = cfs_rq_of(se);
     u64 now = cfs_rq_clock_task(cfs_rq);
-    int cpu = cpu_of(rq_of(cfs_rq));
+    struct rq *rq = rq_of(cfs_rq);
+    int cpu = cpu_of(rq);

     /*
      * Track task load average for carrying it to new CPU after migrated, and
@@ -2836,6 +2837,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
     if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
         update_tg_load_avg(cfs_rq, 0);
+
+    if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
+        unsigned long max = rq->cpu_capacity_orig;
+
+        /*
+         * There are a few boundary cases this might miss but it should
+         * get called often enough that that should (hopefully) not be
+         * a real problem -- added to that it only calls on the local
+         * CPU, so if we enqueue remotely we'll miss an update, but
+         * the next tick/schedule should update.
+         *
+         * It will not get called when we go idle, because the idle
+         * thread is a different class (!fair), nor will the utilization
+         * number include things like RT tasks.
+         *
+         * As is, the util number is not freq-invariant (we'd have to
+         * implement arch_scale_freq_capacity() for that).
+         *
+         * See cpu_util().
+         */
+        cpufreq_update_util(rq_clock(rq),
+                            min(cfs_rq->avg.util_avg, max), max);
+    }
 }

 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
...
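To put numbers on the new fair.c hook: with rq->cpu_capacity_orig = 1024 and cfs_rq->avg.util_avg = 614, the callback receives util = 614 and max = 1024, i.e. roughly 60% utilization, and the min() clamp keeps a transiently inflated util_avg from ever reporting more than 100%. The figures are only an illustration; the patch itself does not assume any particular capacity value.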
@@ -945,6 +945,10 @@ static void update_curr_rt(struct rq *rq)
     if (curr->sched_class != &rt_sched_class)
         return;

+    /* Kick cpufreq (see the comment in linux/cpufreq.h). */
+    if (cpu_of(rq) == smp_processor_id())
+        cpufreq_trigger_update(rq_clock(rq));
+
     delta_exec = rq_clock_task(rq) - curr->se.exec_start;
     if (unlikely((s64)delta_exec <= 0))
         return;
...
@@ -1738,3 +1738,51 @@ static inline u64 irq_time_read(int cpu)
 }
 #endif /* CONFIG_64BIT */
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#ifdef CONFIG_CPU_FREQ
DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
/**
* cpufreq_update_util - Take a note about CPU utilization changes.
* @time: Current time.
* @util: Current utilization.
* @max: Utilization ceiling.
*
* This function is called by the scheduler on every invocation of
* update_load_avg() on the CPU whose utilization is being updated.
*
* It can only be called from RCU-sched read-side critical sections.
*/
static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
{
struct update_util_data *data;
data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
if (data)
data->func(data, time, util, max);
}
/**
* cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
* @time: Current time.
*
* The way cpufreq is currently arranged requires it to evaluate the CPU
* performance state (frequency/voltage) on a regular basis to prevent it from
* being stuck in a completely inadequate performance level for too long.
* That is not guaranteed to happen if the updates are only triggered from CFS,
* though, because they may not be coming in if RT or deadline tasks are active
* all the time (or there are RT and DL tasks only).
*
* As a workaround for that issue, this function is called by the RT and DL
* sched classes to trigger extra cpufreq updates to prevent it from stalling,
* but that really is a band-aid. Going forward it should be replaced with
* solutions targeted more specifically at RT and DL tasks.
*/
static inline void cpufreq_trigger_update(u64 time)
{
cpufreq_update_util(time, ULONG_MAX, 0);
}
#else
static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
static inline void cpufreq_trigger_update(u64 time) {}
#endif /* CONFIG_CPU_FREQ */
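Note that cpufreq_trigger_update() reuses cpufreq_update_util() with util = ULONG_MAX and max = 0, so util > max is the one case a callback can see that does not come from a real CFS utilization sample. A sketch of a callback that rate-limits itself and treats that case as a plain "re-evaluate now" request (the names and the 10 ms interval are illustrative, not from this series):

#include <linux/percpu.h>
#include <linux/time64.h>

#define MY_SAMPLE_DELAY_NS  (10 * NSEC_PER_MSEC)

static DEFINE_PER_CPU(u64, my_last_update);

static void my_update_hook(struct update_util_data *data, u64 time,
                           unsigned long util, unsigned long max)
{
    u64 *last = this_cpu_ptr(&my_last_update);

    /* Rate-limit: ignore updates that arrive too soon after the last one. */
    if (time - *last < MY_SAMPLE_DELAY_NS)
        return;
    *last = time;

    if (util > max) {
        /* Came via cpufreq_trigger_update(): no usable util/max pair,
         * just force a performance-state re-evaluation. */
        return;
    }

    /* ... otherwise map util against max to pick a target frequency ... */
}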
@@ -15,4 +15,5 @@
 EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume);
 EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
+EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle);