提交 2652df3a 编写于 作者: R Rafael J. Wysocki

Merge branch 'pm-cpufreq'

Additional cpufreq updates for 4.18-rc1: fixes and cleanups in the
core and drivers and intel_pstate extension to do iowait boosting
on systems with HWP that improves performance quite a bit.

* pm-cpufreq:
  cpufreq: imx6q: check speed grades for i.MX6ULL
  cpufreq: governors: Fix long idle detection logic in load calculation
  cpufreq: intel_pstate: enable boost for Skylake Xeon
  cpufreq: intel_pstate: New sysfs entry to control HWP boost
  cpufreq: intel_pstate: HWP boost performance on IO wakeup
  cpufreq: intel_pstate: Add HWP boost utility and sched util hooks
  cpufreq: ti-cpufreq: Use devres managed API in probe()
  cpufreq: ti-cpufreq: Fix an incorrect error return value
  cpufreq: ACPI: make function acpi_cpufreq_fast_switch() static
  cpufreq: kryo: allow building as a loadable module
......@@ -125,7 +125,7 @@ config ARM_OMAP2PLUS_CPUFREQ
default ARCH_OMAP2PLUS
config ARM_QCOM_CPUFREQ_KRYO
bool "Qualcomm Kryo based CPUFreq"
tristate "Qualcomm Kryo based CPUFreq"
depends on ARM64
depends on QCOM_QFPROM
depends on QCOM_SMEM
......
......@@ -465,7 +465,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
return result;
}
unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
unsigned int target_freq)
{
struct acpi_cpufreq_data *data = policy->driver_data;
......
......@@ -165,7 +165,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
* calls, so the previous load value can be used then.
*/
load = j_cdbs->prev_load;
} else if (unlikely(time_elapsed > 2 * sampling_rate &&
} else if (unlikely((int)idle_time > 2 * sampling_rate &&
j_cdbs->prev_load)) {
/*
* If the CPU had gone completely idle and a task has
......@@ -185,10 +185,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
* clear prev_load to guarantee that the load will be
* computed again next time.
*
* Detecting this situation is easy: the governor's
* utilization update handler would not have run during
* CPU-idle periods. Hence, an unusually large
* 'time_elapsed' (as compared to the sampling rate)
* Detecting this situation is easy: an unusually large
* 'idle_time' (as compared to the sampling rate)
* indicates this scenario.
*/
load = j_cdbs->prev_load;
......@@ -217,8 +215,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
j_cdbs->prev_load = load;
}
if (time_elapsed > 2 * sampling_rate) {
unsigned int periods = time_elapsed / sampling_rate;
if (unlikely((int)idle_time > 2 * sampling_rate)) {
unsigned int periods = idle_time / sampling_rate;
if (periods < idle_periods)
idle_periods = periods;
......
......@@ -266,6 +266,8 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
}
#define OCOTP_CFG3_6UL_SPEED_696MHZ 0x2
#define OCOTP_CFG3_6ULL_SPEED_792MHZ 0x2
#define OCOTP_CFG3_6ULL_SPEED_900MHZ 0x3
static void imx6ul_opp_check_speed_grading(struct device *dev)
{
......@@ -287,16 +289,30 @@ static void imx6ul_opp_check_speed_grading(struct device *dev)
* Speed GRADING[1:0] defines the max speed of ARM:
* 2b'00: Reserved;
* 2b'01: 528000000Hz;
* 2b'10: 696000000Hz;
* 2b'11: Reserved;
* 2b'10: 696000000Hz on i.MX6UL, 792000000Hz on i.MX6ULL;
* 2b'11: 900000000Hz on i.MX6ULL only;
* We need to set the max speed of ARM according to fuse map.
*/
val = readl_relaxed(base + OCOTP_CFG3);
val >>= OCOTP_CFG3_SPEED_SHIFT;
val &= 0x3;
if (of_machine_is_compatible("fsl,imx6ul")) {
if (val != OCOTP_CFG3_6UL_SPEED_696MHZ)
if (dev_pm_opp_disable(dev, 696000000))
dev_warn(dev, "failed to disable 696MHz OPP\n");
}
if (of_machine_is_compatible("fsl,imx6ull")) {
if (val != OCOTP_CFG3_6ULL_SPEED_792MHZ)
if (dev_pm_opp_disable(dev, 792000000))
dev_warn(dev, "failed to disable 792MHz OPP\n");
if (val != OCOTP_CFG3_6ULL_SPEED_900MHZ)
if (dev_pm_opp_disable(dev, 900000000))
dev_warn(dev, "failed to disable 900MHz OPP\n");
}
iounmap(base);
put_node:
of_node_put(np);
......@@ -356,7 +372,8 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
goto put_reg;
}
if (of_machine_is_compatible("fsl,imx6ul"))
if (of_machine_is_compatible("fsl,imx6ul") ||
of_machine_is_compatible("fsl,imx6ull"))
imx6ul_opp_check_speed_grading(cpu_dev);
else
imx6q_opp_check_speed_grading(cpu_dev);
......
......@@ -221,6 +221,11 @@ struct global_params {
* preference/bias
* @epp_saved: Saved EPP/EPB during system suspend or CPU offline
* operation
* @hwp_req_cached: Cached value of the last HWP Request MSR
* @hwp_cap_cached: Cached value of the last HWP Capabilities MSR
* @last_io_update: Last time when IO wake flag was set
* @sched_flags: Store scheduler flags for possible cross CPU update
* @hwp_boost_min: Last HWP boosted min performance
*
* This structure stores per CPU instance data for all CPUs.
*/
......@@ -253,6 +258,11 @@ struct cpudata {
s16 epp_policy;
s16 epp_default;
s16 epp_saved;
u64 hwp_req_cached;
u64 hwp_cap_cached;
u64 last_io_update;
unsigned int sched_flags;
u32 hwp_boost_min;
};
static struct cpudata **all_cpu_data;
......@@ -285,6 +295,7 @@ static struct pstate_funcs pstate_funcs __read_mostly;
static int hwp_active __read_mostly;
static bool per_cpu_limits __read_mostly;
static bool hwp_boost __read_mostly;
static struct cpufreq_driver *intel_pstate_driver __read_mostly;
......@@ -689,6 +700,7 @@ static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
u64 cap;
rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
if (global.no_turbo)
*current_max = HWP_GUARANTEED_PERF(cap);
else
......@@ -763,6 +775,7 @@ static void intel_pstate_hwp_set(unsigned int cpu)
intel_pstate_set_epb(cpu, epp);
}
skip_epp:
WRITE_ONCE(cpu_data->hwp_req_cached, value);
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
......@@ -1020,6 +1033,30 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
return count;
}
static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
struct attribute *attr, char *buf)
{
return sprintf(buf, "%u\n", hwp_boost);
}
static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b,
const char *buf, size_t count)
{
unsigned int input;
int ret;
ret = kstrtouint(buf, 10, &input);
if (ret)
return ret;
mutex_lock(&intel_pstate_driver_lock);
hwp_boost = !!input;
intel_pstate_update_policies();
mutex_unlock(&intel_pstate_driver_lock);
return count;
}
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);
......@@ -1029,6 +1066,7 @@ define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);
define_one_global_rw(hwp_dynamic_boost);
static struct attribute *intel_pstate_attributes[] = {
&status.attr,
......@@ -1069,6 +1107,11 @@ static void __init intel_pstate_sysfs_expose_params(void)
rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
WARN_ON(rc);
if (hwp_active) {
rc = sysfs_create_file(intel_pstate_kobject,
&hwp_dynamic_boost.attr);
WARN_ON(rc);
}
}
/************************** sysfs end ************************/
......@@ -1381,6 +1424,116 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
intel_pstate_set_min_pstate(cpu);
}
/*
* Long hold time will keep high perf limits for long time,
* which negatively impacts perf/watt for some workloads,
* like specpower. 3ms is based on experiements on some
* workoads.
*/
static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
{
u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
u32 max_limit = (hwp_req & 0xff00) >> 8;
u32 min_limit = (hwp_req & 0xff);
u32 boost_level1;
/*
* Cases to consider (User changes via sysfs or boot time):
* If, P0 (Turbo max) = P1 (Guaranteed max) = min:
* No boost, return.
* If, P0 (Turbo max) > P1 (Guaranteed max) = min:
* Should result in one level boost only for P0.
* If, P0 (Turbo max) = P1 (Guaranteed max) > min:
* Should result in two level boost:
* (min + p1)/2 and P1.
* If, P0 (Turbo max) > P1 (Guaranteed max) > min:
* Should result in three level boost:
* (min + p1)/2, P1 and P0.
*/
/* If max and min are equal or already at max, nothing to boost */
if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
return;
if (!cpu->hwp_boost_min)
cpu->hwp_boost_min = min_limit;
/* level at half way mark between min and guranteed */
boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;
if (cpu->hwp_boost_min < boost_level1)
cpu->hwp_boost_min = boost_level1;
else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
cpu->hwp_boost_min = max_limit;
else
return;
hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
wrmsrl(MSR_HWP_REQUEST, hwp_req);
cpu->last_update = cpu->sample.time;
}
static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
{
if (cpu->hwp_boost_min) {
bool expired;
/* Check if we are idle for hold time to boost down */
expired = time_after64(cpu->sample.time, cpu->last_update +
hwp_boost_hold_time_ns);
if (expired) {
wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
cpu->hwp_boost_min = 0;
}
}
cpu->last_update = cpu->sample.time;
}
static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
u64 time)
{
cpu->sample.time = time;
if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
bool do_io = false;
cpu->sched_flags = 0;
/*
* Set iowait_boost flag and update time. Since IO WAIT flag
* is set all the time, we can't just conclude that there is
* some IO bound activity is scheduled on this CPU with just
* one occurrence. If we receive at least two in two
* consecutive ticks, then we treat as boost candidate.
*/
if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
do_io = true;
cpu->last_io_update = time;
if (do_io)
intel_pstate_hwp_boost_up(cpu);
} else {
intel_pstate_hwp_boost_down(cpu);
}
}
static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
u64 time, unsigned int flags)
{
struct cpudata *cpu = container_of(data, struct cpudata, update_util);
cpu->sched_flags |= flags;
if (smp_processor_id() == cpu->cpu)
intel_pstate_update_util_hwp_local(cpu, time);
}
static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
{
struct sample *sample = &cpu->sample;
......@@ -1641,6 +1794,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
{}
};
static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs),
{}
};
static int intel_pstate_init_cpu(unsigned int cpunum)
{
struct cpudata *cpu;
......@@ -1671,6 +1830,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
intel_pstate_disable_ee(cpunum);
intel_pstate_hwp_enable(cpu);
id = x86_match_cpu(intel_pstate_hwp_boost_ids);
if (id)
hwp_boost = true;
}
intel_pstate_get_cpu_pstates(cpu);
......@@ -1684,7 +1847,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
{
struct cpudata *cpu = all_cpu_data[cpu_num];
if (hwp_active)
if (hwp_active && !hwp_boost)
return;
if (cpu->update_util_set)
......@@ -1693,7 +1856,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
/* Prevent intel_pstate_update_util() from using stale data. */
cpu->sample.time = 0;
cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
intel_pstate_update_util);
(hwp_active ?
intel_pstate_update_util_hwp :
intel_pstate_update_util));
cpu->update_util_set = true;
}
......@@ -1805,8 +1970,16 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
intel_pstate_set_update_util_hook(policy->cpu);
}
if (hwp_active)
if (hwp_active) {
/*
* When hwp_boost was active before and dynamically it
* was turned off, in that case we need to clear the
* update util hook.
*/
if (!hwp_boost)
intel_pstate_clear_update_util_hook(policy->cpu);
intel_pstate_hwp_set(policy->cpu);
}
mutex_unlock(&intel_pstate_limits_lock);
......
......@@ -217,7 +217,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
if (!match)
return -ENODEV;
opp_data = kzalloc(sizeof(*opp_data), GFP_KERNEL);
opp_data = devm_kzalloc(&pdev->dev, sizeof(*opp_data), GFP_KERNEL);
if (!opp_data)
return -ENOMEM;
......@@ -226,8 +226,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
opp_data->cpu_dev = get_cpu_device(0);
if (!opp_data->cpu_dev) {
pr_err("%s: Failed to get device for CPU0\n", __func__);
ret = ENODEV;
goto free_opp_data;
return -ENODEV;
}
opp_data->opp_node = dev_pm_opp_of_get_opp_desc_node(opp_data->cpu_dev);
......@@ -285,8 +284,6 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
fail_put_node:
of_node_put(opp_data->opp_node);
free_opp_data:
kfree(opp_data);
return ret;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册