/*
 * drivers/cpufreq/cpufreq_governor.c
 *
 * CPUFREQ governors common code
 *
 * Copyright	(C) 2001 Russell King
 *		(C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *		(C) 2003 Jun Nakajima <jun.nakajima@intel.com>
 *		(C) 2009 Alexander Clouter <alex@digriz.org.uk>
 *		(c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <asm/cputime.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "cpufreq_governor.h"

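/*
 * Governor sysfs attributes live either under each policy's kobject or under
 * the global cpufreq kobject, depending on whether the driver provides a
 * governor instance per policy.
 */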
static struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
{
	if (have_governor_per_policy())
		return &policy->kobj;
	else
		return cpufreq_global_kobject;
}

static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data)
{
	if (have_governor_per_policy())
		return dbs_data->cdata->attr_group_gov_pol;
	else
		return dbs_data->cdata->attr_group_gov_sys;
}

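/*
 * Jiffy-based fallback: derive idle time as wall time minus the busy time
 * accumulated in the per-CPU cpustat counters, reporting both in usecs.
 */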
static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
{
	u64 idle_time;
	u64 cur_wall_time;
	u64 busy_time;

	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());

	busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];

	idle_time = cur_wall_time - busy_time;
	if (wall)
		*wall = cputime_to_usecs(cur_wall_time);

	return cputime_to_usecs(idle_time);
}

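/*
 * Return the idle time of @cpu in microseconds and store the current wall
 * time in @wall.  When @io_busy is set, iowait is treated as busy time;
 * otherwise it is added to the idle time.  Falls back to the jiffy-based
 * helper above when NO_HZ idle accounting is unavailable.
 */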
u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
{
	u64 idle_time = get_cpu_idle_time_us(cpu, io_busy ? wall : NULL);

	if (idle_time == -1ULL)
		return get_cpu_idle_time_jiffy(cpu, wall);
	else if (!io_busy)
		idle_time += get_cpu_iowait_time_us(cpu, wall);

	return idle_time;
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time);

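/*
 * Compute the load of every CPU sharing this policy since the last sample
 * (scaled by average frequency for ondemand, with optional nice and iowait
 * handling) and hand the maximum off to the governor's gov_check_cpu() hook.
 */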
void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
{
	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
	struct cpufreq_policy *policy;
	unsigned int max_load = 0;
	unsigned int ignore_nice;
	unsigned int j;

	if (dbs_data->cdata->governor == GOV_ONDEMAND)
		ignore_nice = od_tuners->ignore_nice;
	else
		ignore_nice = cs_tuners->ignore_nice;

	policy = cdbs->cur_policy;

	/* Get Absolute Load (in terms of freq for ondemand gov) */
	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_common_info *j_cdbs;
		u64 cur_wall_time, cur_idle_time;
		unsigned int idle_time, wall_time;
		unsigned int load;
		int io_busy = 0;

		j_cdbs = dbs_data->cdata->get_cpu_cdbs(j);

		/*
		 * For the purpose of ondemand, waiting for disk IO is
		 * an indication that you're performance critical, and
		 * not that the system is actually idle. So do not add
		 * the iowait time to the cpu idle time.
		 */
		if (dbs_data->cdata->governor == GOV_ONDEMAND)
			io_busy = od_tuners->io_is_busy;
		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);

		wall_time = (unsigned int)
			(cur_wall_time - j_cdbs->prev_cpu_wall);
		j_cdbs->prev_cpu_wall = cur_wall_time;

		idle_time = (unsigned int)
			(cur_idle_time - j_cdbs->prev_cpu_idle);
		j_cdbs->prev_cpu_idle = cur_idle_time;

		if (ignore_nice) {
			u64 cur_nice;
			unsigned long cur_nice_jiffies;

			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
					 j_cdbs->prev_cpu_nice;
			/*
			 * Assumption: nice time between sampling periods will
			 * be less than 2^32 jiffies for 32-bit systems
			 */
			cur_nice_jiffies = (unsigned long)
					cputime64_to_jiffies64(cur_nice);

			j_cdbs->prev_cpu_nice =
				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
			idle_time += jiffies_to_usecs(cur_nice_jiffies);
		}

		if (unlikely(!wall_time || wall_time < idle_time))
			continue;

		load = 100 * (wall_time - idle_time) / wall_time;

		if (dbs_data->cdata->governor == GOV_ONDEMAND) {
			int freq_avg = __cpufreq_driver_getavg(policy, j);
			if (freq_avg <= 0)
				freq_avg = policy->cur;

			load *= freq_avg;
		}

		if (load > max_load)
			max_load = load;
	}

	dbs_data->cdata->gov_check_cpu(cpu, max_load);
}
EXPORT_SYMBOL_GPL(dbs_check_cpu);

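/* (Re)arm the governor's deferrable sampling work on one CPU after @delay jiffies. */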
static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
		unsigned int delay)
{
	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);

	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
}

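/*
 * Queue the sampling work either on the local CPU only or, when @all_cpus is
 * set, on every CPU belonging to @policy.
 */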
void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
		unsigned int delay, bool all_cpus)
{
	int i;

	if (!all_cpus) {
		__gov_queue_work(smp_processor_id(), dbs_data, delay);
	} else {
		for_each_cpu(i, policy->cpus)
			__gov_queue_work(i, dbs_data, delay);
	}
}
EXPORT_SYMBOL_GPL(gov_queue_work);

static inline void gov_cancel_work(struct dbs_data *dbs_data,
		struct cpufreq_policy *policy)
{
	struct cpu_dbs_common_info *cdbs;
	int i;

	for_each_cpu(i, policy->cpus) {
		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
		cancel_delayed_work_sync(&cdbs->work);
	}
}

/* Return true if the CPU load needs to be evaluated again, false otherwise */
bool need_load_eval(struct cpu_dbs_common_info *cdbs,
		unsigned int sampling_rate)
{
	if (policy_is_shared(cdbs->cur_policy)) {
		ktime_t time_now = ktime_get();
		s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp);

		/* Do nothing if we recently have sampled */
		if (delta_us < (s64)(sampling_rate / 2))
			return false;
		else
			cdbs->time_stamp = time_now;
	}

	return true;
}
EXPORT_SYMBOL_GPL(need_load_eval);

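/* Store the sampling rate in whichever tuners structure the governor uses. */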
static void set_sampling_rate(struct dbs_data *dbs_data,
		unsigned int sampling_rate)
{
	if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
		cs_tuners->sampling_rate = sampling_rate;
	} else {
		struct od_dbs_tuners *od_tuners = dbs_data->tuners;
		od_tuners->sampling_rate = sampling_rate;
	}
}

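/*
 * Common governor backend: handles POLICY_INIT/EXIT (allocate tuners and
 * sysfs attributes), START/STOP (per-CPU bookkeeping and sampling work) and
 * LIMITS events on behalf of the ondemand and conservative governors.
 */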
int cpufreq_governor_dbs(struct cpufreq_policy *policy,
		struct common_dbs_data *cdata, unsigned int event)
{
	struct dbs_data *dbs_data;
	struct od_cpu_dbs_info_s *od_dbs_info = NULL;
	struct cs_cpu_dbs_info_s *cs_dbs_info = NULL;
	struct od_ops *od_ops = NULL;
	struct od_dbs_tuners *od_tuners = NULL;
	struct cs_dbs_tuners *cs_tuners = NULL;
	struct cpu_dbs_common_info *cpu_cdbs;
	unsigned int sampling_rate, latency, ignore_nice, j, cpu = policy->cpu;
	int io_busy = 0;
	int rc;

	if (have_governor_per_policy())
		dbs_data = policy->governor_data;
	else
		dbs_data = cdata->gdbs_data;

	WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT));

	switch (event) {
	case CPUFREQ_GOV_POLICY_INIT:
		if (have_governor_per_policy()) {
			WARN_ON(dbs_data);
		} else if (dbs_data) {
			policy->governor_data = dbs_data;
			return 0;
		}

		dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
		if (!dbs_data) {
			pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__);
			return -ENOMEM;
		}

		dbs_data->cdata = cdata;
		rc = cdata->init(dbs_data);
		if (rc) {
			pr_err("%s: POLICY_INIT: init() failed\n", __func__);
			kfree(dbs_data);
			return rc;
		}

		rc = sysfs_create_group(get_governor_parent_kobj(policy),
				get_sysfs_attr(dbs_data));
		if (rc) {
			cdata->exit(dbs_data);
			kfree(dbs_data);
			return rc;
		}

		policy->governor_data = dbs_data;

		/* policy latency is in ns. Convert it to us first */
		latency = policy->cpuinfo.transition_latency / 1000;
		if (latency == 0)
			latency = 1;

		/* Bring kernel and HW constraints together */
		dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
				MIN_LATENCY_MULTIPLIER * latency);
		set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate,
					latency * LATENCY_MULTIPLIER));

		if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
			struct cs_ops *cs_ops = dbs_data->cdata->gov_ops;

			cpufreq_register_notifier(cs_ops->notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
		}

		if (!have_governor_per_policy())
			cdata->gdbs_data = dbs_data;

		return 0;
	case CPUFREQ_GOV_POLICY_EXIT:
		if ((policy->governor->initialized == 1) ||
				have_governor_per_policy()) {
			sysfs_remove_group(get_governor_parent_kobj(policy),
					get_sysfs_attr(dbs_data));

			if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
				struct cs_ops *cs_ops = dbs_data->cdata->gov_ops;

				cpufreq_unregister_notifier(cs_ops->notifier_block,
						CPUFREQ_TRANSITION_NOTIFIER);
			}

			cdata->exit(dbs_data);
			kfree(dbs_data);
			cdata->gdbs_data = NULL;
		}

		policy->governor_data = NULL;
		return 0;
	}

	cpu_cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);

	if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
		cs_tuners = dbs_data->tuners;
		cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
		sampling_rate = cs_tuners->sampling_rate;
		ignore_nice = cs_tuners->ignore_nice;
	} else {
		od_tuners = dbs_data->tuners;
		od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
		sampling_rate = od_tuners->sampling_rate;
		ignore_nice = od_tuners->ignore_nice;
		od_ops = dbs_data->cdata->gov_ops;
		io_busy = od_tuners->io_is_busy;
	}

	switch (event) {
	case CPUFREQ_GOV_START:
		if (!policy->cur)
			return -EINVAL;

		mutex_lock(&dbs_data->mutex);

		for_each_cpu(j, policy->cpus) {
			struct cpu_dbs_common_info *j_cdbs =
				dbs_data->cdata->get_cpu_cdbs(j);

			j_cdbs->cpu = j;
			j_cdbs->cur_policy = policy;
			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
					       &j_cdbs->prev_cpu_wall, io_busy);
			if (ignore_nice)
				j_cdbs->prev_cpu_nice =
					kcpustat_cpu(j).cpustat[CPUTIME_NICE];

			mutex_init(&j_cdbs->timer_mutex);
			INIT_DEFERRABLE_WORK(&j_cdbs->work,
					     dbs_data->cdata->gov_dbs_timer);
		}

		/*
		 * conservative does not implement micro-accounting like the
		 * ondemand governor, thus we are bound to jiffies/HZ
		 */
		if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
			cs_dbs_info->down_skip = 0;
			cs_dbs_info->enable = 1;
			cs_dbs_info->requested_freq = policy->cur;
		} else {
			od_dbs_info->rate_mult = 1;
			od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
			od_ops->powersave_bias_init_cpu(cpu);
		}

		mutex_unlock(&dbs_data->mutex);

		/* Initialize the timer time stamp */
		cpu_cdbs->time_stamp = ktime_get();

		gov_queue_work(dbs_data, policy,
				delay_for_sampling_rate(sampling_rate), true);
		break;

	case CPUFREQ_GOV_STOP:
		if (dbs_data->cdata->governor == GOV_CONSERVATIVE)
			cs_dbs_info->enable = 0;

		gov_cancel_work(dbs_data, policy);

		mutex_lock(&dbs_data->mutex);
		mutex_destroy(&cpu_cdbs->timer_mutex);

		mutex_unlock(&dbs_data->mutex);

		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&cpu_cdbs->timer_mutex);
		if (policy->max < cpu_cdbs->cur_policy->cur)
			__cpufreq_driver_target(cpu_cdbs->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > cpu_cdbs->cur_policy->cur)
			__cpufreq_driver_target(cpu_cdbs->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		dbs_check_cpu(dbs_data, cpu);
		mutex_unlock(&cpu_cdbs->timer_mutex);
		break;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);