/*
 * drivers/cpufreq/cpufreq_governor.c
 *
 * CPUFREQ governors common code
 *
 * Copyright	(C) 2001 Russell King
 *		(C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *		(C) 2003 Jun Nakajima <jun.nakajima@intel.com>
 *		(C) 2009 Alexander Clouter <alex@digriz.org.uk>
 *		(C) 2012 Viresh Kumar <viresh.kumar@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <asm/cputime.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/cpu.h>

#include "cpufreq_governor.h"

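/*
 * Return the kobject the governor's sysfs attributes should be created
 * under: the policy's kobject when each policy has its own governor
 * instance, the global cpufreq kobject otherwise.
 */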
static struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
{
	if (have_governor_per_policy())
		return &policy->kobj;
	else
		return cpufreq_global_kobject;
}

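/*
 * Pick the attribute group matching the sysfs layout chosen above:
 * the per-policy group or the global per-governor one.
 */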
static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data)
{
	if (have_governor_per_policy())
		return dbs_data->cdata->attr_group_gov_pol;
	else
		return dbs_data->cdata->attr_group_gov_sys;
}

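/*
 * Fallback idle-time accounting at jiffy granularity: idle time is the
 * wall time minus the time the cpustat counters account as busy, both
 * converted to microseconds.
 */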
static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
{
	u64 idle_time;
	u64 cur_wall_time;
	u64 busy_time;

	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());

	busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];

	idle_time = cur_wall_time - busy_time;
	if (wall)
		*wall = cputime_to_usecs(cur_wall_time);

	return cputime_to_usecs(idle_time);
}

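/**
 * get_cpu_idle_time - get the total idle time of a CPU, in microseconds
 * @cpu: CPU number to query
 * @wall: if non-NULL, location to store the current wall time
 * @io_busy: if non-zero, count iowait time as busy rather than idle
 *
 * Falls back to jiffy-based accounting when the nohz idle time is not
 * available.
 */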
u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
{
	u64 idle_time = get_cpu_idle_time_us(cpu, io_busy ? wall : NULL);

	if (idle_time == -1ULL)
		return get_cpu_idle_time_jiffy(cpu, wall);
	else if (!io_busy)
		idle_time += get_cpu_iowait_time_us(cpu, wall);

	return idle_time;
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time);

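/**
 * dbs_check_cpu - evaluate the load since the last sample
 * @dbs_data: governor instance data
 * @cpu: CPU owning the policy being evaluated
 *
 * Walk every CPU of the policy, derive its load from the idle and wall
 * time deltas since the previous sample, and hand the maximum load found
 * to the governor's gov_check_cpu() callback.
 */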
void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
{
	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
	struct cpufreq_policy *policy;
	unsigned int max_load = 0;
	unsigned int ignore_nice;
	unsigned int j;

	if (dbs_data->cdata->governor == GOV_ONDEMAND)
		ignore_nice = od_tuners->ignore_nice;
	else
		ignore_nice = cs_tuners->ignore_nice;

	policy = cdbs->cur_policy;

	/* Get absolute load (in terms of freq for the ondemand governor) */
	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_common_info *j_cdbs;
		u64 cur_wall_time, cur_idle_time;
		unsigned int idle_time, wall_time;
		unsigned int load;
		int io_busy = 0;

		j_cdbs = dbs_data->cdata->get_cpu_cdbs(j);

		/*
		 * For the purpose of ondemand, waiting for disk IO is
		 * an indication that you're performance critical, and
		 * not that the system is actually idle. So do not add
		 * the iowait time to the cpu idle time.
		 */
		if (dbs_data->cdata->governor == GOV_ONDEMAND)
			io_busy = od_tuners->io_is_busy;
		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);

		wall_time = (unsigned int)
			(cur_wall_time - j_cdbs->prev_cpu_wall);
		j_cdbs->prev_cpu_wall = cur_wall_time;

		idle_time = (unsigned int)
			(cur_idle_time - j_cdbs->prev_cpu_idle);
		j_cdbs->prev_cpu_idle = cur_idle_time;

		if (ignore_nice) {
			u64 cur_nice;
			unsigned long cur_nice_jiffies;

			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
					 j_cdbs->prev_cpu_nice;
			/*
			 * Assumption: nice time between sampling periods will
			 * be less than 2^32 jiffies on a 32-bit system
			 */
			cur_nice_jiffies = (unsigned long)
					cputime64_to_jiffies64(cur_nice);

			j_cdbs->prev_cpu_nice =
				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
			idle_time += jiffies_to_usecs(cur_nice_jiffies);
		}

		if (unlikely(!wall_time || wall_time < idle_time))
			continue;

		load = 100 * (wall_time - idle_time) / wall_time;

		if (dbs_data->cdata->governor == GOV_ONDEMAND) {
			int freq_avg = __cpufreq_driver_getavg(policy, j);
			if (freq_avg <= 0)
				freq_avg = policy->cur;

			load *= freq_avg;
		}

		if (load > max_load)
			max_load = load;
	}

	dbs_data->cdata->gov_check_cpu(cpu, max_load);
}
EXPORT_SYMBOL_GPL(dbs_check_cpu);

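/* (Re)arm the per-CPU deferrable work to fire after @delay jiffies */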
static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
		unsigned int delay)
{
	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);

	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
}

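/*
 * Queue the sampling work either on the local CPU only or on every CPU
 * of the policy. CPU hotplug is locked out while policy->cpus is walked
 * so the mask cannot change under us.
 */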
void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
		unsigned int delay, bool all_cpus)
{
	int i;

	if (!all_cpus) {
		__gov_queue_work(smp_processor_id(), dbs_data, delay);
	} else {
		get_online_cpus();
		for_each_cpu(i, policy->cpus)
			__gov_queue_work(i, dbs_data, delay);
		put_online_cpus();
	}
}
EXPORT_SYMBOL_GPL(gov_queue_work);

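/* Cancel the sampling work on all CPUs of the policy and wait for it */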
static inline void gov_cancel_work(struct dbs_data *dbs_data,
		struct cpufreq_policy *policy)
{
	struct cpu_dbs_common_info *cdbs;
	int i;

	for_each_cpu(i, policy->cpus) {
		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
		cancel_delayed_work_sync(&cdbs->work);
	}
}

/* Return true if the CPU load needs to be evaluated again, false otherwise */
bool need_load_eval(struct cpu_dbs_common_info *cdbs,
		unsigned int sampling_rate)
{
	if (policy_is_shared(cdbs->cur_policy)) {
		ktime_t time_now = ktime_get();
		s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp);

		/* Do nothing if we have sampled recently */
		if (delta_us < (s64)(sampling_rate / 2))
			return false;
		else
			cdbs->time_stamp = time_now;
	}

	return true;
}
EXPORT_SYMBOL_GPL(need_load_eval);

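/* Store the sampling rate in the tuners structure matching the governor */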
static void set_sampling_rate(struct dbs_data *dbs_data,
		unsigned int sampling_rate)
{
	if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
		cs_tuners->sampling_rate = sampling_rate;
	} else {
		struct od_dbs_tuners *od_tuners = dbs_data->tuners;
		od_tuners->sampling_rate = sampling_rate;
	}
}

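/**
 * cpufreq_governor_dbs - common governor callback of the dbs governors
 * @policy: policy being operated upon
 * @cdata: common data of the governor (ondemand or conservative)
 * @event: CPUFREQ_GOV_* event to handle
 *
 * Handles governor initialization and exit, start and stop, and limit
 * updates on behalf of both the ondemand and conservative governors.
 */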
int cpufreq_governor_dbs(struct cpufreq_policy *policy,
		struct common_dbs_data *cdata, unsigned int event)
{
	struct dbs_data *dbs_data;
	struct od_cpu_dbs_info_s *od_dbs_info = NULL;
	struct cs_cpu_dbs_info_s *cs_dbs_info = NULL;
	struct od_ops *od_ops = NULL;
	struct od_dbs_tuners *od_tuners = NULL;
	struct cs_dbs_tuners *cs_tuners = NULL;
	struct cpu_dbs_common_info *cpu_cdbs;
	unsigned int sampling_rate, latency, ignore_nice, j, cpu = policy->cpu;
	int io_busy = 0;
	int rc;

	if (have_governor_per_policy())
		dbs_data = policy->governor_data;
	else
		dbs_data = cdata->gdbs_data;

	WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT));

	switch (event) {
	case CPUFREQ_GOV_POLICY_INIT:
		if (have_governor_per_policy()) {
			WARN_ON(dbs_data);
		} else if (dbs_data) {
			dbs_data->usage_count++;
			policy->governor_data = dbs_data;
			return 0;
		}

		dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
		if (!dbs_data) {
			pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__);
			return -ENOMEM;
		}

		dbs_data->cdata = cdata;
		dbs_data->usage_count = 1;
		rc = cdata->init(dbs_data);
		if (rc) {
			pr_err("%s: POLICY_INIT: init() failed\n", __func__);
			kfree(dbs_data);
			return rc;
		}

		rc = sysfs_create_group(get_governor_parent_kobj(policy),
				get_sysfs_attr(dbs_data));
		if (rc) {
			cdata->exit(dbs_data);
			kfree(dbs_data);
			return rc;
		}

		policy->governor_data = dbs_data;

		/* policy latency is in ns. Convert it to us first */
		latency = policy->cpuinfo.transition_latency / 1000;
		if (latency == 0)
			latency = 1;

		/* Bring kernel and HW constraints together */
		dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
				MIN_LATENCY_MULTIPLIER * latency);
		set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate,
					latency * LATENCY_MULTIPLIER));

		if ((cdata->governor == GOV_CONSERVATIVE) &&
				(!policy->governor->initialized)) {
			struct cs_ops *cs_ops = dbs_data->cdata->gov_ops;

			cpufreq_register_notifier(cs_ops->notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
		}

		if (!have_governor_per_policy())
			cdata->gdbs_data = dbs_data;

		return 0;
	case CPUFREQ_GOV_POLICY_EXIT:
		if (!--dbs_data->usage_count) {
			sysfs_remove_group(get_governor_parent_kobj(policy),
					get_sysfs_attr(dbs_data));

			if ((dbs_data->cdata->governor == GOV_CONSERVATIVE) &&
				(policy->governor->initialized == 1)) {
				struct cs_ops *cs_ops = dbs_data->cdata->gov_ops;

				cpufreq_unregister_notifier(cs_ops->notifier_block,
						CPUFREQ_TRANSITION_NOTIFIER);
			}

			cdata->exit(dbs_data);
			kfree(dbs_data);
			cdata->gdbs_data = NULL;
		}

		policy->governor_data = NULL;
		return 0;
	}

	cpu_cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);

	if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
		cs_tuners = dbs_data->tuners;
		cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
		sampling_rate = cs_tuners->sampling_rate;
		ignore_nice = cs_tuners->ignore_nice;
	} else {
		od_tuners = dbs_data->tuners;
		od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
		sampling_rate = od_tuners->sampling_rate;
		ignore_nice = od_tuners->ignore_nice;
		od_ops = dbs_data->cdata->gov_ops;
		io_busy = od_tuners->io_is_busy;
	}

	switch (event) {
	case CPUFREQ_GOV_START:
		if (!policy->cur)
			return -EINVAL;

		mutex_lock(&dbs_data->mutex);

		for_each_cpu(j, policy->cpus) {
			struct cpu_dbs_common_info *j_cdbs =
				dbs_data->cdata->get_cpu_cdbs(j);

			j_cdbs->cpu = j;
			j_cdbs->cur_policy = policy;
			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
					       &j_cdbs->prev_cpu_wall, io_busy);
			if (ignore_nice)
				j_cdbs->prev_cpu_nice =
					kcpustat_cpu(j).cpustat[CPUTIME_NICE];

			mutex_init(&j_cdbs->timer_mutex);
			INIT_DEFERRABLE_WORK(&j_cdbs->work,
					     dbs_data->cdata->gov_dbs_timer);
		}

		/*
		 * conservative does not implement micro accounting like the
		 * ondemand governor, thus we are bound to jiffies/HZ
		 */
		if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
			cs_dbs_info->down_skip = 0;
			cs_dbs_info->enable = 1;
			cs_dbs_info->requested_freq = policy->cur;
		} else {
			od_dbs_info->rate_mult = 1;
			od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
			od_ops->powersave_bias_init_cpu(cpu);
		}

		mutex_unlock(&dbs_data->mutex);

		/* Initialize the timer time stamp */
		cpu_cdbs->time_stamp = ktime_get();

		gov_queue_work(dbs_data, policy,
				delay_for_sampling_rate(sampling_rate), true);
		break;

	case CPUFREQ_GOV_STOP:
		if (dbs_data->cdata->governor == GOV_CONSERVATIVE)
			cs_dbs_info->enable = 0;

		gov_cancel_work(dbs_data, policy);

		mutex_lock(&dbs_data->mutex);
		mutex_destroy(&cpu_cdbs->timer_mutex);

		mutex_unlock(&dbs_data->mutex);

		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&cpu_cdbs->timer_mutex);
		if (policy->max < cpu_cdbs->cur_policy->cur)
			__cpufreq_driver_target(cpu_cdbs->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > cpu_cdbs->cur_policy->cur)
			__cpufreq_driver_target(cpu_cdbs->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		dbs_check_cpu(dbs_data, cpu);
		mutex_unlock(&cpu_cdbs->timer_mutex);
		break;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);