cpufreq_ondemand.c 19.2 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
A
Andrew Morton 已提交
17
#include <linux/cpu.h>
L
Linus Torvalds 已提交
18 19
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
20
#include <linux/mutex.h>
21 22 23
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
24
#include <linux/sched.h>
L
Linus Torvalds 已提交
25 26 27 28 29 30

/*
 * dbs is used in this file as a shortform for demandbased switching
 * It helps to keep variable names smaller, simpler
 */

31
#define DEF_FREQUENCY_DOWN_DIFFERENTIAL		(10)
L
Linus Torvalds 已提交
32
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
33 34
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL	(3)
#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
35
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
L
Linus Torvalds 已提交
36 37
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

38 39
/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * a transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with a transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in microseconds.
 */
48
static unsigned int def_sampling_rate;
49 50
#define MIN_SAMPLING_RATE_RATIO			(2)
/* for correct statistics, we need at least 10 ticks between each measure */
51 52 53 54
#define MIN_STAT_SAMPLING_RATE 			\
			(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
#define MIN_SAMPLING_RATE			\
			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
55 56 57 58 59 60 61 62 63 64 65 66
/*
 * The MIN_SAMPLING_RATE macro above will vanish together with its sysfs
 * file soon. Define the minimal settable sampling rate as the greater of:
 *   - "HW transition latency" * 100 (same as default sampling / 10)
 *   - MIN_STAT_SAMPLING_RATE
 * This keeps userspace from shooting itself in the foot.
 */
/*
 * Smallest sampling rate (in uS) that store_sampling_rate() will accept:
 * the larger of one tenth of the default rate and the statistical floor
 * MIN_STAT_SAMPLING_RATE.
 */
static unsigned int minimum_sampling_rate(void)
{
	unsigned int latency_floor = def_sampling_rate / 10;

	return max(latency_floor, MIN_STAT_SAMPLING_RATE);
}

/* This will also vanish soon with removing sampling_rate_max */
L
Linus Torvalds 已提交
67
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
68
#define LATENCY_MULTIPLIER			(1000)
69
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
L
Linus Torvalds 已提交
70

D
David Howells 已提交
71 72 73
static void do_dbs_timer(struct work_struct *work);

/* Sampling types */
74
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
L
Linus Torvalds 已提交
75 76

struct cpu_dbs_info_s {
77 78
	cputime64_t prev_cpu_idle;
	cputime64_t prev_cpu_wall;
79
	cputime64_t prev_cpu_nice;
80
	struct cpufreq_policy *cur_policy;
81
	struct delayed_work work;
82 83 84 85
	struct cpufreq_frequency_table *freq_table;
	unsigned int freq_lo;
	unsigned int freq_lo_jiffies;
	unsigned int freq_hi_jiffies;
86 87
	int cpu;
	unsigned int enable:1,
88
		sample_type:1;
L
Linus Torvalds 已提交
89 90 91 92 93
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

94 95 96 97 98 99 100 101
/*
 * DEADLOCK ALERT! There is an ordering requirement between the cpu_hotplug
 * lock and dbs_mutex. The cpu_hotplug lock should always be taken before
 * dbs_mutex. If any function that can potentially take the cpu_hotplug lock
 * (like __cpufreq_driver_target()) is called with dbs_mutex held, then the
 * cpu_hotplug lock should be taken before that. Note that the cpu_hotplug
 * lock is recursive for the same process. -Venki
 */
102
static DEFINE_MUTEX(dbs_mutex);
L
Linus Torvalds 已提交
103

104
static struct workqueue_struct	*kondemand_wq;
105

106
/*
 * Governor tunables, shared by every CPU using ondemand.
 * sampling_rate has no static default here; it is assigned in
 * cpufreq_governor_dbs() when the governor is first started.
 */
static struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int up_threshold;
	unsigned int down_differential;
	unsigned int ignore_nice;
	unsigned int powersave_bias;
} dbs_tuners_ins = {
	.up_threshold		= DEF_FREQUENCY_UP_THRESHOLD,
	.down_differential	= DEF_FREQUENCY_DOWN_DIFFERENTIAL,
	.ignore_nice		= 0,
	.powersave_bias		= 0,
};

119 120
/*
 * get_cpu_idle_time_jiffy - jiffy-based fallback for idle time accounting
 * @cpu:  CPU whose statistics are read
 * @wall: if non-NULL, receives the current wall time
 *
 * Idle time is derived as "wall time - busy time", where busy time is the
 * sum of the user, system, irq, softirq, steal and nice counters of @cpu.
 *
 * Both the return value and *@wall are converted to microseconds. The
 * callers mix these values with the result of get_cpu_idle_time_us() and
 * dbs_check_cpu() adds jiffies_to_usecs(nice delta) to the idle delta, so
 * returning raw jiffy-based values here would mix units and distort the
 * computed load whenever ignore_nice is enabled on the fallback path.
 *
 * NOTE(review): jiffies_to_usecs() works on narrower types than
 * cputime64_t; callers only use truncated deltas between two samples -
 * confirm this is acceptable for the HZ values in use.
 */
static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
							cputime64_t *wall)
{
	cputime64_t idle_time;
	cputime64_t cur_wall_time;
	cputime64_t busy_time;

	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
			kstat_cpu(cpu).cpustat.system);

	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);

	idle_time = cputime64_sub(cur_wall_time, busy_time);
	if (wall)
		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);

	return (cputime64_t)jiffies_to_usecs(idle_time);
}

142 143 144 145 146 147 148 149 150 151
/*
 * Return the idle time of @cpu and store the matching wall time in @wall.
 * get_cpu_idle_time_us() is tried first; when it reports -1ULL the
 * jiffy-based fallback is used instead.
 */
static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
	u64 idle_us = get_cpu_idle_time_us(cpu, wall);

	if (idle_us != -1ULL)
		return idle_us;

	return get_cpu_idle_time_jiffy(cpu, wall);
}

152 153 154 155 156
/*
 * Find right freq to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
 */
157 158 159
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
					  unsigned int freq_next,
					  unsigned int relation)
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
{
	unsigned int freq_req, freq_reduc, freq_avg;
	unsigned int freq_hi, freq_lo;
	unsigned int index = 0;
	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu);

	if (!dbs_info->freq_table) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_next;
	}

	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
			relation, &index);
	freq_req = dbs_info->freq_table[index].frequency;
	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
	freq_avg = freq_req - freq_reduc;

	/* Find freq bounds for freq_avg in freq_table */
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_H, &index);
	freq_lo = dbs_info->freq_table[index].frequency;
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_L, &index);
	freq_hi = dbs_info->freq_table[index].frequency;

	/* Find out how long we have to be in hi and lo freqs */
	if (freq_hi == freq_lo) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_lo;
	}
	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
	jiffies_hi += ((freq_hi - freq_lo) / 2);
	jiffies_hi /= (freq_hi - freq_lo);
	jiffies_lo = jiffies_total - jiffies_hi;
	dbs_info->freq_lo = freq_lo;
	dbs_info->freq_lo_jiffies = jiffies_lo;
	dbs_info->freq_hi_jiffies = jiffies_hi;
	return freq_hi;
}

/*
 * Refresh the cached frequency table pointer of every online CPU and
 * clear any pending low-frequency sub-sample (freq_lo = 0).
 */
static void ondemand_powersave_bias_init(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		struct cpu_dbs_info_s *info = &per_cpu(cpu_dbs_info, cpu);

		info->freq_table = cpufreq_frequency_get_table(cpu);
		info->freq_lo = 0;
	}
}

L
Linus Torvalds 已提交
216 217 218
/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
219 220 221 222 223 224 225 226
	static int print_once;

	if (!print_once) {
		printk(KERN_INFO "CPUFREQ: ondemand sampling_rate_max "
		       "sysfs file is deprecated - used by: %s\n",
		       current->comm);
		print_once = 1;
	}
227
	return sprintf(buf, "%u\n", MAX_SAMPLING_RATE);
L
Linus Torvalds 已提交
228 229 230 231
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
232 233 234 235 236 237 238 239
	static int print_once;

	if (!print_once) {
		printk(KERN_INFO "CPUFREQ: ondemand sampling_rate_min "
		       "sysfs file is deprecated - used by: %s\n",
		       current->comm);
		print_once = 1;
	}
240
	return sprintf(buf, "%u\n", MIN_SAMPLING_RATE);
L
Linus Torvalds 已提交
241 242
}

243 244
#define define_one_ro(_name)		\
static struct freq_attr _name =		\
L
Linus Torvalds 已提交
245 246 247 248 249 250 251 252 253 254 255 256 257 258
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(up_threshold, up_threshold);
259
show_one(ignore_nice_load, ignore_nice);
260
show_one(powersave_bias, powersave_bias);
L
Linus Torvalds 已提交
261

262
/*
 * sysfs store handler for "sampling_rate".
 * Parses an unsigned value from @buf and stores it, raised to
 * minimum_sampling_rate() if it is smaller. Returns @count on success or
 * -EINVAL when no number could be parsed.
 */
static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int requested;
	ssize_t result = count;
	int parsed = sscanf(buf, "%u", &requested);

	mutex_lock(&dbs_mutex);
	if (parsed == 1)
		dbs_tuners_ins.sampling_rate =
			max(requested, minimum_sampling_rate());
	else
		result = -EINVAL;
	mutex_unlock(&dbs_mutex);

	return result;
}

280
/*
 * sysfs store handler for "up_threshold".
 * Accepts values in [MIN_FREQUENCY_UP_THRESHOLD, MAX_FREQUENCY_UP_THRESHOLD].
 * Returns @count on success, -EINVAL for unparsable or out-of-range input.
 */
static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int requested;
	ssize_t result = count;
	int parsed = sscanf(buf, "%u", &requested);

	mutex_lock(&dbs_mutex);
	if (parsed == 1 &&
	    requested <= MAX_FREQUENCY_UP_THRESHOLD &&
	    requested >= MIN_FREQUENCY_UP_THRESHOLD)
		dbs_tuners_ins.up_threshold = requested;
	else
		result = -EINVAL;
	mutex_unlock(&dbs_mutex);

	return result;
}

300
/*
 * sysfs store handler for "ignore_nice_load".
 * Any value above 1 is clamped to 1. When the setting actually changes,
 * the idle/wall (and, if enabled, nice) snapshots of every online CPU are
 * re-read so the next sample starts from a consistent baseline.
 * Returns @count on success or -EINVAL when no number could be parsed.
 */
static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int value;
	unsigned int cpu;

	if (sscanf(buf, "%u", &value) != 1)
		return -EINVAL;

	value = (value > 1) ? 1 : value;

	mutex_lock(&dbs_mutex);
	if (value != dbs_tuners_ins.ignore_nice) {
		dbs_tuners_ins.ignore_nice = value;

		/* we need to re-evaluate prev_cpu_idle */
		for_each_online_cpu(cpu) {
			struct cpu_dbs_info_s *info =
					&per_cpu(cpu_dbs_info, cpu);

			info->prev_cpu_idle = get_cpu_idle_time(cpu,
						&info->prev_cpu_wall);
			if (dbs_tuners_ins.ignore_nice)
				info->prev_cpu_nice =
						kstat_cpu(cpu).cpustat.nice;
		}
	}
	mutex_unlock(&dbs_mutex);

	return count;
}

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357
/*
 * sysfs store handler for "powersave_bias".
 * The value is clamped to at most 1000 (powersave_bias_target() divides by
 * 1000), stored, and the per-CPU powersave state is re-initialised.
 * Returns @count on success or -EINVAL when no number could be parsed.
 */
static ssize_t store_powersave_bias(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int bias;

	if (sscanf(buf, "%u", &bias) != 1)
		return -EINVAL;

	bias = (bias > 1000) ? 1000 : bias;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.powersave_bias = bias;
	ondemand_powersave_bias_init();
	mutex_unlock(&dbs_mutex);

	return count;
}

L
Linus Torvalds 已提交
358 359 360 361 362 363
#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(up_threshold);
364
define_one_rw(ignore_nice_load);
365
define_one_rw(powersave_bias);
L
Linus Torvalds 已提交
366

367
static struct attribute *dbs_attributes[] = {
L
Linus Torvalds 已提交
368 369 370 371
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&up_threshold.attr,
372
	&ignore_nice_load.attr,
373
	&powersave_bias.attr,
L
Linus Torvalds 已提交
374 375 376 377 378 379 380 381 382 383
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "ondemand",
};

/************************** sysfs end ************************/

384
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
L
Linus Torvalds 已提交
385
{
386
	unsigned int max_load_freq;
L
Linus Torvalds 已提交
387 388 389 390 391 392 393

	struct cpufreq_policy *policy;
	unsigned int j;

	if (!this_dbs_info->enable)
		return;

394
	this_dbs_info->freq_lo = 0;
L
Linus Torvalds 已提交
395
	policy = this_dbs_info->cur_policy;
396

397
	/*
398 399
	 * Every sampling_rate, we check, if current idle time is less
	 * than 20% (default), then we try to increase frequency
400
	 * Every sampling_rate, we look for a the lowest
401 402
	 * frequency which can sustain the load while keeping idle time over
	 * 30%. If such a frequency exist, we try to decrease to this frequency.
L
Linus Torvalds 已提交
403
	 *
404 405 406
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens at minimum steps of
	 * 5% (default) of current frequency
L
Linus Torvalds 已提交
407 408
	 */

409 410 411
	/* Get Absolute Load - in terms of freq */
	max_load_freq = 0;

412
	for_each_cpu(j, policy->cpus) {
L
Linus Torvalds 已提交
413
		struct cpu_dbs_info_s *j_dbs_info;
414 415 416 417
		cputime64_t cur_wall_time, cur_idle_time;
		unsigned int idle_time, wall_time;
		unsigned int load, load_freq;
		int freq_avg;
L
Linus Torvalds 已提交
418 419

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
420 421 422

		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

423 424 425 426 427
		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
				j_dbs_info->prev_cpu_wall);
		j_dbs_info->prev_cpu_wall = cur_wall_time;

		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
428
				j_dbs_info->prev_cpu_idle);
429
		j_dbs_info->prev_cpu_idle = cur_idle_time;
L
Linus Torvalds 已提交
430

431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
		if (dbs_tuners_ins.ignore_nice) {
			cputime64_t cur_nice;
			unsigned long cur_nice_jiffies;

			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
					 j_dbs_info->prev_cpu_nice);
			/*
			 * Assumption: nice time between sampling periods will
			 * be less than 2^32 jiffies for 32 bit sys
			 */
			cur_nice_jiffies = (unsigned long)
					cputime64_to_jiffies64(cur_nice);

			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
			idle_time += jiffies_to_usecs(cur_nice_jiffies);
		}

448
		if (unlikely(!wall_time || wall_time < idle_time))
449 450 451 452 453 454 455 456 457 458 459
			continue;

		load = 100 * (wall_time - idle_time) / wall_time;

		freq_avg = __cpufreq_driver_getavg(policy, j);
		if (freq_avg <= 0)
			freq_avg = policy->cur;

		load_freq = load * freq_avg;
		if (load_freq > max_load_freq)
			max_load_freq = load_freq;
L
Linus Torvalds 已提交
460 461
	}

462
	/* Check for frequency increase */
463
	if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
464
		/* if we are already at full speed then break out early */
465 466 467 468 469 470 471 472 473 474 475 476
		if (!dbs_tuners_ins.powersave_bias) {
			if (policy->cur == policy->max)
				return;

			__cpufreq_driver_target(policy, policy->max,
				CPUFREQ_RELATION_H);
		} else {
			int freq = powersave_bias_target(policy, policy->max,
					CPUFREQ_RELATION_H);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
L
Linus Torvalds 已提交
477 478 479 480
		return;
	}

	/* Check for frequency decrease */
481 482 483
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;
L
Linus Torvalds 已提交
484

485 486 487 488 489
	/*
	 * The optimal frequency is the frequency that is the lowest that
	 * can support the current CPU usage without triggering the up
	 * policy. To be safe, we focus 10 points under the threshold.
	 */
490 491 492
	if (max_load_freq <
	    (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
	     policy->cur) {
493
		unsigned int freq_next;
494 495 496
		freq_next = max_load_freq /
				(dbs_tuners_ins.up_threshold -
				 dbs_tuners_ins.down_differential);
497

498 499 500 501 502 503 504 505 506
		if (!dbs_tuners_ins.powersave_bias) {
			__cpufreq_driver_target(policy, freq_next,
					CPUFREQ_RELATION_L);
		} else {
			int freq = powersave_bias_target(policy, freq_next,
					CPUFREQ_RELATION_L);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
507
	}
L
Linus Torvalds 已提交
508 509
}

D
David Howells 已提交
510
static void do_dbs_timer(struct work_struct *work)
511
{
512 513 514 515 516
	struct cpu_dbs_info_s *dbs_info =
		container_of(work, struct cpu_dbs_info_s, work.work);
	unsigned int cpu = dbs_info->cpu;
	int sample_type = dbs_info->sample_type;

517 518
	/* We want all CPUs to do sampling nearly on same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
D
David Howells 已提交
519

520
	delay -= jiffies % delay;
521

522
	if (lock_policy_rwsem_write(cpu) < 0)
523
		return;
524 525 526 527 528 529

	if (!dbs_info->enable) {
		unlock_policy_rwsem_write(cpu);
		return;
	}

530
	/* Common NORMAL_SAMPLE setup */
D
David Howells 已提交
531
	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
532
	if (!dbs_tuners_ins.powersave_bias ||
D
David Howells 已提交
533
	    sample_type == DBS_NORMAL_SAMPLE) {
534 535 536
		dbs_check_cpu(dbs_info);
		if (dbs_info->freq_lo) {
			/* Setup timer for SUB_SAMPLE */
D
David Howells 已提交
537
			dbs_info->sample_type = DBS_SUB_SAMPLE;
538 539 540 541
			delay = dbs_info->freq_hi_jiffies;
		}
	} else {
		__cpufreq_driver_target(dbs_info->cur_policy,
542
			dbs_info->freq_lo, CPUFREQ_RELATION_H);
543
	}
544
	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
545
	unlock_policy_rwsem_write(cpu);
546
}
L
Linus Torvalds 已提交
547

548
static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
L
Linus Torvalds 已提交
549
{
550 551 552
	/* We want all CPUs to do sampling nearly on same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	delay -= jiffies % delay;
553

D
Dave Jones 已提交
554
	dbs_info->enable = 1;
555
	ondemand_powersave_bias_init();
D
David Howells 已提交
556
	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
557
	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
558
	queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
559
		delay);
L
Linus Torvalds 已提交
560 561
}

562
static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
L
Linus Torvalds 已提交
563
{
564 565
	dbs_info->enable = 0;
	cancel_delayed_work(&dbs_info->work);
L
Linus Torvalds 已提交
566 567 568 569 570 571 572 573
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				   unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;
J
Jeff Garzik 已提交
574
	int rc;
L
Linus Torvalds 已提交
575 576 577 578 579

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
580
		if ((!cpu_online(cpu)) || (!policy->cur))
L
Linus Torvalds 已提交
581 582 583 584
			return -EINVAL;

		if (this_dbs_info->enable) /* Already enabled */
			break;
585

586
		mutex_lock(&dbs_mutex);
587
		dbs_enable++;
J
Jeff Garzik 已提交
588 589 590 591 592 593 594 595

		rc = sysfs_create_group(&policy->kobj, &dbs_attr_group);
		if (rc) {
			dbs_enable--;
			mutex_unlock(&dbs_mutex);
			return rc;
		}

596
		for_each_cpu(j, policy->cpus) {
L
Linus Torvalds 已提交
597 598 599
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;
600

601 602
			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&j_dbs_info->prev_cpu_wall);
603 604 605 606
			if (dbs_tuners_ins.ignore_nice) {
				j_dbs_info->prev_cpu_nice =
						kstat_cpu(j).cpustat.nice;
			}
L
Linus Torvalds 已提交
607
		}
608
		this_dbs_info->cpu = cpu;
L
Linus Torvalds 已提交
609 610 611 612 613 614 615
		/*
		 * Start the timerschedule work, when this governor
		 * is used for first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in nS. Convert it to uS first */
616 617 618
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;
L
Linus Torvalds 已提交
619

620 621 622
			def_sampling_rate =
				max(latency * LATENCY_MULTIPLIER,
				    MIN_STAT_SAMPLING_RATE);
623

L
Linus Torvalds 已提交
624 625
			dbs_tuners_ins.sampling_rate = def_sampling_rate;
		}
626
		dbs_timer_init(this_dbs_info);
627

628
		mutex_unlock(&dbs_mutex);
L
Linus Torvalds 已提交
629 630 631
		break;

	case CPUFREQ_GOV_STOP:
632
		mutex_lock(&dbs_mutex);
633
		dbs_timer_exit(this_dbs_info);
L
Linus Torvalds 已提交
634 635
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
636
		mutex_unlock(&dbs_mutex);
L
Linus Torvalds 已提交
637 638 639 640

		break;

	case CPUFREQ_GOV_LIMITS:
641
		mutex_lock(&dbs_mutex);
L
Linus Torvalds 已提交
642
		if (policy->max < this_dbs_info->cur_policy->cur)
643
			__cpufreq_driver_target(this_dbs_info->cur_policy,
644
				policy->max, CPUFREQ_RELATION_H);
L
Linus Torvalds 已提交
645
		else if (policy->min > this_dbs_info->cur_policy->cur)
646
			__cpufreq_driver_target(this_dbs_info->cur_policy,
647
				policy->min, CPUFREQ_RELATION_L);
648
		mutex_unlock(&dbs_mutex);
L
Linus Torvalds 已提交
649 650 651 652 653
		break;
	}
	return 0;
}

654 655 656
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static
#endif
657 658 659 660 661
struct cpufreq_governor cpufreq_gov_ondemand = {
	.name			= "ondemand",
	.governor		= cpufreq_governor_dbs,
	.max_transition_latency = TRANSITION_LATENCY_LIMIT,
	.owner			= THIS_MODULE,
L
Linus Torvalds 已提交
662 663 664 665
};

/*
 * cpufreq_gov_dbs_init - module/initcall entry point
 *
 * Probes whether get_cpu_idle_time_us() is usable on the current CPU and,
 * if so, switches to the finer MICRO_* thresholds. Then creates the
 * "kondemand" workqueue and registers the governor.
 *
 * Returns 0 on success, -ENOMEM if the workqueue cannot be created, or the
 * error from cpufreq_register_governor() (the workqueue is destroyed again
 * in that case).
 */
static int __init cpufreq_gov_dbs_init(void)
{
	cputime64_t wall;
	u64 idle_time;
	int cpu;
	int err;

	cpu = get_cpu();
	idle_time = get_cpu_idle_time_us(cpu, &wall);
	put_cpu();

	if (idle_time != -1ULL) {
		/* Idle micro accounting is supported. Use finer thresholds */
		dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
		dbs_tuners_ins.down_differential =
					MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
	}

	kondemand_wq = create_workqueue("kondemand");
	if (!kondemand_wq) {
		printk(KERN_ERR "Creation of kondemand failed\n");
		/* creation failure is a resource shortage, not a bad address */
		return -ENOMEM;
	}

	err = cpufreq_register_governor(&cpufreq_gov_ondemand);
	if (err)
		destroy_workqueue(kondemand_wq);

	return err;
}

/*
 * Module exit: unregister the governor first, then tear down the
 * "kondemand" workqueue it was using.
 */
static void __exit cpufreq_gov_dbs_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_ondemand);
	destroy_workqueue(kondemand_wq);
}


699 700 701
MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
702
	"Low Latency Frequency Transition capable processors");
703
MODULE_LICENSE("GPL");
L
Linus Torvalds 已提交
704

705 706 707
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
fs_initcall(cpufreq_gov_dbs_init);
#else
L
Linus Torvalds 已提交
708
module_init(cpufreq_gov_dbs_init);
709
#endif
L
Linus Torvalds 已提交
710
module_exit(cpufreq_gov_dbs_exit);