/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * dbs is used in this file as shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_DOWN_DIFFERENTIAL		(10)
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL	(3)
#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with CPUFREQ_ETERNAL)
 * this governor will not work.
 * All times here are in us.
 */
#define MIN_SAMPLING_RATE_RATIO			(2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER			(1000)
#define MIN_LATENCY_MULTIPLIER			(100)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)

static void do_dbs_timer(struct work_struct *work);
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static
#endif
struct cpufreq_governor cpufreq_gov_ondemand = {
       .name                   = "ondemand",
       .governor               = cpufreq_governor_dbs,
       .max_transition_latency = TRANSITION_LATENCY_LIMIT,
       .owner                  = THIS_MODULE,
};

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

struct cpu_dbs_info_s {
	cputime64_t prev_cpu_idle;
	cputime64_t prev_cpu_wall;
	cputime64_t prev_cpu_nice;
	struct cpufreq_policy *cur_policy;
	struct delayed_work work;
	struct cpufreq_frequency_table *freq_table;
	unsigned int freq_lo;
	unsigned int freq_lo_jiffies;
	unsigned int freq_hi_jiffies;
	int cpu;
	unsigned int sample_type:1;
	/*
	 * percpu mutex that serializes governor limit change with
	 * do_dbs_timer invocation. We do not want do_dbs_timer to run
	 * when user is changing the governor or limits.
	 */
	struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

/*
 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
 * different CPUs. It protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);

static struct workqueue_struct	*kondemand_wq;

static struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int up_threshold;
	unsigned int down_differential;
	unsigned int ignore_nice;
	unsigned int powersave_bias;
} dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
	.ignore_nice = 0,
	.powersave_bias = 0,
};

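/*
 * Jiffy-based fallback: treat everything the scheduler did not account as
 * busy (user, system, irq, softirq, steal, nice) as idle time, and return
 * both idle and wall time in microseconds.
 */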
static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
							cputime64_t *wall)
{
	cputime64_t idle_time;
	cputime64_t cur_wall_time;
	cputime64_t busy_time;

	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
			kstat_cpu(cpu).cpustat.system);

	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);

	idle_time = cputime64_sub(cur_wall_time, busy_time);
	if (wall)
		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);

	return (cputime64_t)jiffies_to_usecs(idle_time);
}

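/*
 * Prefer the tickless (NO_HZ) idle accounting; fall back to the jiffy-based
 * estimate when it is not available for this CPU.
 */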
static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
	u64 idle_time = get_cpu_idle_time_us(cpu, wall);

	if (idle_time == -1ULL)
		return get_cpu_idle_time_jiffy(cpu, wall);

	return idle_time;
}

/*
 * Find right freq to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
 */
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
					  unsigned int freq_next,
					  unsigned int relation)
{
	unsigned int freq_req, freq_reduc, freq_avg;
	unsigned int freq_hi, freq_lo;
	unsigned int index = 0;
	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
						   policy->cpu);

	if (!dbs_info->freq_table) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_next;
	}

	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
			relation, &index);
	freq_req = dbs_info->freq_table[index].frequency;
	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
	freq_avg = freq_req - freq_reduc;

	/* Find freq bounds for freq_avg in freq_table */
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_H, &index);
	freq_lo = dbs_info->freq_table[index].frequency;
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_L, &index);
	freq_hi = dbs_info->freq_table[index].frequency;

	/* Find out how long we have to be in hi and lo freqs */
	if (freq_hi == freq_lo) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_lo;
	}
	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
	jiffies_hi += ((freq_hi - freq_lo) / 2);
	jiffies_hi /= (freq_hi - freq_lo);
	jiffies_lo = jiffies_total - jiffies_hi;
	dbs_info->freq_lo = freq_lo;
	dbs_info->freq_lo_jiffies = jiffies_lo;
	dbs_info->freq_hi_jiffies = jiffies_hi;
	return freq_hi;
}

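/* Cache the CPU's frequency table and reset the powersave_bias low-freq state. */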
static void ondemand_powersave_bias_init_cpu(int cpu)
{
	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
	dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
	dbs_info->freq_lo = 0;
}

static void ondemand_powersave_bias_init(void)
{
	int i;
	for_each_online_cpu(i) {
		ondemand_powersave_bias_init_cpu(i);
	}
}

/************************** sysfs interface ************************/

static ssize_t show_sampling_rate_max(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max "
	       "sysfs file is deprecated - used by: %s\n", current->comm);
	return sprintf(buf, "%u\n", -1U);
}

static ssize_t show_sampling_rate_min(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", min_sampling_rate);
}

#define define_one_ro(_name)		\
static struct global_attr _name =	\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct kobject *kobj, struct attribute *attr, char *buf)              \
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(powersave_bias, powersave_bias);

/*** delete after deprecation time ***/

#define DEPRECATION_MSG(file_name)					\
	printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "	\
		    "interface is deprecated - " #file_name "\n");

#define show_one_old(file_name)						\
static ssize_t show_##file_name##_old					\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "	\
		    "interface is deprecated - " #file_name "\n");	\
	return show_##file_name(NULL, NULL, buf);			\
}
show_one_old(sampling_rate);
show_one_old(up_threshold);
show_one_old(ignore_nice_load);
show_one_old(powersave_bias);
show_one_old(sampling_rate_min);
show_one_old(sampling_rate_max);

#define define_one_ro_old(object, _name)       \
static struct freq_attr object =               \
__ATTR(_name, 0444, show_##_name##_old, NULL)

define_one_ro_old(sampling_rate_min_old, sampling_rate_min);
define_one_ro_old(sampling_rate_max_old, sampling_rate_max);

/*** delete after deprecation time ***/

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
				   const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
			input < MIN_FREQUENCY_UP_THRESHOLD) {
		return -EINVAL;
	}

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.up_threshold = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
				      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	mutex_lock(&dbs_mutex);
	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		mutex_unlock(&dbs_mutex);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *dbs_info;
		dbs_info = &per_cpu(od_cpu_dbs_info, j);
		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&dbs_info->prev_cpu_wall);
		if (dbs_tuners_ins.ignore_nice)
			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;

	}
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
				    const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1)
		return -EINVAL;

	if (input > 1000)
		input = 1000;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.powersave_bias = input;
	ondemand_powersave_bias_init();
	mutex_unlock(&dbs_mutex);

	return count;
}

#define define_one_rw(_name) \
static struct global_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(up_threshold);
define_one_rw(ignore_nice_load);
define_one_rw(powersave_bias);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&up_threshold.attr,
	&ignore_nice_load.attr,
	&powersave_bias.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "ondemand",
};

/*** delete after deprecation time ***/

#define write_one_old(file_name)					\
static ssize_t store_##file_name##_old					\
(struct cpufreq_policy *unused, const char *buf, size_t count)		\
{									\
       printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "	\
		   "interface is deprecated - " #file_name "\n");	\
       return store_##file_name(NULL, NULL, buf, count);		\
}
write_one_old(sampling_rate);
write_one_old(up_threshold);
write_one_old(ignore_nice_load);
write_one_old(powersave_bias);

#define define_one_rw_old(object, _name)       \
static struct freq_attr object =               \
__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old)

define_one_rw_old(sampling_rate_old, sampling_rate);
define_one_rw_old(up_threshold_old, up_threshold);
define_one_rw_old(ignore_nice_load_old, ignore_nice_load);
define_one_rw_old(powersave_bias_old, powersave_bias);

static struct attribute *dbs_attributes_old[] = {
       &sampling_rate_max_old.attr,
       &sampling_rate_min_old.attr,
       &sampling_rate_old.attr,
       &up_threshold_old.attr,
       &ignore_nice_load_old.attr,
       &powersave_bias_old.attr,
       NULL
};

static struct attribute_group dbs_attr_group_old = {
       .attrs = dbs_attributes_old,
       .name = "ondemand",
};

/*** delete after deprecation time ***/

/************************** sysfs end ************************/

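/*
 * Estimate the load (scaled by the average frequency) of every CPU covered
 * by this policy since the last sample, then raise or lower the target
 * frequency accordingly.
 */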
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
	unsigned int max_load_freq;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info->freq_lo = 0;
	policy = this_dbs_info->cur_policy;

	/*
	 * Every sampling_rate, we check if the current idle time is less
	 * than 20% (default). If it is, we try to increase the frequency.
	 * Every sampling_rate, we also look for the lowest frequency which
	 * can sustain the load while keeping idle time over 30%. If such
	 * a frequency exists, we try to decrease to this frequency.
	 *
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens at minimum steps of
	 * 5% (default) of the current frequency.
	 */

	/* Get Absolute Load - in terms of freq */
	max_load_freq = 0;

	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_info_s *j_dbs_info;
		cputime64_t cur_wall_time, cur_idle_time;
		unsigned int idle_time, wall_time;
		unsigned int load, load_freq;
		int freq_avg;

		j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
				j_dbs_info->prev_cpu_wall);
		j_dbs_info->prev_cpu_wall = cur_wall_time;

		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
				j_dbs_info->prev_cpu_idle);
		j_dbs_info->prev_cpu_idle = cur_idle_time;

		if (dbs_tuners_ins.ignore_nice) {
			cputime64_t cur_nice;
			unsigned long cur_nice_jiffies;

			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
					 j_dbs_info->prev_cpu_nice);
			/*
			 * Assumption: nice time between sampling periods will
			 * be less than 2^32 jiffies for 32 bit sys
			 */
			cur_nice_jiffies = (unsigned long)
					cputime64_to_jiffies64(cur_nice);

			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
			idle_time += jiffies_to_usecs(cur_nice_jiffies);
		}

		if (unlikely(!wall_time || wall_time < idle_time))
			continue;

		load = 100 * (wall_time - idle_time) / wall_time;

		freq_avg = __cpufreq_driver_getavg(policy, j);
		if (freq_avg <= 0)
			freq_avg = policy->cur;

		load_freq = load * freq_avg;
		if (load_freq > max_load_freq)
			max_load_freq = load_freq;
	}

	/* Check for frequency increase */
	if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
		/* if we are already at full speed then break out early */
		if (!dbs_tuners_ins.powersave_bias) {
			if (policy->cur == policy->max)
				return;

			__cpufreq_driver_target(policy, policy->max,
				CPUFREQ_RELATION_H);
		} else {
			int freq = powersave_bias_target(policy, policy->max,
					CPUFREQ_RELATION_H);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
		return;
	}

	/* Check for frequency decrease */
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;

	/*
	 * The optimal frequency is the frequency that is the lowest that
	 * can support the current CPU usage without triggering the up
	 * policy. To be safe, we focus 10 points under the threshold.
	 */
	if (max_load_freq <
	    (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
	     policy->cur) {
		unsigned int freq_next;
		freq_next = max_load_freq /
				(dbs_tuners_ins.up_threshold -
				 dbs_tuners_ins.down_differential);

		if (!dbs_tuners_ins.powersave_bias) {
			__cpufreq_driver_target(policy, freq_next,
					CPUFREQ_RELATION_L);
		} else {
			int freq = powersave_bias_target(policy, freq_next,
					CPUFREQ_RELATION_L);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
	}
}

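/*
 * Deferrable work handler. A NORMAL sample re-evaluates the load via
 * dbs_check_cpu(); when powersave_bias is set, it is followed by a SUB
 * sample that drops to freq_lo, so the average frequency approximates the
 * biased target.
 */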
static void do_dbs_timer(struct work_struct *work)
{
	struct cpu_dbs_info_s *dbs_info =
		container_of(work, struct cpu_dbs_info_s, work.work);
	unsigned int cpu = dbs_info->cpu;
	int sample_type = dbs_info->sample_type;

	/* We want all CPUs to do sampling nearly on same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	delay -= jiffies % delay;
	mutex_lock(&dbs_info->timer_mutex);

	/* Common NORMAL_SAMPLE setup */
	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
	if (!dbs_tuners_ins.powersave_bias ||
	    sample_type == DBS_NORMAL_SAMPLE) {
		dbs_check_cpu(dbs_info);
		if (dbs_info->freq_lo) {
			/* Setup timer for SUB_SAMPLE */
			dbs_info->sample_type = DBS_SUB_SAMPLE;
			delay = dbs_info->freq_hi_jiffies;
		}
	} else {
		__cpufreq_driver_target(dbs_info->cur_policy,
			dbs_info->freq_lo, CPUFREQ_RELATION_H);
	}
	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
	mutex_unlock(&dbs_info->timer_mutex);
}

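/* Schedule the first (deferrable) sampling work for this CPU. */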
static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
	/* We want all CPUs to do sampling nearly on same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	delay -= jiffies % delay;

	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
	queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
		delay);
}

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
	cancel_delayed_work_sync(&dbs_info->work);
}

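/*
 * Governor event callback: GOV_START sets up per-CPU state, sysfs entries
 * and the sampling timer, GOV_STOP tears them down, and GOV_LIMITS clamps
 * the current frequency to the new policy bounds.
 */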
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				   unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;
	int rc;

	this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		mutex_lock(&dbs_mutex);

		rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old);
		if (rc) {
			mutex_unlock(&dbs_mutex);
			return rc;
		}

		dbs_enable++;
		for_each_cpu(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&j_dbs_info->prev_cpu_wall);
			if (dbs_tuners_ins.ignore_nice) {
				j_dbs_info->prev_cpu_nice =
						kstat_cpu(j).cpustat.nice;
			}
		}
		this_dbs_info->cpu = cpu;
		ondemand_powersave_bias_init_cpu(cpu);
		/*
		 * Start the timerschedule work, when this governor
		 * is used for first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;

			rc = sysfs_create_group(cpufreq_global_kobject,
						&dbs_attr_group);
			if (rc) {
				mutex_unlock(&dbs_mutex);
				return rc;
			}

			/* policy latency is in ns. Convert it to us first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;
			/* Bring kernel and HW constraints together */
			min_sampling_rate = max(min_sampling_rate,
					MIN_LATENCY_MULTIPLIER * latency);
			dbs_tuners_ins.sampling_rate =
				max(min_sampling_rate,
				    latency * LATENCY_MULTIPLIER);
		}
		mutex_unlock(&dbs_mutex);

		mutex_init(&this_dbs_info->timer_mutex);
		dbs_timer_init(this_dbs_info);
		break;

	case CPUFREQ_GOV_STOP:
		dbs_timer_exit(this_dbs_info);

		mutex_lock(&dbs_mutex);
		sysfs_remove_group(&policy->kobj, &dbs_attr_group_old);
		mutex_destroy(&this_dbs_info->timer_mutex);
		dbs_enable--;
		mutex_unlock(&dbs_mutex);
		if (!dbs_enable)
			sysfs_remove_group(cpufreq_global_kobject,
					   &dbs_attr_group);

		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&this_dbs_info->timer_mutex);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
				policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
				policy->min, CPUFREQ_RELATION_L);
		mutex_unlock(&this_dbs_info->timer_mutex);
		break;
	}
	return 0;
}

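/*
 * Module init: choose thresholds depending on whether idle micro-accounting
 * is available, create the kondemand workqueue and register the governor.
 */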
static int __init cpufreq_gov_dbs_init(void)
{
	int err;
	cputime64_t wall;
	u64 idle_time;
	int cpu = get_cpu();

	idle_time = get_cpu_idle_time_us(cpu, &wall);
	put_cpu();
	if (idle_time != -1ULL) {
		/* Idle micro accounting is supported. Use finer thresholds */
		dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
		dbs_tuners_ins.down_differential =
					MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
		/*
		 * In no_hz/micro accounting case we set the minimum frequency
		 * not depending on HZ, but fixed (very low). The deferred
		 * timer might skip some samples if idle/sleeping as needed.
		*/
		min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
	} else {
		/* For correct statistics, we need 10 ticks for each measure */
		min_sampling_rate =
			MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
	}

	kondemand_wq = create_workqueue("kondemand");
	if (!kondemand_wq) {
		printk(KERN_ERR "Creation of kondemand failed\n");
		return -EFAULT;
	}
	err = cpufreq_register_governor(&cpufreq_gov_ondemand);
	if (err)
		destroy_workqueue(kondemand_wq);

	return err;
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_ondemand);
	destroy_workqueue(kondemand_wq);
}


MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
	"Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);