cpufreq_ondemand.c 17.2 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
A
Andrew Morton 已提交
17
#include <linux/cpu.h>
L
Linus Torvalds 已提交
18 19
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
20
#include <linux/mutex.h>
L
Linus Torvalds 已提交
21 22 23 24 25 26

/*
 * "dbs" is used in this file as a short form of "demand-based switching".
 * It helps to keep variable names shorter and simpler.
 */

27
#define DEF_FREQUENCY_DOWN_DIFFERENTIAL		(10)
L
Linus Torvalds 已提交
28
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
29
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
L
Linus Torvalds 已提交
30 31
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

32 33
/*
 * The polling frequency of this governor depends on the capability of
L
Linus Torvalds 已提交
34
 * the processor. Default polling frequency is 1000 times the transition
35 36
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10mS, using appropriate sampling
L
Linus Torvalds 已提交
37 38 39 40 41
 * rate.
 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
 * this governor will not work.
 * All times here are in uS.
 */
42
static unsigned int def_sampling_rate;
43 44
#define MIN_SAMPLING_RATE_RATIO			(2)
/* for correct statistics, we need at least 10 ticks between each measure */
45 46 47 48
#define MIN_STAT_SAMPLING_RATE 			\
			(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
#define MIN_SAMPLING_RATE			\
			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
L
Linus Torvalds 已提交
49 50
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
51
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
L
Linus Torvalds 已提交
52

D
David Howells 已提交
53 54 55
static void do_dbs_timer(struct work_struct *work);

/* Sampling types */
56
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
L
Linus Torvalds 已提交
57 58

struct cpu_dbs_info_s {
59 60
	cputime64_t prev_cpu_idle;
	cputime64_t prev_cpu_wall;
61
	struct cpufreq_policy *cur_policy;
D
David Howells 已提交
62
 	struct delayed_work work;
63 64 65 66
	struct cpufreq_frequency_table *freq_table;
	unsigned int freq_lo;
	unsigned int freq_lo_jiffies;
	unsigned int freq_hi_jiffies;
67 68 69
	int cpu;
	unsigned int enable:1,
	             sample_type:1;
L
Linus Torvalds 已提交
70 71 72 73 74
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

75 76 77 78 79 80 81 82
/*
 * DEADLOCK ALERT! There is an ordering requirement between cpu_hotplug
 * lock and dbs_mutex. cpu_hotplug lock should always be held before
 * dbs_mutex. If any function that can potentially take cpu_hotplug lock
 * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then
 * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
 * is recursive for the same process. -Venki
 */
83
static DEFINE_MUTEX(dbs_mutex);
L
Linus Torvalds 已提交
84

85
static struct workqueue_struct	*kondemand_wq;
86

87
/* Governor tunables, exposed through the "ondemand" sysfs group. */
static struct dbs_tuners {
	/* Interval between load samples, in microseconds. */
	unsigned int sampling_rate;
	/* Load percentage above which the frequency is raised. */
	unsigned int up_threshold;
	/* Margin subtracted from up_threshold when deciding to scale down. */
	unsigned int down_differential;
	/* Non-zero: time spent in niced tasks is not counted as busy time. */
	unsigned int ignore_nice;
	/* Frequency reduction in units of 1/1000 of the requested frequency. */
	unsigned int powersave_bias;
} dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
	.ignore_nice = 0,
	.powersave_bias = 0,
};

100
/*
 * Return the cumulative idle time of @cpu, computed as the current wall
 * time minus the accumulated busy time (user + system + irq + softirq +
 * steal, plus nice unless the ignore_nice tunable is set).
 *
 * If @wall is not NULL, the wall time used for the computation is stored
 * through it.
 */
static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
	cputime64_t now;
	cputime64_t busy;

	now = jiffies64_to_cputime64(get_jiffies_64());

	busy = cputime64_add(kstat_cpu(cpu).cpustat.user,
			kstat_cpu(cpu).cpustat.system);
	busy = cputime64_add(busy, kstat_cpu(cpu).cpustat.irq);
	busy = cputime64_add(busy, kstat_cpu(cpu).cpustat.softirq);
	busy = cputime64_add(busy, kstat_cpu(cpu).cpustat.steal);

	/* Nice time only counts as load when the tunable says so. */
	if (!dbs_tuners_ins.ignore_nice)
		busy = cputime64_add(busy, kstat_cpu(cpu).cpustat.nice);

	if (wall)
		*wall = now;

	return cputime64_sub(now, busy);
}

126 127 128 129 130
/*
 * Find right freq to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
 */
131 132 133
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
					  unsigned int freq_next,
					  unsigned int relation)
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
{
	unsigned int freq_req, freq_reduc, freq_avg;
	unsigned int freq_hi, freq_lo;
	unsigned int index = 0;
	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu);

	if (!dbs_info->freq_table) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_next;
	}

	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
			relation, &index);
	freq_req = dbs_info->freq_table[index].frequency;
	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
	freq_avg = freq_req - freq_reduc;

	/* Find freq bounds for freq_avg in freq_table */
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_H, &index);
	freq_lo = dbs_info->freq_table[index].frequency;
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_L, &index);
	freq_hi = dbs_info->freq_table[index].frequency;

	/* Find out how long we have to be in hi and lo freqs */
	if (freq_hi == freq_lo) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_lo;
	}
	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
	jiffies_hi += ((freq_hi - freq_lo) / 2);
	jiffies_hi /= (freq_hi - freq_lo);
	jiffies_lo = jiffies_total - jiffies_hi;
	dbs_info->freq_lo = freq_lo;
	dbs_info->freq_lo_jiffies = jiffies_lo;
	dbs_info->freq_hi_jiffies = jiffies_hi;
	return freq_hi;
}

/*
 * Reset the powersave_bias state of every online CPU: refresh the cached
 * frequency table pointer and clear any pending low-frequency sub-sample.
 */
static void ondemand_powersave_bias_init(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		struct cpu_dbs_info_s *info = &per_cpu(cpu_dbs_info, cpu);

		info->freq_table = cpufreq_frequency_get_table(cpu);
		info->freq_lo = 0;
	}
}

L
Linus Torvalds 已提交
190 191 192 193 194 195 196 197 198 199 200
/************************** sysfs interface ************************/
/* sysfs show handler: print MAX_SAMPLING_RATE followed by a newline. */
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
	unsigned int upper_limit = MAX_SAMPLING_RATE;

	return sprintf(buf, "%u\n", upper_limit);
}

/* sysfs show handler: print MIN_SAMPLING_RATE followed by a newline. */
static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
	unsigned int lower_limit = MIN_SAMPLING_RATE;

	return sprintf(buf, "%u\n", lower_limit);
}

201 202
#define define_one_ro(_name)		\
static struct freq_attr _name =		\
L
Linus Torvalds 已提交
203 204 205 206 207 208 209 210 211 212 213 214 215 216
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(up_threshold, up_threshold);
217
show_one(ignore_nice_load, ignore_nice);
218
show_one(powersave_bias, powersave_bias);
L
Linus Torvalds 已提交
219

220
/*
 * sysfs store handler for sampling_rate.
 *
 * Accepts an unsigned decimal value within
 * [MIN_SAMPLING_RATE, MAX_SAMPLING_RATE]. Returns @count on success and
 * -EINVAL when the input does not parse or is out of range. The range
 * check and the update are done under dbs_mutex.
 */
static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int rate;
	ssize_t status = -EINVAL;
	int parsed = sscanf(buf, "%u", &rate);

	mutex_lock(&dbs_mutex);
	if (parsed == 1 && rate <= MAX_SAMPLING_RATE &&
	    rate >= MIN_SAMPLING_RATE) {
		dbs_tuners_ins.sampling_rate = rate;
		status = count;
	}
	mutex_unlock(&dbs_mutex);

	return status;
}

240
/*
 * sysfs store handler for up_threshold.
 *
 * Accepts an unsigned decimal value within
 * [MIN_FREQUENCY_UP_THRESHOLD, MAX_FREQUENCY_UP_THRESHOLD]. Returns @count
 * on success and -EINVAL when the input does not parse or is out of range.
 * The range check and the update are done under dbs_mutex.
 */
static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int threshold;
	ssize_t status = -EINVAL;
	int parsed = sscanf(buf, "%u", &threshold);

	mutex_lock(&dbs_mutex);
	if (parsed == 1 && threshold <= MAX_FREQUENCY_UP_THRESHOLD &&
	    threshold >= MIN_FREQUENCY_UP_THRESHOLD) {
		dbs_tuners_ins.up_threshold = threshold;
		status = count;
	}
	mutex_unlock(&dbs_mutex);

	return status;
}

260
/*
 * sysfs store handler for ignore_nice_load.
 *
 * Any value greater than 1 is treated as 1. When the setting actually
 * changes, the idle/wall snapshots of every online CPU are re-taken,
 * because the definition of "busy" used by get_cpu_idle_time() depends on
 * this tunable. Returns @count, or -EINVAL if the input does not parse.
 */
static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int value;
	unsigned int cpu;

	if (sscanf(buf, "%u", &value) != 1)
		return -EINVAL;

	/* Normalise to a boolean. */
	value = !!value;

	mutex_lock(&dbs_mutex);
	if (value != dbs_tuners_ins.ignore_nice) {
		dbs_tuners_ins.ignore_nice = value;

		/* we need to re-evaluate prev_cpu_idle */
		for_each_online_cpu(cpu) {
			struct cpu_dbs_info_s *info = &per_cpu(cpu_dbs_info, cpu);

			info->prev_cpu_idle = get_cpu_idle_time(cpu,
						&info->prev_cpu_wall);
		}
	}
	mutex_unlock(&dbs_mutex);

	return count;
}

294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
/*
 * sysfs store handler for powersave_bias.
 *
 * The value is capped at 1000. After updating the tunable, the per-CPU
 * powersave_bias state is reset through ondemand_powersave_bias_init().
 * Returns @count, or -EINVAL if the input does not parse.
 */
static ssize_t store_powersave_bias(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int bias;

	if (sscanf(buf, "%u", &bias) != 1)
		return -EINVAL;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.powersave_bias = bias > 1000 ? 1000 : bias;
	ondemand_powersave_bias_init();
	mutex_unlock(&dbs_mutex);

	return count;
}

L
Linus Torvalds 已提交
315 316 317 318 319 320
#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(up_threshold);
321
define_one_rw(ignore_nice_load);
322
define_one_rw(powersave_bias);
L
Linus Torvalds 已提交
323 324 325 326 327 328

static struct attribute * dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&up_threshold.attr,
329
	&ignore_nice_load.attr,
330
	&powersave_bias.attr,
L
Linus Torvalds 已提交
331 332 333 334 335 336 337 338 339 340
	NULL
};

/*
 * sysfs attribute group bundling dbs_attributes under the name "ondemand";
 * it is created and removed on policy->kobj by cpufreq_governor_dbs().
 */
static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "ondemand",
};

/************************** sysfs end ************************/

341
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
L
Linus Torvalds 已提交
342
{
343
	unsigned int max_load_freq;
L
Linus Torvalds 已提交
344 345 346 347 348 349 350

	struct cpufreq_policy *policy;
	unsigned int j;

	if (!this_dbs_info->enable)
		return;

351
	this_dbs_info->freq_lo = 0;
L
Linus Torvalds 已提交
352
	policy = this_dbs_info->cur_policy;
353

354
	/*
355 356
	 * Every sampling_rate, we check, if current idle time is less
	 * than 20% (default), then we try to increase frequency
357
	 * Every sampling_rate, we look for a the lowest
358 359
	 * frequency which can sustain the load while keeping idle time over
	 * 30%. If such a frequency exist, we try to decrease to this frequency.
L
Linus Torvalds 已提交
360
	 *
361 362 363
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens at minimum steps of
	 * 5% (default) of current frequency
L
Linus Torvalds 已提交
364 365
	 */

366 367 368
	/* Get Absolute Load - in terms of freq */
	max_load_freq = 0;

369
	for_each_cpu_mask_nr(j, policy->cpus) {
L
Linus Torvalds 已提交
370
		struct cpu_dbs_info_s *j_dbs_info;
371 372 373 374
		cputime64_t cur_wall_time, cur_idle_time;
		unsigned int idle_time, wall_time;
		unsigned int load, load_freq;
		int freq_avg;
L
Linus Torvalds 已提交
375 376

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
377 378 379

		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

380 381 382 383 384
		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
				j_dbs_info->prev_cpu_wall);
		j_dbs_info->prev_cpu_wall = cur_wall_time;

		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
385
				j_dbs_info->prev_cpu_idle);
386
		j_dbs_info->prev_cpu_idle = cur_idle_time;
L
Linus Torvalds 已提交
387

388
		if (unlikely(!wall_time || wall_time < idle_time))
389 390 391 392 393 394 395 396 397 398 399
			continue;

		load = 100 * (wall_time - idle_time) / wall_time;

		freq_avg = __cpufreq_driver_getavg(policy, j);
		if (freq_avg <= 0)
			freq_avg = policy->cur;

		load_freq = load * freq_avg;
		if (load_freq > max_load_freq)
			max_load_freq = load_freq;
L
Linus Torvalds 已提交
400 401
	}

402
	/* Check for frequency increase */
403
	if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
404
		/* if we are already at full speed then break out early */
405 406 407 408 409 410 411 412 413 414 415 416
		if (!dbs_tuners_ins.powersave_bias) {
			if (policy->cur == policy->max)
				return;

			__cpufreq_driver_target(policy, policy->max,
				CPUFREQ_RELATION_H);
		} else {
			int freq = powersave_bias_target(policy, policy->max,
					CPUFREQ_RELATION_H);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
L
Linus Torvalds 已提交
417 418 419 420
		return;
	}

	/* Check for frequency decrease */
421 422 423
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;
L
Linus Torvalds 已提交
424

425 426 427 428 429
	/*
	 * The optimal frequency is the frequency that is the lowest that
	 * can support the current CPU usage without triggering the up
	 * policy. To be safe, we focus 10 points under the threshold.
	 */
430 431 432
	if (max_load_freq <
	    (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
	     policy->cur) {
433
		unsigned int freq_next;
434 435 436
		freq_next = max_load_freq /
				(dbs_tuners_ins.up_threshold -
				 dbs_tuners_ins.down_differential);
437

438 439 440 441 442 443 444 445 446
		if (!dbs_tuners_ins.powersave_bias) {
			__cpufreq_driver_target(policy, freq_next,
					CPUFREQ_RELATION_L);
		} else {
			int freq = powersave_bias_target(policy, freq_next,
					CPUFREQ_RELATION_L);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
447
	}
L
Linus Torvalds 已提交
448 449
}

D
David Howells 已提交
450
static void do_dbs_timer(struct work_struct *work)
451
{
452 453 454 455 456
	struct cpu_dbs_info_s *dbs_info =
		container_of(work, struct cpu_dbs_info_s, work.work);
	unsigned int cpu = dbs_info->cpu;
	int sample_type = dbs_info->sample_type;

457 458
	/* We want all CPUs to do sampling nearly on same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
D
David Howells 已提交
459

460
	delay -= jiffies % delay;
461

462
	if (lock_policy_rwsem_write(cpu) < 0)
463
		return;
464 465 466 467 468 469

	if (!dbs_info->enable) {
		unlock_policy_rwsem_write(cpu);
		return;
	}

470
	/* Common NORMAL_SAMPLE setup */
D
David Howells 已提交
471
	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
472
	if (!dbs_tuners_ins.powersave_bias ||
D
David Howells 已提交
473
	    sample_type == DBS_NORMAL_SAMPLE) {
474 475 476
		dbs_check_cpu(dbs_info);
		if (dbs_info->freq_lo) {
			/* Setup timer for SUB_SAMPLE */
D
David Howells 已提交
477
			dbs_info->sample_type = DBS_SUB_SAMPLE;
478 479 480 481 482 483 484
			delay = dbs_info->freq_hi_jiffies;
		}
	} else {
		__cpufreq_driver_target(dbs_info->cur_policy,
	                        	dbs_info->freq_lo,
	                        	CPUFREQ_RELATION_H);
	}
485
	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
486
	unlock_policy_rwsem_write(cpu);
487
}
L
Linus Torvalds 已提交
488

489
static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
L
Linus Torvalds 已提交
490
{
491 492 493
	/* We want all CPUs to do sampling nearly on same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	delay -= jiffies % delay;
494

D
Dave Jones 已提交
495
	dbs_info->enable = 1;
496
	ondemand_powersave_bias_init();
D
David Howells 已提交
497
	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
498
	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
499 500
	queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
	                      delay);
L
Linus Torvalds 已提交
501 502
}

503
static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
L
Linus Torvalds 已提交
504
{
505 506
	dbs_info->enable = 0;
	cancel_delayed_work(&dbs_info->work);
L
Linus Torvalds 已提交
507 508 509 510 511 512 513 514
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				   unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;
J
Jeff Garzik 已提交
515
	int rc;
L
Linus Torvalds 已提交
516 517 518 519 520

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
521
		if ((!cpu_online(cpu)) || (!policy->cur))
L
Linus Torvalds 已提交
522 523 524 525
			return -EINVAL;

		if (this_dbs_info->enable) /* Already enabled */
			break;
526

527
		mutex_lock(&dbs_mutex);
528
		dbs_enable++;
J
Jeff Garzik 已提交
529 530 531 532 533 534 535 536

		rc = sysfs_create_group(&policy->kobj, &dbs_attr_group);
		if (rc) {
			dbs_enable--;
			mutex_unlock(&dbs_mutex);
			return rc;
		}

537
		for_each_cpu_mask_nr(j, policy->cpus) {
L
Linus Torvalds 已提交
538 539 540
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;
541

542 543
			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&j_dbs_info->prev_cpu_wall);
L
Linus Torvalds 已提交
544
		}
545
		this_dbs_info->cpu = cpu;
L
Linus Torvalds 已提交
546 547 548 549 550 551 552
		/*
		 * Start the timerschedule work, when this governor
		 * is used for first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in nS. Convert it to uS first */
553 554 555
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;
L
Linus Torvalds 已提交
556

557
			def_sampling_rate = latency *
L
Linus Torvalds 已提交
558
					DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
559 560 561 562

			if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
				def_sampling_rate = MIN_STAT_SAMPLING_RATE;

L
Linus Torvalds 已提交
563 564
			dbs_tuners_ins.sampling_rate = def_sampling_rate;
		}
565
		dbs_timer_init(this_dbs_info);
566

567
		mutex_unlock(&dbs_mutex);
L
Linus Torvalds 已提交
568 569 570
		break;

	case CPUFREQ_GOV_STOP:
571
		mutex_lock(&dbs_mutex);
572
		dbs_timer_exit(this_dbs_info);
L
Linus Torvalds 已提交
573 574
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
575
		mutex_unlock(&dbs_mutex);
L
Linus Torvalds 已提交
576 577 578 579

		break;

	case CPUFREQ_GOV_LIMITS:
580
		mutex_lock(&dbs_mutex);
L
Linus Torvalds 已提交
581
		if (policy->max < this_dbs_info->cur_policy->cur)
582 583 584
			__cpufreq_driver_target(this_dbs_info->cur_policy,
			                        policy->max,
			                        CPUFREQ_RELATION_H);
L
Linus Torvalds 已提交
585
		else if (policy->min > this_dbs_info->cur_policy->cur)
586 587 588
			__cpufreq_driver_target(this_dbs_info->cur_policy,
			                        policy->min,
			                        CPUFREQ_RELATION_L);
589
		mutex_unlock(&dbs_mutex);
L
Linus Torvalds 已提交
590 591 592 593 594
		break;
	}
	return 0;
}

595 596 597 598 599
/*
 * Governor descriptor registered with the cpufreq core. The
 * max_transition_latency limit of TRANSITION_LATENCY_LIMIT corresponds to
 * the 10 ms bound described at the top of this file.
 */
struct cpufreq_governor cpufreq_gov_ondemand = {
	.owner			= THIS_MODULE,
	.name			= "ondemand",
	.governor		= cpufreq_governor_dbs,
	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
};
601
EXPORT_SYMBOL(cpufreq_gov_ondemand);
L
Linus Torvalds 已提交
602 603 604

/*
 * Module init: create the "kondemand" workqueue used for sampling and
 * register the governor with the cpufreq core.
 *
 * Returns 0 on success, -ENOMEM if the workqueue cannot be created, or the
 * error returned by cpufreq_register_governor() (in which case the
 * workqueue is destroyed again).
 */
static int __init cpufreq_gov_dbs_init(void)
{
	int err;

	kondemand_wq = create_workqueue("kondemand");
	if (!kondemand_wq) {
		printk(KERN_ERR "Creation of kondemand failed\n");
		/* A failed creation is a resource shortage, not a bad address. */
		return -ENOMEM;
	}

	err = cpufreq_register_governor(&cpufreq_gov_ondemand);
	if (err)
		destroy_workqueue(kondemand_wq);

	return err;
}

/*
 * Module exit: unregister the governor first, then destroy the workqueue
 * its sampling work runs on.
 */
static void __exit cpufreq_gov_dbs_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_ondemand);
	destroy_workqueue(kondemand_wq);
}


626 627 628 629 630
MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
                   "Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");
L
Linus Torvalds 已提交
631

632 633 634
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
fs_initcall(cpufreq_gov_dbs_init);
#else
L
Linus Torvalds 已提交
635
module_init(cpufreq_gov_dbs_init);
636
#endif
L
Linus Torvalds 已提交
637
module_exit(cpufreq_gov_dbs_exit);