cpufreq_ondemand.c 17.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
A
Andrew Morton 已提交
17
#include <linux/cpu.h>
L
Linus Torvalds 已提交
18 19
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
20
#include <linux/mutex.h>
L
Linus Torvalds 已提交
21 22 23 24 25 26 27

/*
 * dbs is used in this file as a shortform for demandbased switching
 * It helps to keep variable names smaller, simpler
 */

/*
 * Default and accepted range of the up_threshold tunable, which is compared
 * against load expressed in percent.  The minimum is above 10 because
 * dbs_check_cpu() divides by (up_threshold - 10).
 */
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

31 32
/*
 * The polling frequency of this governor depends on the capability of
 * the processor. Default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10mS, using appropriate sampling
 * rate.
 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL)
 * this governor will not work.
 * All times here are in uS.
 */
41
/* Default sampling period in uS; computed when the governor is first started */
static unsigned int def_sampling_rate;

/* Divisor applied to def_sampling_rate to obtain the lowest accepted rate */
#define MIN_SAMPLING_RATE_RATIO			(2)

/* for correct statistics, we need at least 10 ticks between each measure */
#define MIN_STAT_SAMPLING_RATE 			\
			(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))

/* Range accepted by the sampling_rate sysfs file; both track def_sampling_rate */
#define MIN_SAMPLING_RATE			\
			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)

/* def_sampling_rate = transition latency (uS) * this multiplier */
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)

/* Value handed to the cpufreq core as max_transition_latency */
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
L
Linus Torvalds 已提交
51

D
David Howells 已提交
52 53 54
/* Work handler that runs one sampling pass; defined further down. */
static void do_dbs_timer(struct work_struct *work);

/* Sampling types */
enum {
	DBS_NORMAL_SAMPLE,
	DBS_SUB_SAMPLE
};
L
Linus Torvalds 已提交
56 57

struct cpu_dbs_info_s {
58 59
	cputime64_t prev_cpu_idle;
	cputime64_t prev_cpu_wall;
60
	struct cpufreq_policy *cur_policy;
D
David Howells 已提交
61
 	struct delayed_work work;
62 63 64 65
	struct cpufreq_frequency_table *freq_table;
	unsigned int freq_lo;
	unsigned int freq_lo_jiffies;
	unsigned int freq_hi_jiffies;
66 67 68
	int cpu;
	unsigned int enable:1,
	             sample_type:1;
L
Linus Torvalds 已提交
69 70 71 72 73
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

74 75 76 77 78 79 80 81
/*
 * DEADLOCK ALERT! There is an ordering requirement between cpu_hotplug
 * lock and dbs_mutex. cpu_hotplug lock should always be held before
 * dbs_mutex. If any function that can potentially take cpu_hotplug lock
 * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then
 * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
 * is recursive for the same process. -Venki
 */
82
static DEFINE_MUTEX(dbs_mutex);
L
Linus Torvalds 已提交
83

84
static struct workqueue_struct	*kondemand_wq;
85

86
/* Governor tunables exposed through the "ondemand" sysfs group. */
static struct dbs_tuners {
	/* sampling period in uS */
	unsigned int sampling_rate;
	/* load percentage above which the frequency is raised */
	unsigned int up_threshold;
	/* when non-zero, nice time is not counted as busy time */
	unsigned int ignore_nice;
	/* frequency reduction in units of 1/1000 (stored value is capped at 1000) */
	unsigned int powersave_bias;
} dbs_tuners_ins = {
	.powersave_bias = 0,
	.ignore_nice = 0,
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
};

97
static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
98
{
99 100 101
	cputime64_t idle_time;
	cputime64_t cur_jiffies;
	cputime64_t busy_time;
102

103 104 105
	cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
			kstat_cpu(cpu).cpustat.system);
106

107 108 109
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
110

111 112 113 114 115 116 117
	if (!dbs_tuners_ins.ignore_nice) {
		busy_time = cputime64_add(busy_time,
				kstat_cpu(cpu).cpustat.nice);
	}

	idle_time = cputime64_sub(cur_jiffies, busy_time);
	return idle_time;
118 119
}

120 121 122 123 124
/*
 * Find right freq to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
 */
125 126 127
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
					  unsigned int freq_next,
					  unsigned int relation)
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
{
	unsigned int freq_req, freq_reduc, freq_avg;
	unsigned int freq_hi, freq_lo;
	unsigned int index = 0;
	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu);

	if (!dbs_info->freq_table) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_next;
	}

	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
			relation, &index);
	freq_req = dbs_info->freq_table[index].frequency;
	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
	freq_avg = freq_req - freq_reduc;

	/* Find freq bounds for freq_avg in freq_table */
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_H, &index);
	freq_lo = dbs_info->freq_table[index].frequency;
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_L, &index);
	freq_hi = dbs_info->freq_table[index].frequency;

	/* Find out how long we have to be in hi and lo freqs */
	if (freq_hi == freq_lo) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_lo;
	}
	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
	jiffies_hi += ((freq_hi - freq_lo) / 2);
	jiffies_hi /= (freq_hi - freq_lo);
	jiffies_lo = jiffies_total - jiffies_hi;
	dbs_info->freq_lo = freq_lo;
	dbs_info->freq_lo_jiffies = jiffies_lo;
	dbs_info->freq_hi_jiffies = jiffies_hi;
	return freq_hi;
}

/*
 * Refresh the cached frequency table pointer of every online CPU and
 * clear any pending low-frequency sub-sample.
 */
static void ondemand_powersave_bias_init(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		struct cpu_dbs_info_s *info = &per_cpu(cpu_dbs_info, cpu);

		info->freq_lo = 0;
		info->freq_table = cpufreq_frequency_get_table(cpu);
	}
}

L
Linus Torvalds 已提交
184 185 186 187 188 189 190 191 192 193 194
/************************** sysfs interface ************************/
/* sysfs read handler: print the largest accepted sampling rate (uS). */
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
	unsigned int max_rate = MAX_SAMPLING_RATE;

	return sprintf(buf, "%u\n", max_rate);
}

/* sysfs read handler: print the smallest accepted sampling rate (uS). */
static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
	unsigned int min_rate = MIN_SAMPLING_RATE;

	return sprintf(buf, "%u\n", min_rate);
}

195 196
#define define_one_ro(_name)		\
static struct freq_attr _name =		\
L
Linus Torvalds 已提交
197 198 199 200 201 202 203 204 205 206 207 208 209 210
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
/*
 * Generate show_<file_name>(), a sysfs read handler that prints the
 * dbs_tuners_ins field named by 'object' as an unsigned decimal.
 */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct cpufreq_policy *policy, char *buf)				\
{									\
	unsigned int value = dbs_tuners_ins.object;			\
									\
	return sprintf(buf, "%u\n", value);				\
}
show_one(sampling_rate, sampling_rate);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(powersave_bias, powersave_bias);
L
Linus Torvalds 已提交
213

214
/*
 * sysfs write handler for sampling_rate.
 *
 * Accepts an unsigned decimal within [MIN_SAMPLING_RATE, MAX_SAMPLING_RATE].
 * The range check runs under dbs_mutex.
 * Returns @count on success, -EINVAL on a parse failure or out-of-range value.
 */
static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	ssize_t status = count;
	unsigned int rate;
	int parsed;

	parsed = sscanf(buf, "%u", &rate);

	mutex_lock(&dbs_mutex);
	if (parsed == 1 && rate <= MAX_SAMPLING_RATE
			&& rate >= MIN_SAMPLING_RATE)
		dbs_tuners_ins.sampling_rate = rate;
	else
		status = -EINVAL;
	mutex_unlock(&dbs_mutex);

	return status;
}

234
/*
 * sysfs write handler for up_threshold.
 *
 * Accepts an unsigned decimal within
 * [MIN_FREQUENCY_UP_THRESHOLD, MAX_FREQUENCY_UP_THRESHOLD].
 * Returns @count on success, -EINVAL on a parse failure or out-of-range value.
 */
static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	ssize_t status = count;
	unsigned int threshold;
	int parsed;

	parsed = sscanf(buf, "%u", &threshold);

	mutex_lock(&dbs_mutex);
	if (parsed == 1 && threshold <= MAX_FREQUENCY_UP_THRESHOLD &&
			threshold >= MIN_FREQUENCY_UP_THRESHOLD)
		dbs_tuners_ins.up_threshold = threshold;
	else
		status = -EINVAL;
	mutex_unlock(&dbs_mutex);

	return status;
}

254
/*
 * sysfs write handler for ignore_nice_load.
 *
 * Parses an unsigned decimal; any value above 1 is treated as 1.  When the
 * setting actually changes, the per-CPU idle/wall baselines of every online
 * CPU are re-seeded, because get_cpu_idle_time() depends on ignore_nice.
 *
 * Returns @count on success, -EINVAL when the input cannot be parsed.
 */
static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	unsigned int j;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	mutex_lock(&dbs_mutex);
	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		mutex_unlock(&dbs_mutex);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, j);

		dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
		/*
		 * prev_cpu_wall is later subtracted from a cputime64 value in
		 * dbs_check_cpu(), so store it in cputime64 units rather than
		 * raw jiffies.
		 */
		dbs_info->prev_cpu_wall =
			jiffies64_to_cputime64(get_jiffies_64());
	}
	mutex_unlock(&dbs_mutex);

	return count;
}

288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308
/*
 * sysfs write handler for powersave_bias.
 *
 * Parses an unsigned decimal, caps it at 1000, stores it and re-initialises
 * the per-CPU powersave state under dbs_mutex.
 * Returns @count on success, -EINVAL when the input cannot be parsed.
 */
static ssize_t store_powersave_bias(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int bias;

	if (sscanf(buf, "%u", &bias) != 1)
		return -EINVAL;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.powersave_bias = (bias > 1000) ? 1000 : bias;
	ondemand_powersave_bias_init();
	mutex_unlock(&dbs_mutex);

	return count;
}

L
Linus Torvalds 已提交
309 310 311 312 313 314
#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(up_threshold);
315
define_one_rw(ignore_nice_load);
316
define_one_rw(powersave_bias);
L
Linus Torvalds 已提交
317 318 319 320 321 322

static struct attribute * dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&up_threshold.attr,
323
	&ignore_nice_load.attr,
324
	&powersave_bias.attr,
L
Linus Torvalds 已提交
325 326 327 328 329 330 331 332 333 334
	NULL
};

/* sysfs group created on the policy kobject when the governor starts. */
static struct attribute_group dbs_attr_group = {
	.name	= "ondemand",
	.attrs	= dbs_attributes,
};

/************************** sysfs end ************************/

335
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
L
Linus Torvalds 已提交
336
{
337
	unsigned int max_load_freq;
L
Linus Torvalds 已提交
338 339 340 341 342 343 344

	struct cpufreq_policy *policy;
	unsigned int j;

	if (!this_dbs_info->enable)
		return;

345
	this_dbs_info->freq_lo = 0;
L
Linus Torvalds 已提交
346
	policy = this_dbs_info->cur_policy;
347

348
	/*
349 350
	 * Every sampling_rate, we check, if current idle time is less
	 * than 20% (default), then we try to increase frequency
351
	 * Every sampling_rate, we look for a the lowest
352 353
	 * frequency which can sustain the load while keeping idle time over
	 * 30%. If such a frequency exist, we try to decrease to this frequency.
L
Linus Torvalds 已提交
354
	 *
355 356 357
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens at minimum steps of
	 * 5% (default) of current frequency
L
Linus Torvalds 已提交
358 359
	 */

360 361 362
	/* Get Absolute Load - in terms of freq */
	max_load_freq = 0;

363
	for_each_cpu_mask_nr(j, policy->cpus) {
L
Linus Torvalds 已提交
364
		struct cpu_dbs_info_s *j_dbs_info;
365 366 367 368
		cputime64_t cur_wall_time, cur_idle_time;
		unsigned int idle_time, wall_time;
		unsigned int load, load_freq;
		int freq_avg;
L
Linus Torvalds 已提交
369 370

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
371 372 373 374 375 376 377
		cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
				j_dbs_info->prev_cpu_wall);
		j_dbs_info->prev_cpu_wall = cur_wall_time;

		cur_idle_time = get_cpu_idle_time(j);
		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
378
				j_dbs_info->prev_cpu_idle);
379
		j_dbs_info->prev_cpu_idle = cur_idle_time;
L
Linus Torvalds 已提交
380

381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
		if (unlikely(wall_time <= idle_time ||
			     (cputime_to_msecs(wall_time) <
			      dbs_tuners_ins.sampling_rate / (2 * 1000)))) {
			continue;
		}

		load = 100 * (wall_time - idle_time) / wall_time;

		freq_avg = __cpufreq_driver_getavg(policy, j);
		if (freq_avg <= 0)
			freq_avg = policy->cur;

		load_freq = load * freq_avg;
		if (load_freq > max_load_freq)
			max_load_freq = load_freq;
L
Linus Torvalds 已提交
396 397
	}

398
	/* Check for frequency increase */
399
	if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
400
		/* if we are already at full speed then break out early */
401 402 403 404 405 406 407 408 409 410 411 412
		if (!dbs_tuners_ins.powersave_bias) {
			if (policy->cur == policy->max)
				return;

			__cpufreq_driver_target(policy, policy->max,
				CPUFREQ_RELATION_H);
		} else {
			int freq = powersave_bias_target(policy, policy->max,
					CPUFREQ_RELATION_H);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
L
Linus Torvalds 已提交
413 414 415 416
		return;
	}

	/* Check for frequency decrease */
417 418 419
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;
L
Linus Torvalds 已提交
420

421 422 423 424 425
	/*
	 * The optimal frequency is the frequency that is the lowest that
	 * can support the current CPU usage without triggering the up
	 * policy. To be safe, we focus 10 points under the threshold.
	 */
426 427 428
	if (max_load_freq < (dbs_tuners_ins.up_threshold - 10) * policy->cur) {
		unsigned int freq_next;
		freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - 10);
429

430 431 432 433 434 435 436 437 438
		if (!dbs_tuners_ins.powersave_bias) {
			__cpufreq_driver_target(policy, freq_next,
					CPUFREQ_RELATION_L);
		} else {
			int freq = powersave_bias_target(policy, freq_next,
					CPUFREQ_RELATION_L);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
439
	}
L
Linus Torvalds 已提交
440 441
}

D
David Howells 已提交
442
static void do_dbs_timer(struct work_struct *work)
443
{
444 445 446 447 448
	struct cpu_dbs_info_s *dbs_info =
		container_of(work, struct cpu_dbs_info_s, work.work);
	unsigned int cpu = dbs_info->cpu;
	int sample_type = dbs_info->sample_type;

449 450
	/* We want all CPUs to do sampling nearly on same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
D
David Howells 已提交
451

452
	delay -= jiffies % delay;
453

454
	if (lock_policy_rwsem_write(cpu) < 0)
455
		return;
456 457 458 459 460 461

	if (!dbs_info->enable) {
		unlock_policy_rwsem_write(cpu);
		return;
	}

462
	/* Common NORMAL_SAMPLE setup */
D
David Howells 已提交
463
	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
464
	if (!dbs_tuners_ins.powersave_bias ||
D
David Howells 已提交
465
	    sample_type == DBS_NORMAL_SAMPLE) {
466 467 468
		dbs_check_cpu(dbs_info);
		if (dbs_info->freq_lo) {
			/* Setup timer for SUB_SAMPLE */
D
David Howells 已提交
469
			dbs_info->sample_type = DBS_SUB_SAMPLE;
470 471 472 473 474 475 476
			delay = dbs_info->freq_hi_jiffies;
		}
	} else {
		__cpufreq_driver_target(dbs_info->cur_policy,
	                        	dbs_info->freq_lo,
	                        	CPUFREQ_RELATION_H);
	}
477
	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
478
	unlock_policy_rwsem_write(cpu);
479
}
L
Linus Torvalds 已提交
480

481
static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
L
Linus Torvalds 已提交
482
{
483 484 485
	/* We want all CPUs to do sampling nearly on same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	delay -= jiffies % delay;
486

D
Dave Jones 已提交
487
	dbs_info->enable = 1;
488
	ondemand_powersave_bias_init();
D
David Howells 已提交
489
	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
490
	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
491 492
	queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
	                      delay);
L
Linus Torvalds 已提交
493 494
}

495
static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
L
Linus Torvalds 已提交
496
{
497 498
	dbs_info->enable = 0;
	cancel_delayed_work(&dbs_info->work);
L
Linus Torvalds 已提交
499 500 501 502 503 504 505 506
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				   unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;
J
Jeff Garzik 已提交
507
	int rc;
L
Linus Torvalds 已提交
508 509 510 511 512

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
513
		if ((!cpu_online(cpu)) || (!policy->cur))
L
Linus Torvalds 已提交
514 515 516 517
			return -EINVAL;

		if (this_dbs_info->enable) /* Already enabled */
			break;
518

519
		mutex_lock(&dbs_mutex);
520
		dbs_enable++;
J
Jeff Garzik 已提交
521 522 523 524 525 526 527 528

		rc = sysfs_create_group(&policy->kobj, &dbs_attr_group);
		if (rc) {
			dbs_enable--;
			mutex_unlock(&dbs_mutex);
			return rc;
		}

529
		for_each_cpu_mask_nr(j, policy->cpus) {
L
Linus Torvalds 已提交
530 531 532
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;
533

534 535
			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
			j_dbs_info->prev_cpu_wall = get_jiffies_64();
L
Linus Torvalds 已提交
536
		}
537
		this_dbs_info->cpu = cpu;
L
Linus Torvalds 已提交
538 539 540 541 542 543 544
		/*
		 * Start the timerschedule work, when this governor
		 * is used for first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in nS. Convert it to uS first */
545 546 547
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;
L
Linus Torvalds 已提交
548

549
			def_sampling_rate = latency *
L
Linus Torvalds 已提交
550
					DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
551 552 553 554

			if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
				def_sampling_rate = MIN_STAT_SAMPLING_RATE;

L
Linus Torvalds 已提交
555 556
			dbs_tuners_ins.sampling_rate = def_sampling_rate;
		}
557
		dbs_timer_init(this_dbs_info);
558

559
		mutex_unlock(&dbs_mutex);
L
Linus Torvalds 已提交
560 561 562
		break;

	case CPUFREQ_GOV_STOP:
563
		mutex_lock(&dbs_mutex);
564
		dbs_timer_exit(this_dbs_info);
L
Linus Torvalds 已提交
565 566
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
567
		mutex_unlock(&dbs_mutex);
L
Linus Torvalds 已提交
568 569 570 571

		break;

	case CPUFREQ_GOV_LIMITS:
572
		mutex_lock(&dbs_mutex);
L
Linus Torvalds 已提交
573
		if (policy->max < this_dbs_info->cur_policy->cur)
574 575 576
			__cpufreq_driver_target(this_dbs_info->cur_policy,
			                        policy->max,
			                        CPUFREQ_RELATION_H);
L
Linus Torvalds 已提交
577
		else if (policy->min > this_dbs_info->cur_policy->cur)
578 579 580
			__cpufreq_driver_target(this_dbs_info->cur_policy,
			                        policy->min,
			                        CPUFREQ_RELATION_L);
581
		mutex_unlock(&dbs_mutex);
L
Linus Torvalds 已提交
582 583 584 585 586
		break;
	}
	return 0;
}

587 588 589 590 591
/* Governor descriptor registered with the cpufreq core at init time. */
struct cpufreq_governor cpufreq_gov_ondemand = {
	.name = "ondemand",
	.owner = THIS_MODULE,
	.governor = cpufreq_governor_dbs,
	.max_transition_latency = TRANSITION_LATENCY_LIMIT,
};
593
EXPORT_SYMBOL(cpufreq_gov_ondemand);
L
Linus Torvalds 已提交
594 595 596

/*
 * Module init: create the "kondemand" workqueue, then register the governor.
 * The workqueue is destroyed again if registration fails.
 * Returns 0 on success, -EFAULT when the workqueue cannot be created, or the
 * error from cpufreq_register_governor().
 */
static int __init cpufreq_gov_dbs_init(void)
{
	int ret;

	kondemand_wq = create_workqueue("kondemand");
	if (kondemand_wq == NULL) {
		printk(KERN_ERR "Creation of kondemand failed\n");
		return -EFAULT;
	}

	ret = cpufreq_register_governor(&cpufreq_gov_ondemand);
	if (ret != 0)
		destroy_workqueue(kondemand_wq);

	return ret;
}

/* Module exit: unregister the governor, then tear down its workqueue. */
static void __exit cpufreq_gov_dbs_exit(void)
{
	/* unregister first, then destroy the queue the governor's work runs on */
	cpufreq_unregister_governor(&cpufreq_gov_ondemand);

	destroy_workqueue(kondemand_wq);
}


618 619 620 621 622
MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
                   "Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");
L
Linus Torvalds 已提交
623

624 625 626
/*
 * When ondemand is the configured default governor it is initialised via
 * fs_initcall(); otherwise via the regular module_init().
 */
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
module_init(cpufreq_gov_dbs_init);
#else
fs_initcall(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);