cpufreq.c 56.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5
/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *            (C) 2013 Viresh Kumar <viresh.kumar@linaro.org>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *	Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *	Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

V
Viresh Kumar 已提交
18 19
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

20
#include <linux/cpu.h>
L
Linus Torvalds 已提交
21 22 23
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/device.h>
24 25 26
#include <linux/init.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
27
#include <linux/mutex.h>
28
#include <linux/slab.h>
29
#include <linux/syscore_ops.h>
30
#include <linux/tick.h>
31 32
#include <trace/events/power.h>

L
Linus Torvalds 已提交
33
/**
D
Dave Jones 已提交
34
 * The "cpufreq driver" - the arch- or hardware-dependent low
L
Linus Torvalds 已提交
35 36 37
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
38
static struct cpufreq_driver *cpufreq_driver;
39
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
40
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback);
41 42
static DEFINE_RWLOCK(cpufreq_driver_lock);
static DEFINE_MUTEX(cpufreq_governor_lock);
43
static LIST_HEAD(cpufreq_policy_list);
44

45 46
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
47
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
48
#endif
L
Linus Torvalds 已提交
49

50 51 52 53 54
static inline bool has_target(void)
{
	return cpufreq_driver->target_index || cpufreq_driver->target;
}

55 56 57 58 59 60 61 62 63 64 65 66 67 68
/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - Governor routines that can be called in cpufreq hotplug path should not
 *   take this sem as top level hotplug notifier handler takes this.
69 70
 * - Lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
71 72 73 74
 */
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)					\
75
static void lock_policy_rwsem_##mode(int cpu)				\
76
{									\
77 78 79
	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);	\
	BUG_ON(!policy);						\
	down_##mode(&per_cpu(cpu_policy_rwsem, policy->cpu));		\
80 81 82 83 84
}

lock_policy_rwsem(read, cpu);
lock_policy_rwsem(write, cpu);

85 86 87
#define unlock_policy_rwsem(mode, cpu)					\
static void unlock_policy_rwsem_##mode(int cpu)				\
{									\
88 89 90
	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);	\
	BUG_ON(!policy);						\
	up_##mode(&per_cpu(cpu_policy_rwsem, policy->cpu));		\
91 92
}

93 94
unlock_policy_rwsem(read, cpu);
unlock_policy_rwsem(write, cpu);
95

96 97 98 99 100 101
/*
 * rwsem to guarantee that cpufreq driver module doesn't unload during critical
 * sections
 *
 * Within this file it is taken for reading with down_read_trylock() (see
 * cpufreq_cpu_get(), show() and store()); those callers give up instead of
 * blocking when the trylock fails.
 */
static DECLARE_RWSEM(cpufreq_rwsem);

L
Linus Torvalds 已提交
102
/* internal prototypes: defined later in this file, used before definition */
static int __cpufreq_governor(struct cpufreq_policy *policy,
		unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);
L
Linus Torvalds 已提交
107 108

/**
109 110
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
L
Linus Torvalds 已提交
111 112 113 114
 * "transition" list for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * The mutex locks both lists.
 */
115
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
116
static struct srcu_notifier_head cpufreq_transition_notifier_list;
L
Linus Torvalds 已提交
117

118
static bool init_cpufreq_transition_notifier_list_called;
119 120 121
static int __init init_cpufreq_transition_notifier_list(void)
{
	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
122
	init_cpufreq_transition_notifier_list_called = true;
123 124
	return 0;
}
125
pure_initcall(init_cpufreq_transition_notifier_list);
L
Linus Torvalds 已提交
126

127
/* Non-zero once disable_cpufreq() has been called. */
static int off __read_mostly;

/* Returns non-zero when cpufreq has been switched off. */
static int cpufreq_disabled(void)
{
	return off;
}

/* Switch cpufreq off; nothing in this function ever re-enables it. */
void disable_cpufreq(void)
{
	off = 1;
}
L
Linus Torvalds 已提交
136
/* List of governors, searched by __find_governor(). */
static LIST_HEAD(cpufreq_governor_list);
/* Held by cpufreq_parse_governor() around its governor-list lookups. */
static DEFINE_MUTEX(cpufreq_governor_mutex);
L
Linus Torvalds 已提交
138

139 140
bool have_governor_per_policy(void)
{
141
	return !!(cpufreq_driver->flags & CPUFREQ_HAVE_GOVERNOR_PER_POLICY);
142
}
143
EXPORT_SYMBOL_GPL(have_governor_per_policy);
144

145 146 147 148 149 150 151 152 153
/*
 * Pick the kobject under which governor attributes are parented: the
 * policy's own kobject when governors are per-policy, otherwise the global
 * cpufreq kobject.
 */
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
{
	struct kobject *parent = cpufreq_global_kobject;

	if (have_governor_per_policy())
		parent = &policy->kobj;

	return parent;
}
EXPORT_SYMBOL_GPL(get_governor_parent_kobj);

154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
/*
 * Derive the idle time of @cpu from the per-CPU cpustat counters: idle is
 * the current wall time minus the sum of the user, system, irq, softirq,
 * steal and nice counters. The result is converted with cputime_to_usecs().
 * When @wall is non-NULL, the converted wall time is stored there.
 */
static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
{
	u64 now = jiffies64_to_cputime64(get_jiffies_64());
	u64 busy = 0;

	busy += kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
	busy += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
	busy += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
	busy += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
	busy += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
	busy += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];

	if (wall)
		*wall = cputime_to_usecs(now);

	return cputime_to_usecs(now - busy);
}

/*
 * Return the idle time of @cpu and optionally its wall time through @wall.
 *
 * get_cpu_idle_time_us() is tried first. When it yields -1ULL the jiffy
 * based computation is used instead (presumably -1 means the fine-grained
 * statistics are unavailable - confirm against get_cpu_idle_time_us()).
 * When @io_busy is zero, the iowait time is added to the idle time and
 * @wall is passed to get_cpu_iowait_time_us() rather than to
 * get_cpu_idle_time_us().
 */
u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
{
	u64 idle = get_cpu_idle_time_us(cpu, io_busy ? wall : NULL);

	if (idle == -1ULL)
		return get_cpu_idle_time_jiffy(cpu, wall);

	if (!io_busy)
		idle += get_cpu_iowait_time_us(cpu, wall);

	return idle;
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time);

189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
/*
 * Generic cpufreq init() helper for drivers of SMP systems. It:
 * - validates @table and attaches it to @policy via
 *   cpufreq_table_validate_and_show(),
 * - stores @transition_latency in policy->cpuinfo.transition_latency,
 * - sets every bit in policy->cpus.
 *
 * Returns 0 on success or the error reported by the table validation.
 */
int cpufreq_generic_init(struct cpufreq_policy *policy,
		struct cpufreq_frequency_table *table,
		unsigned int transition_latency)
{
	int err = cpufreq_table_validate_and_show(policy, table);

	if (err) {
		pr_err("%s: invalid frequency table: %d\n", __func__, err);
		return err;
	}

	policy->cpuinfo.transition_latency = transition_latency;

	/*
	 * Only the SMP configuration in which all processors share clock
	 * and voltage is supported, hence every CPU joins this policy.
	 */
	cpumask_setall(policy->cpus);

	return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_generic_init);

220
struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
L
Linus Torvalds 已提交
221
{
222
	struct cpufreq_policy *policy = NULL;
L
Linus Torvalds 已提交
223 224
	unsigned long flags;

225 226 227 228 229
	if (cpufreq_disabled() || (cpu >= nr_cpu_ids))
		return NULL;

	if (!down_read_trylock(&cpufreq_rwsem))
		return NULL;
L
Linus Torvalds 已提交
230 231

	/* get the cpufreq driver */
232
	read_lock_irqsave(&cpufreq_driver_lock, flags);
L
Linus Torvalds 已提交
233

234 235 236 237 238 239
	if (cpufreq_driver) {
		/* get the CPU */
		policy = per_cpu(cpufreq_cpu_data, cpu);
		if (policy)
			kobject_get(&policy->kobj);
	}
L
Linus Torvalds 已提交
240

241
	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
L
Linus Torvalds 已提交
242

243
	if (!policy)
244
		up_read(&cpufreq_rwsem);
L
Linus Torvalds 已提交
245

246
	return policy;
247
}
L
Linus Torvalds 已提交
248 249
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);

250
void cpufreq_cpu_put(struct cpufreq_policy *policy)
L
Linus Torvalds 已提交
251
{
252 253 254
	if (cpufreq_disabled())
		return;

255 256
	kobject_put(&policy->kobj);
	up_read(&cpufreq_rwsem);
L
Linus Torvalds 已提交
257 258 259 260 261 262 263 264 265 266 267 268
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);

/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
269
 * systems as each CPU might be scaled differently. So, use the arch
L
Linus Torvalds 已提交
270 271 272 273
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
274
static unsigned int l_p_j_ref_freq;
L
Linus Torvalds 已提交
275

276
static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
L
Linus Torvalds 已提交
277 278 279 280 281 282 283
{
	if (ci->flags & CPUFREQ_CONST_LOOPS)
		return;

	if (!l_p_j_ref_freq) {
		l_p_j_ref = loops_per_jiffy;
		l_p_j_ref_freq = ci->old;
284
		pr_debug("saving %lu as reference value for loops_per_jiffy; "
285
			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
L
Linus Torvalds 已提交
286
	}
287
	if ((val == CPUFREQ_POSTCHANGE && ci->old != ci->new) ||
288
	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
289 290
		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
								ci->new);
291
		pr_debug("scaling loops_per_jiffy to %lu "
292
			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
L
Linus Torvalds 已提交
293 294 295
	}
}
#else
296 297 298 299
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	return;
}
L
Linus Torvalds 已提交
300 301
#endif

302
static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
303
		struct cpufreq_freqs *freqs, unsigned int state)
L
Linus Torvalds 已提交
304 305 306
{
	BUG_ON(irqs_disabled());

307 308 309
	if (cpufreq_disabled())
		return;

310
	freqs->flags = cpufreq_driver->flags;
311
	pr_debug("notification %u of frequency transition to %u kHz\n",
312
		state, freqs->new);
L
Linus Torvalds 已提交
313 314

	switch (state) {
315

L
Linus Torvalds 已提交
316
	case CPUFREQ_PRECHANGE:
317
		/* detect if the driver reported a value as "old frequency"
318 319
		 * which is not equal to what the cpufreq core thinks is
		 * "old frequency".
L
Linus Torvalds 已提交
320
		 */
321
		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
322 323
			if ((policy) && (policy->cpu == freqs->cpu) &&
			    (policy->cur) && (policy->cur != freqs->old)) {
324
				pr_debug("Warning: CPU frequency is"
325 326 327
					" %u, cpufreq assumed %u kHz.\n",
					freqs->old, policy->cur);
				freqs->old = policy->cur;
L
Linus Torvalds 已提交
328 329
			}
		}
330
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
331
				CPUFREQ_PRECHANGE, freqs);
L
Linus Torvalds 已提交
332 333
		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
		break;
334

L
Linus Torvalds 已提交
335 336
	case CPUFREQ_POSTCHANGE:
		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
337
		pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
338
			(unsigned long)freqs->cpu);
339
		trace_cpu_frequency(freqs->new, freqs->cpu);
340
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
341
				CPUFREQ_POSTCHANGE, freqs);
342 343
		if (likely(policy) && likely(policy->cpu == freqs->cpu))
			policy->cur = freqs->new;
L
Linus Torvalds 已提交
344 345 346
		break;
	}
}
347

348 349 350 351 352 353 354 355 356 357 358 359 360 361
/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
void cpufreq_notify_transition(struct cpufreq_policy *policy,
		struct cpufreq_freqs *freqs, unsigned int state)
{
	for_each_cpu(freqs->cpu, policy->cpus)
		__cpufreq_notify_transition(policy, freqs, state);
}
L
Linus Torvalds 已提交
362 363 364 365 366 367 368
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);


/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/

369 370 371 372 373
static struct cpufreq_governor *__find_governor(const char *str_governor)
{
	struct cpufreq_governor *t;

	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
374
		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
375 376 377 378 379
			return t;

	return NULL;
}

L
Linus Torvalds 已提交
380 381 382
/**
 * cpufreq_parse_governor - parse a governor string
 */
D
Dave Jones 已提交
383
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
L
Linus Torvalds 已提交
384 385
				struct cpufreq_governor **governor)
{
386
	int err = -EINVAL;
387 388

	if (!cpufreq_driver)
389 390
		goto out;

391
	if (cpufreq_driver->setpolicy) {
L
Linus Torvalds 已提交
392 393
		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
			*policy = CPUFREQ_POLICY_PERFORMANCE;
394
			err = 0;
395 396
		} else if (!strnicmp(str_governor, "powersave",
						CPUFREQ_NAME_LEN)) {
L
Linus Torvalds 已提交
397
			*policy = CPUFREQ_POLICY_POWERSAVE;
398
			err = 0;
L
Linus Torvalds 已提交
399
		}
400
	} else if (has_target()) {
L
Linus Torvalds 已提交
401
		struct cpufreq_governor *t;
402

403
		mutex_lock(&cpufreq_governor_mutex);
404 405 406

		t = __find_governor(str_governor);

407
		if (t == NULL) {
408
			int ret;
409

410 411 412
			mutex_unlock(&cpufreq_governor_mutex);
			ret = request_module("cpufreq_%s", str_governor);
			mutex_lock(&cpufreq_governor_mutex);
413

414 415
			if (ret == 0)
				t = __find_governor(str_governor);
416 417
		}

418 419 420
		if (t != NULL) {
			*governor = t;
			err = 0;
L
Linus Torvalds 已提交
421
		}
422

423
		mutex_unlock(&cpufreq_governor_mutex);
L
Linus Torvalds 已提交
424
	}
425
out:
426
	return err;
L
Linus Torvalds 已提交
427 428 429
}

/**
430 431
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
L
Linus Torvalds 已提交
432 433 434 435 436
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

437 438
#define show_one(file_name, object)			\
static ssize_t show_##file_name				\
D
Dave Jones 已提交
439
(struct cpufreq_policy *policy, char *buf)		\
440
{							\
441
	return sprintf(buf, "%u\n", policy->object);	\
L
Linus Torvalds 已提交
442 443 444 445
}

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
446
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
L
Linus Torvalds 已提交
447 448 449 450
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);

451
static int cpufreq_set_policy(struct cpufreq_policy *policy,
452
				struct cpufreq_policy *new_policy);
453

L
Linus Torvalds 已提交
454 455 456 457 458
/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)			\
static ssize_t store_##file_name					\
D
Dave Jones 已提交
459
(struct cpufreq_policy *policy, const char *buf, size_t count)		\
L
Linus Torvalds 已提交
460
{									\
461
	int ret;							\
L
Linus Torvalds 已提交
462 463 464 465 466 467
	struct cpufreq_policy new_policy;				\
									\
	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
	if (ret)							\
		return -EINVAL;						\
									\
468
	ret = sscanf(buf, "%u", &new_policy.object);			\
L
Linus Torvalds 已提交
469 470 471
	if (ret != 1)							\
		return -EINVAL;						\
									\
472
	ret = cpufreq_set_policy(policy, &new_policy);		\
473
	policy->user_policy.object = policy->object;			\
L
Linus Torvalds 已提交
474 475 476 477
									\
	return ret ? ret : count;					\
}

478 479
store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);
L
Linus Torvalds 已提交
480 481 482 483

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
D
Dave Jones 已提交
484 485
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
					char *buf)
L
Linus Torvalds 已提交
486
{
487
	unsigned int cur_freq = __cpufreq_get(policy->cpu);
L
Linus Torvalds 已提交
488 489 490 491 492 493 494 495
	if (!cur_freq)
		return sprintf(buf, "<unknown>");
	return sprintf(buf, "%u\n", cur_freq);
}

/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
D
Dave Jones 已提交
496
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
L
Linus Torvalds 已提交
497
{
498
	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
L
Linus Torvalds 已提交
499 500 501 502
		return sprintf(buf, "powersave\n");
	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sprintf(buf, "performance\n");
	else if (policy->governor)
503
		return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n",
504
				policy->governor->name);
L
Linus Torvalds 已提交
505 506 507 508 509 510
	return -EINVAL;
}

/**
 * store_scaling_governor - store policy for the specified CPU
 */
D
Dave Jones 已提交
511 512
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
					const char *buf, size_t count)
L
Linus Torvalds 已提交
513
{
514
	int ret;
L
Linus Torvalds 已提交
515 516 517 518 519 520 521
	char	str_governor[16];
	struct cpufreq_policy new_policy;

	ret = cpufreq_get_policy(&new_policy, policy->cpu);
	if (ret)
		return ret;

522
	ret = sscanf(buf, "%15s", str_governor);
L
Linus Torvalds 已提交
523 524 525
	if (ret != 1)
		return -EINVAL;

526 527
	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
						&new_policy.governor))
L
Linus Torvalds 已提交
528 529
		return -EINVAL;

530
	ret = cpufreq_set_policy(policy, &new_policy);
531 532 533 534

	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

535 536 537 538
	if (ret)
		return ret;
	else
		return count;
L
Linus Torvalds 已提交
539 540 541 542 543
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
D
Dave Jones 已提交
544
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
L
Linus Torvalds 已提交
545
{
546
	return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", cpufreq_driver->name);
L
Linus Torvalds 已提交
547 548 549 550 551
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
D
Dave Jones 已提交
552 553
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
						char *buf)
L
Linus Torvalds 已提交
554 555 556 557
{
	ssize_t i = 0;
	struct cpufreq_governor *t;

558
	if (!has_target()) {
L
Linus Torvalds 已提交
559 560 561 562 563
		i += sprintf(buf, "performance powersave");
		goto out;
	}

	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
564 565
		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
		    - (CPUFREQ_NAME_LEN + 2)))
L
Linus Torvalds 已提交
566
			goto out;
567
		i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name);
L
Linus Torvalds 已提交
568
	}
569
out:
L
Linus Torvalds 已提交
570 571 572
	i += sprintf(&buf[i], "\n");
	return i;
}
573

574
ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf)
L
Linus Torvalds 已提交
575 576 577 578
{
	ssize_t i = 0;
	unsigned int cpu;

579
	for_each_cpu(cpu, mask) {
L
Linus Torvalds 已提交
580 581 582 583
		if (i)
			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
		if (i >= (PAGE_SIZE - 5))
584
			break;
L
Linus Torvalds 已提交
585 586 587 588
	}
	i += sprintf(&buf[i], "\n");
	return i;
}
589
EXPORT_SYMBOL_GPL(cpufreq_show_cpus);
L
Linus Torvalds 已提交
590

591 592 593 594 595 596
/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
597
	return cpufreq_show_cpus(policy->related_cpus, buf);
598 599 600 601 602 603 604
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
605
	return cpufreq_show_cpus(policy->cpus, buf);
606 607
}

608
/*
 * sysfs write handler for scaling_setspeed: parse one unsigned integer from
 * @buf and hand it to the current governor's ->store_setspeed() callback.
 *
 * Returns @count on success and -EINVAL when the policy has no governor,
 * the governor has no ->store_setspeed() callback, or the input does not
 * parse. The callback's own return value is not propagated.
 */
static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
					const char *buf, size_t count)
{
	unsigned int freq = 0;
	int ret;	/* sscanf() returns int */

	if (!policy->governor || !policy->governor->store_setspeed)
		return -EINVAL;

	ret = sscanf(buf, "%u", &freq);
	if (ret != 1)
		return -EINVAL;

	policy->governor->store_setspeed(policy, freq);

	return count;
}

/*
 * sysfs read handler for scaling_setspeed: delegates to the current
 * governor's ->show_setspeed() callback, or prints "<unsupported>" when the
 * policy has no governor or the governor lacks that callback.
 */
static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
	if (!policy->governor || !policy->governor->show_setspeed)
		return sprintf(buf, "<unsupported>\n");

	return policy->governor->show_setspeed(policy, buf);
}
L
Linus Torvalds 已提交
633

634
/**
635
 * show_bios_limit - show the current cpufreq HW/BIOS limitation
636 637 638 639 640
 */
static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
{
	unsigned int limit;
	int ret;
641 642
	if (cpufreq_driver->bios_limit) {
		ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
643 644 645 646 647 648
		if (!ret)
			return sprintf(buf, "%u\n", limit);
	}
	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}

649 650 651 652 653 654 655 656 657 658 659 660 661 662
/*
 * sysfs attribute objects for the show_*()/store_*() handlers above. The
 * "_ro" entries are read-only, the "_rw" entries are also writable, and
 * cpuinfo_cur_freq is declared with explicit mode 0400.
 */
cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
cpufreq_freq_attr_ro(scaling_available_governors);
cpufreq_freq_attr_ro(scaling_driver);
cpufreq_freq_attr_ro(scaling_cur_freq);
cpufreq_freq_attr_ro(bios_limit);
cpufreq_freq_attr_ro(related_cpus);
cpufreq_freq_attr_ro(affected_cpus);
cpufreq_freq_attr_rw(scaling_min_freq);
cpufreq_freq_attr_rw(scaling_max_freq);
cpufreq_freq_attr_rw(scaling_governor);
cpufreq_freq_attr_rw(scaling_setspeed);
L
Linus Torvalds 已提交
663

D
Dave Jones 已提交
664
static struct attribute *default_attrs[] = {
L
Linus Torvalds 已提交
665 666
	&cpuinfo_min_freq.attr,
	&cpuinfo_max_freq.attr,
667
	&cpuinfo_transition_latency.attr,
L
Linus Torvalds 已提交
668 669 670
	&scaling_min_freq.attr,
	&scaling_max_freq.attr,
	&affected_cpus.attr,
671
	&related_cpus.attr,
L
Linus Torvalds 已提交
672 673 674
	&scaling_governor.attr,
	&scaling_driver.attr,
	&scaling_available_governors.attr,
675
	&scaling_setspeed.attr,
L
Linus Torvalds 已提交
676 677 678
	NULL
};

679 680
/* Map an embedded kobject / attribute back to its containing structure. */
#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)
L
Linus Torvalds 已提交
681

682
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
L
Linus Torvalds 已提交
683
{
D
Dave Jones 已提交
684 685
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
686
	ssize_t ret;
687 688

	if (!down_read_trylock(&cpufreq_rwsem))
689
		return -EINVAL;
690

691
	lock_policy_rwsem_read(policy->cpu);
692

693 694 695 696 697
	if (fattr->show)
		ret = fattr->show(policy, buf);
	else
		ret = -EIO;

698
	unlock_policy_rwsem_read(policy->cpu);
699
	up_read(&cpufreq_rwsem);
700

L
Linus Torvalds 已提交
701 702 703
	return ret;
}

D
Dave Jones 已提交
704 705
/*
 * sysfs_ops ->store: dispatch a write to the attribute's ->store() handler.
 *
 * CPU hotplug is held off with get_online_cpus() for the duration. The
 * handler runs with cpufreq_rwsem held for reading and the policy rwsem
 * write-locked. Returns -EINVAL when policy->cpu is offline or
 * cpufreq_rwsem cannot be taken without blocking, and -EIO when the
 * attribute has no ->store() handler.
 */
static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
	ssize_t ret = -EINVAL;

	get_online_cpus();

	if (!cpu_online(policy->cpu))
		goto unlock;

	if (!down_read_trylock(&cpufreq_rwsem))
		goto unlock;

	lock_policy_rwsem_write(policy->cpu);

	if (fattr->store)
		ret = fattr->store(policy, buf, count);
	else
		ret = -EIO;

	unlock_policy_rwsem_write(policy->cpu);

	up_read(&cpufreq_rwsem);
unlock:
	put_online_cpus();

	return ret;
}

D
Dave Jones 已提交
735
static void cpufreq_sysfs_release(struct kobject *kobj)
L
Linus Torvalds 已提交
736
{
D
Dave Jones 已提交
737
	struct cpufreq_policy *policy = to_policy(kobj);
738
	pr_debug("last reference is dropped\n");
L
Linus Torvalds 已提交
739 740 741
	complete(&policy->kobj_unregister);
}

742
static const struct sysfs_ops sysfs_ops = {
L
Linus Torvalds 已提交
743 744 745 746 747 748 749 750 751 752
	.show	= show,
	.store	= store,
};

static struct kobj_type ktype_cpufreq = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
	.release	= cpufreq_sysfs_release,
};

753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795
struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

static int cpufreq_global_kobject_usage;

/*
 * Register one more user of the global cpufreq kobject. The first user
 * adds the kobject as "cpufreq" below cpu_subsys.dev_root and gets the
 * kobject_add() result; later users get 0.
 *
 * NOTE(review): the usage count stays incremented even when kobject_add()
 * fails - confirm that callers compensate.
 */
int cpufreq_get_global_kobject(void)
{
	int first_user = (cpufreq_global_kobject_usage == 0);

	cpufreq_global_kobject_usage++;
	if (!first_user)
		return 0;

	return kobject_add(cpufreq_global_kobject,
			&cpu_subsys.dev_root->kobj, "%s", "cpufreq");
}
EXPORT_SYMBOL(cpufreq_get_global_kobject);

/*
 * Drop one user of the global cpufreq kobject; the kobject is deleted when
 * the usage count reaches zero.
 */
void cpufreq_put_global_kobject(void)
{
	cpufreq_global_kobject_usage--;
	if (cpufreq_global_kobject_usage == 0)
		kobject_del(cpufreq_global_kobject);
}
EXPORT_SYMBOL(cpufreq_put_global_kobject);

/*
 * Create @attr below the global cpufreq kobject, taking a usage reference
 * on that kobject first. The reference is dropped again when creating the
 * file fails. Returns 0 on success or a negative error code.
 */
int cpufreq_sysfs_create_file(const struct attribute *attr)
{
	int ret = cpufreq_get_global_kobject();

	if (ret)
		return ret;

	ret = sysfs_create_file(cpufreq_global_kobject, attr);
	if (ret)
		cpufreq_put_global_kobject();

	return ret;
}
EXPORT_SYMBOL(cpufreq_sysfs_create_file);

/*
 * Counterpart of cpufreq_sysfs_create_file(): remove @attr from the global
 * cpufreq kobject, then drop the usage reference on that kobject.
 */
void cpufreq_sysfs_remove_file(const struct attribute *attr)
{
	sysfs_remove_file(cpufreq_global_kobject, attr);

	cpufreq_put_global_kobject();
}
EXPORT_SYMBOL(cpufreq_sysfs_remove_file);

796
/* symlink affected CPUs */
797
static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
798 799 800 801 802
{
	unsigned int j;
	int ret = 0;

	for_each_cpu(j, policy->cpus) {
803
		struct device *cpu_dev;
804

805
		if (j == policy->cpu)
806 807
			continue;

808
		pr_debug("Adding link for CPU: %u\n", j);
809 810
		cpu_dev = get_cpu_device(j);
		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
811
					"cpufreq");
812 813
		if (ret)
			break;
814 815 816 817
	}
	return ret;
}

818
static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
819
				     struct device *dev)
820 821 822 823 824 825
{
	struct freq_attr **drv_attr;
	int ret = 0;

	/* prepare interface data */
	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
826
				   &dev->kobj, "cpufreq");
827 828 829 830
	if (ret)
		return ret;

	/* set up files for this cpu device */
831
	drv_attr = cpufreq_driver->attr;
832 833 834
	while ((drv_attr) && (*drv_attr)) {
		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
		if (ret)
835
			goto err_out_kobj_put;
836 837
		drv_attr++;
	}
838
	if (cpufreq_driver->get) {
839 840
		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
		if (ret)
841
			goto err_out_kobj_put;
842
	}
843
	if (has_target()) {
844 845
		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
		if (ret)
846
			goto err_out_kobj_put;
847
	}
848
	if (cpufreq_driver->bios_limit) {
849 850
		ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
		if (ret)
851
			goto err_out_kobj_put;
852
	}
853

854
	ret = cpufreq_add_dev_symlink(policy);
855 856 857
	if (ret)
		goto err_out_kobj_put;

858 859 860 861 862 863 864 865 866 867 868 869 870
	return ret;

err_out_kobj_put:
	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);
	return ret;
}

static void cpufreq_init_policy(struct cpufreq_policy *policy)
{
	struct cpufreq_policy new_policy;
	int ret = 0;

871
	memcpy(&new_policy, policy, sizeof(*policy));
872
	/* assure that the starting sequence is run in cpufreq_set_policy */
873 874 875
	policy->governor = NULL;

	/* set default policy */
876
	ret = cpufreq_set_policy(policy, &new_policy);
877 878 879 880
	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

	if (ret) {
881
		pr_debug("setting policy failed\n");
882 883
		if (cpufreq_driver->exit)
			cpufreq_driver->exit(policy);
884
	}
885 886
}

887
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Attach @cpu to the already existing @policy.
 *
 * For drivers with a target callback the governor is stopped first and
 * restarted (START followed by LIMITS) afterwards. In between, @cpu is set
 * in policy->cpus and cpufreq_cpu_data for @cpu is pointed at @policy,
 * under the policy rwsem (write) and cpufreq_driver_lock. Unless @frozen
 * is set, a "cpufreq" link from @dev to the policy kobject is created.
 *
 * Returns 0 on success or the first error encountered.
 */
static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
				  unsigned int cpu, struct device *dev,
				  bool frozen)
{
	int ret = 0;
	unsigned long flags;

	if (has_target()) {
		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
		if (ret) {
			pr_err("%s: Failed to stop governor\n", __func__);
			return ret;
		}
	}

	lock_policy_rwsem_write(policy->cpu);

	write_lock_irqsave(&cpufreq_driver_lock, flags);

	cpumask_set_cpu(cpu, policy->cpus);
	per_cpu(cpufreq_cpu_data, cpu) = policy;
	write_unlock_irqrestore(&cpufreq_driver_lock, flags);

	unlock_policy_rwsem_write(policy->cpu);

	if (has_target()) {
		if ((ret = __cpufreq_governor(policy, CPUFREQ_GOV_START)) ||
			(ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))) {
			pr_err("%s: Failed to start governor\n", __func__);
			return ret;
		}
	}

	/* Don't touch sysfs links during light-weight init */
	if (!frozen)
		ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");

	return ret;
}
#endif
L
Linus Torvalds 已提交
928

929 930 931 932 933
static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
{
	struct cpufreq_policy *policy;
	unsigned long flags;

934
	read_lock_irqsave(&cpufreq_driver_lock, flags);
935 936 937

	policy = per_cpu(cpufreq_cpu_data_fallback, cpu);

938
	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
939 940 941 942

	return policy;
}

943 944 945 946 947 948 949 950 951 952 953 954 955 956
/*
 * Allocate a zeroed policy together with its ->cpus and ->related_cpus
 * masks (->related_cpus is zero-initialised) and initialise its
 * policy_list node. Returns NULL when any allocation fails; partial
 * allocations are released. Free the result with cpufreq_policy_free().
 */
static struct cpufreq_policy *cpufreq_policy_alloc(void)
{
	struct cpufreq_policy *policy;

	policy = kzalloc(sizeof(*policy), GFP_KERNEL);
	if (!policy)
		return NULL;

	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
		goto err_free_policy;

	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
		goto err_free_cpumask;

	INIT_LIST_HEAD(&policy->policy_list);
	return policy;

err_free_cpumask:
	free_cpumask_var(policy->cpus);
err_free_policy:
	kfree(policy);

	return NULL;
}

/*
 * Release a policy allocated by cpufreq_policy_alloc(): both cpumasks
 * first, then the structure itself.
 */
static void cpufreq_policy_free(struct cpufreq_policy *policy)
{
	free_cpumask_var(policy->cpus);
	free_cpumask_var(policy->related_cpus);

	kfree(policy);
}

975 976
static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
{
977
	if (WARN_ON(cpu == policy->cpu))
978 979
		return;

980 981 982 983 984 985 986 987 988
	/*
	 * Take direct locks as lock_policy_rwsem_write wouldn't work here.
	 * Also lock for last cpu is enough here as contention will happen only
	 * after policy->cpu is changed and after it is changed, other threads
	 * will try to acquire lock for new cpu. And policy is already updated
	 * by then.
	 */
	down_write(&per_cpu(cpu_policy_rwsem, policy->cpu));

989 990 991
	policy->last_cpu = policy->cpu;
	policy->cpu = cpu;

992 993
	up_write(&per_cpu(cpu_policy_rwsem, policy->last_cpu));

994 995 996 997 998
	cpufreq_frequency_table_update_policy_cpu(policy);
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_UPDATE_POLICY_CPU, policy);
}

999 1000
/*
 * __cpufreq_add_dev - set up cpufreq handling for the CPU behind @dev
 * @dev: CPU device; dev->id is used as the CPU number
 * @sif: subsystem interface (not used by this function)
 * @frozen: true for the light-weight path: the policy saved in
 *	cpufreq_cpu_data_fallback is restored instead of allocating a new one
 *	and cpufreq_add_dev_interface() is not called
 *
 * Returns 0 if the CPU is offline, already has a policy, cpufreq_rwsem could
 * not be taken, or on success. If the CPU belongs to the related_cpus of an
 * existing policy, the result of cpufreq_add_policy_cpu() is returned.
 * Otherwise a negative error code: -ENOMEM if no policy could be obtained,
 * the driver's ->init() error, -EIO if ->get() reports a frequency of 0, or
 * the error from cpufreq_add_dev_interface().
 */
static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
			     bool frozen)
{
	unsigned int j, cpu = dev->id;
	int ret = -ENOMEM;
	struct cpufreq_policy *policy;
	unsigned long flags;
#ifdef CONFIG_HOTPLUG_CPU
	struct cpufreq_policy *tpolicy;
	struct cpufreq_governor *gov;
#endif

	if (cpu_is_offline(cpu))
		return 0;

	pr_debug("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
	/* check whether a different CPU already registered this
	 * CPU because it is in the same boat. */
	policy = cpufreq_cpu_get(cpu);
	if (unlikely(policy)) {
		cpufreq_cpu_put(policy);
		return 0;
	}
#endif

	if (!down_read_trylock(&cpufreq_rwsem))
		return 0;

#ifdef CONFIG_HOTPLUG_CPU
	/* Check if this cpu was hot-unplugged earlier and has siblings */
	read_lock_irqsave(&cpufreq_driver_lock, flags);
	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev, frozen);
			up_read(&cpufreq_rwsem);
			return ret;
		}
	}
	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

	if (frozen)
		/* Restore the saved policy when doing light-weight init */
		policy = cpufreq_policy_restore(cpu);
	else
		policy = cpufreq_policy_alloc();

	if (!policy)
		goto nomem_out;

	/*
	 * In the resume path, since we restore a saved policy, the assignment
	 * to policy->cpu is like an update of the existing policy, rather than
	 * the creation of a brand new one. So we need to perform this update
	 * by invoking update_policy_cpu().
	 */
	if (frozen && cpu != policy->cpu)
		update_policy_cpu(policy, cpu);
	else
		policy->cpu = cpu;

	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
	cpumask_copy(policy->cpus, cpumask_of(cpu));

	init_completion(&policy->kobj_unregister);
	INIT_WORK(&policy->update, handle_update);

	/* call driver. From then on the cpufreq must be able
	 * to accept all calls to ->verify and ->setpolicy for this CPU
	 */
	ret = cpufreq_driver->init(policy);
	if (ret) {
		pr_debug("initialization failed\n");
		goto err_set_policy_cpu;
	}

	if (cpufreq_driver->get) {
		policy->cur = cpufreq_driver->get(policy->cpu);
		if (!policy->cur) {
			pr_err("%s: ->get() failed\n", __func__);
			/*
			 * ret still holds 0 from the successful ->init();
			 * without an explicit error code this failure would
			 * be reported to the caller as success.
			 */
			ret = -EIO;
			goto err_get_freq;
		}
	}

	/* related cpus should atleast have policy->cpus */
	cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);

	/*
	 * affected cpus must always be the one, which are online. We aren't
	 * managing offline cpus here.
	 */
	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);

	policy->user_policy.min = policy->min;
	policy->user_policy.max = policy->max;

	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
				     CPUFREQ_START, policy);

#ifdef CONFIG_HOTPLUG_CPU
	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
	if (gov) {
		policy->governor = gov;
		pr_debug("Restoring governor %s for cpu %d\n",
		       policy->governor->name, cpu);
	}
#endif

	write_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus)
		per_cpu(cpufreq_cpu_data, j) = policy;
	write_unlock_irqrestore(&cpufreq_driver_lock, flags);

	if (!frozen) {
		ret = cpufreq_add_dev_interface(policy, dev);
		if (ret)
			goto err_out_unregister;
	}

	write_lock_irqsave(&cpufreq_driver_lock, flags);
	list_add(&policy->policy_list, &cpufreq_policy_list);
	write_unlock_irqrestore(&cpufreq_driver_lock, flags);

	cpufreq_init_policy(policy);

	kobject_uevent(&policy->kobj, KOBJ_ADD);
	up_read(&cpufreq_rwsem);

	pr_debug("initialization complete\n");

	return 0;

err_out_unregister:
	write_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus)
		per_cpu(cpufreq_cpu_data, j) = NULL;
	write_unlock_irqrestore(&cpufreq_driver_lock, flags);

err_get_freq:
	if (cpufreq_driver->exit)
		cpufreq_driver->exit(policy);
err_set_policy_cpu:
	if (frozen) {
		/*
		 * The policy came from cpufreq_cpu_data_fallback and is about
		 * to be freed: do not leave a stale pointer behind for a
		 * later light-weight init to pick up.
		 */
		write_lock_irqsave(&cpufreq_driver_lock, flags);
		per_cpu(cpufreq_cpu_data_fallback, cpu) = NULL;
		write_unlock_irqrestore(&cpufreq_driver_lock, flags);

		/*
		 * The light-weight tear-down skips kobject_put(), so the
		 * embedded kobject is still referenced here. Drop it and wait
		 * for its release before the memory goes away.
		 */
		kobject_put(&policy->kobj);
		wait_for_completion(&policy->kobj_unregister);
	}
	cpufreq_policy_free(policy);
nomem_out:
	up_read(&cpufreq_rwsem);

	return ret;
}

1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165
/**
 * cpufreq_add_dev - add a CPU device
 * @dev: CPU device to add
 * @sif: subsystem interface, passed through unchanged
 *
 * Adds the cpufreq interface for a CPU device. This is the regular
 * (non-frozen) entry point: it forwards to __cpufreq_add_dev() with
 * frozen == false and returns its result.
 *
 * The Oracle says: try running cpufreq registration/unregistration concurrently
 * with cpu hotplugging and all hell will break loose. Tried to clean this
 * mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
	return __cpufreq_add_dev(dev, sif, false);
}

1166
/*
 * Pick a CPU other than @old_cpu from policy->cpus to take over the policy
 * and, unless @frozen, move the policy kobject under that CPU's device.
 *
 * @policy: policy whose owning CPU is going away
 * @old_cpu: CPU that currently owns the policy
 * @frozen: when true, sysfs is left untouched and only the id is returned
 *
 * Returns the id of the chosen CPU device (>= 0) on success. If moving the
 * kobject fails, @old_cpu is put back into policy->cpus, the "cpufreq" link
 * on the chosen CPU is re-created and -EINVAL is returned.
 */
static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
					   unsigned int old_cpu, bool frozen)
{
	struct device *cpu_dev;
	int ret;

	/* first sibling now owns the new sysfs dir */
	cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));

	/* Don't touch sysfs files during light-weight tear-down */
	if (frozen)
		return cpu_dev->id;

	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
	ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
	if (ret) {
		pr_err("%s: Failed to move kobj: %d\n", __func__, ret);

		lock_policy_rwsem_write(old_cpu);
		cpumask_set_cpu(old_cpu, policy->cpus);
		unlock_policy_rwsem_write(old_cpu);

		/*
		 * Try to put back the link removed above; if even that
		 * fails, say so instead of silently dropping the error.
		 */
		if (sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
				      "cpufreq"))
			pr_err("%s: Failed to restore kobj link to cpu:%u\n",
			       __func__, cpu_dev->id);

		return -EINVAL;
	}

	return cpu_dev->id;
}

1197 1198 1199
/*
 * First half of removing a CPU from cpufreq.
 *
 * @dev: CPU device being removed; dev->id is the CPU number
 * @sif: subsystem interface (not used by this function)
 * @frozen: light-weight tear-down: the policy pointer is stashed in
 *	cpufreq_cpu_data_fallback and sysfs is left alone
 *
 * Stops the governor (when the driver has a target), remembers the governor
 * name for the CPU (CONFIG_HOTPLUG_CPU, drivers without ->setpolicy), and
 * either drops this CPU's "cpufreq" link or, if this CPU owns a policy shared
 * with other CPUs, hands ownership to another CPU.
 *
 * Returns 0 on success, -EINVAL if the CPU has no policy, or the error from
 * stopping the governor.
 */
static int __cpufreq_remove_dev_prepare(struct device *dev,
					struct subsys_interface *sif,
					bool frozen)
{
	struct cpufreq_policy *policy;
	unsigned int cpu = dev->id;
	unsigned int nr_policy_cpus;
	unsigned long flags;
	int new_cpu;
	int ret;

	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);

	write_lock_irqsave(&cpufreq_driver_lock, flags);
	policy = per_cpu(cpufreq_cpu_data, cpu);
	/* Save the policy somewhere when doing a light-weight tear-down */
	if (frozen)
		per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
	write_unlock_irqrestore(&cpufreq_driver_lock, flags);

	if (!policy) {
		pr_debug("%s: No cpu_data found\n", __func__);
		return -EINVAL;
	}

	if (has_target()) {
		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
		if (ret) {
			pr_err("%s: Failed to stop governor\n", __func__);
			return ret;
		}
	}

#ifdef CONFIG_HOTPLUG_CPU
	if (!cpufreq_driver->setpolicy)
		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
			policy->governor->name, CPUFREQ_NAME_LEN);
#endif

	lock_policy_rwsem_read(cpu);
	nr_policy_cpus = cpumask_weight(policy->cpus);
	unlock_policy_rwsem_read(cpu);

	/* Not the owner: only this CPU's link (if any) has to go. */
	if (cpu != policy->cpu) {
		if (!frozen)
			sysfs_remove_link(&dev->kobj, "cpufreq");
		return 0;
	}

	/* Owner and sole user: nothing to hand over. */
	if (nr_policy_cpus <= 1)
		return 0;

	new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen);
	if (new_cpu < 0)
		return 0;

	update_policy_cpu(policy, new_cpu);

	if (!frozen) {
		pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
				__func__, new_cpu, cpu);
	}

	return 0;
}

/*
 * Second half of removing a CPU from cpufreq.
 *
 * @dev: CPU device being removed; dev->id is the CPU number
 * @sif: subsystem interface (not used by this function)
 * @frozen: light-weight tear-down: the kobject is not released and the
 *	policy is not freed
 *
 * If other CPUs still use the policy, this CPU is cleared from policy->cpus
 * and the governor is restarted. If this was the last user, the governor is
 * told to exit, the kobject is dropped (unless @frozen), the driver's ->exit()
 * is called and the policy is taken off cpufreq_policy_list.
 *
 * Returns 0 on success, -EINVAL if the CPU has no policy, or the governor
 * error that aborted the operation.
 */
static int __cpufreq_remove_dev_finish(struct device *dev,
				       struct subsys_interface *sif,
				       bool frozen)
{
	unsigned int cpu = dev->id, cpus;
	int ret;
	unsigned long flags;
	struct cpufreq_policy *policy;
	struct kobject *kobj;
	struct completion *cmp;

	read_lock_irqsave(&cpufreq_driver_lock, flags);
	policy = per_cpu(cpufreq_cpu_data, cpu);
	read_unlock_irqrestore(&cpufreq_driver_lock, flags);

	if (!policy) {
		pr_debug("%s: No cpu_data found\n", __func__);
		return -EINVAL;
	}

	lock_policy_rwsem_write(cpu);
	cpus = cpumask_weight(policy->cpus);

	if (cpus > 1)
		cpumask_clear_cpu(cpu, policy->cpus);
	unlock_policy_rwsem_write(cpu);

	/* If cpu is last user of policy, free policy */
	if (cpus == 1) {
		if (has_target()) {
			ret = __cpufreq_governor(policy,
					CPUFREQ_GOV_POLICY_EXIT);
			if (ret) {
				pr_err("%s: Failed to exit governor\n",
						__func__);
				return ret;
			}
		}

		if (!frozen) {
			lock_policy_rwsem_read(cpu);
			kobj = &policy->kobj;
			cmp = &policy->kobj_unregister;
			unlock_policy_rwsem_read(cpu);
			kobject_put(kobj);

			/*
			 * We need to make sure that the underlying kobj is
			 * actually not referenced anymore by anybody before we
			 * proceed with unloading.
			 */
			pr_debug("waiting for dropping of refcount\n");
			wait_for_completion(cmp);
			pr_debug("wait complete\n");
		}

		/*
		 * Perform the ->exit() even during light-weight tear-down,
		 * since this is a core component, and is essential for the
		 * subsequent light-weight ->init() to succeed.
		 */
		if (cpufreq_driver->exit)
			cpufreq_driver->exit(policy);

		/* Remove policy from list of active policies */
		write_lock_irqsave(&cpufreq_driver_lock, flags);
		list_del(&policy->policy_list);
		write_unlock_irqrestore(&cpufreq_driver_lock, flags);

		if (!frozen)
			cpufreq_policy_free(policy);
	} else {
		if (has_target()) {
			if ((ret = __cpufreq_governor(policy, CPUFREQ_GOV_START)) ||
					(ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))) {
				pr_err("%s: Failed to start governor\n",
						__func__);
				return ret;
			}
		}
	}

	/*
	 * cpufreq_driver_lock protects the cpufreq_cpu_data array; clear the
	 * slot under the write lock like every other writer does.
	 */
	write_lock_irqsave(&cpufreq_driver_lock, flags);
	per_cpu(cpufreq_cpu_data, cpu) = NULL;
	write_unlock_irqrestore(&cpufreq_driver_lock, flags);

	return 0;
}

1345
/**
1346
 * cpufreq_remove_dev - remove a CPU device
1347 1348 1349
 *
 * Removes the cpufreq interface for a CPU device.
 */
1350
static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
1351
{
1352
	unsigned int cpu = dev->id;
1353
	int ret;
1354 1355 1356 1357

	if (cpu_is_offline(cpu))
		return 0;

1358 1359 1360 1361 1362 1363
	ret = __cpufreq_remove_dev_prepare(dev, sif, false);

	if (!ret)
		ret = __cpufreq_remove_dev_finish(dev, sif, false);

	return ret;
1364 1365
}

1366
static void handle_update(struct work_struct *work)
L
Linus Torvalds 已提交
1367
{
1368 1369 1370
	struct cpufreq_policy *policy =
		container_of(work, struct cpufreq_policy, update);
	unsigned int cpu = policy->cpu;
1371
	pr_debug("handle_update for cpu %u called\n", cpu);
L
Linus Torvalds 已提交
1372 1373 1374 1375
	cpufreq_update_policy(cpu);
}

/**
1376 1377
 *	cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're
 *	in deep trouble.
L
Linus Torvalds 已提交
1378 1379 1380 1381
 *	@cpu: cpu number
 *	@old_freq: CPU frequency the kernel thinks the CPU runs at
 *	@new_freq: CPU frequency the CPU actually runs at
 *
1382 1383
 *	We adjust to current frequency first, and need to clean up later.
 *	So either call to cpufreq_update_policy() or schedule handle_update()).
L
Linus Torvalds 已提交
1384
 */
1385 1386
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
				unsigned int new_freq)
L
Linus Torvalds 已提交
1387
{
1388
	struct cpufreq_policy *policy;
L
Linus Torvalds 已提交
1389
	struct cpufreq_freqs freqs;
1390 1391
	unsigned long flags;

1392
	pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
L
Linus Torvalds 已提交
1393 1394 1395 1396
	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);

	freqs.old = old_freq;
	freqs.new = new_freq;
1397 1398 1399 1400 1401 1402 1403

	read_lock_irqsave(&cpufreq_driver_lock, flags);
	policy = per_cpu(cpufreq_cpu_data, cpu);
	read_unlock_irqrestore(&cpufreq_driver_lock, flags);

	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
L
Linus Torvalds 已提交
1404 1405
}

1406
/**
D
Dhaval Giani 已提交
1407
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1408 1409 1410 1411 1412 1413 1414
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
1415
	struct cpufreq_policy *policy;
1416
	unsigned int ret_freq = 0;
1417

1418 1419
	if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
		return cpufreq_driver->get(cpu);
1420 1421

	policy = cpufreq_cpu_get(cpu);
1422
	if (policy) {
1423
		ret_freq = policy->cur;
1424 1425 1426
		cpufreq_cpu_put(policy);
	}

D
Dave Jones 已提交
1427
	return ret_freq;
1428 1429 1430
}
EXPORT_SYMBOL(cpufreq_quick_get);

1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450
/**
 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
 * @cpu: CPU number
 *
 * Returns policy->max of the policy covering @cpu, or 0 if there is none.
 */
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
	struct cpufreq_policy *policy;
	unsigned int max_khz;

	policy = cpufreq_cpu_get(cpu);
	if (!policy)
		return 0;

	max_khz = policy->max;
	cpufreq_cpu_put(policy);

	return max_khz;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);

1451
static unsigned int __cpufreq_get(unsigned int cpu)
L
Linus Torvalds 已提交
1452
{
1453
	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1454
	unsigned int ret_freq = 0;
1455

1456
	if (!cpufreq_driver->get)
D
Dave Jones 已提交
1457
		return ret_freq;
L
Linus Torvalds 已提交
1458

1459
	ret_freq = cpufreq_driver->get(cpu);
L
Linus Torvalds 已提交
1460

1461
	if (ret_freq && policy->cur &&
1462
		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1463 1464 1465 1466
		/* verify no discrepancy between actual and
					saved value exists */
		if (unlikely(ret_freq != policy->cur)) {
			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
L
Linus Torvalds 已提交
1467 1468 1469 1470
			schedule_work(&policy->update);
		}
	}

D
Dave Jones 已提交
1471
	return ret_freq;
1472
}
L
Linus Torvalds 已提交
1473

1474 1475 1476 1477 1478 1479 1480 1481 1482 1483
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the CPU current (static) CPU frequency
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	unsigned int ret_freq = 0;

1484 1485 1486
	if (cpufreq_disabled() || !cpufreq_driver)
		return -ENOENT;

1487 1488
	if (!down_read_trylock(&cpufreq_rwsem))
		return 0;
1489

1490
	lock_policy_rwsem_read(cpu);
1491 1492 1493 1494

	ret_freq = __cpufreq_get(cpu);

	unlock_policy_rwsem_read(cpu);
1495 1496
	up_read(&cpufreq_rwsem);

D
Dave Jones 已提交
1497
	return ret_freq;
L
Linus Torvalds 已提交
1498 1499 1500
}
EXPORT_SYMBOL(cpufreq_get);

1501 1502 1503 1504 1505
/*
 * Subsystem interface on cpu_subsys: routes device add/remove to
 * cpufreq_add_dev() and cpufreq_remove_dev().
 */
static struct subsys_interface cpufreq_interface = {
	.name		= "cpufreq",
	.subsys		= &cpu_subsys,
	.add_dev	= cpufreq_add_dev,
	.remove_dev	= cpufreq_remove_dev,
};

1508
/**
1509 1510 1511 1512
 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
 *
 * This function is only executed for the boot processor.  The other CPUs
 * have been put offline by means of CPU hotplug.
1513
 */
1514
static int cpufreq_bp_suspend(void)
1515
{
1516
	int ret = 0;
1517

1518
	int cpu = smp_processor_id();
1519
	struct cpufreq_policy *policy;
1520

1521
	pr_debug("suspending cpu %u\n", cpu);
1522

1523
	/* If there's no policy for the boot CPU, we have nothing to do. */
1524 1525
	policy = cpufreq_cpu_get(cpu);
	if (!policy)
1526
		return 0;
1527

1528
	if (cpufreq_driver->suspend) {
1529
		ret = cpufreq_driver->suspend(policy);
1530
		if (ret)
1531
			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1532
					"step on CPU %u\n", policy->cpu);
1533 1534
	}

1535
	cpufreq_cpu_put(policy);
1536
	return ret;
1537 1538
}

L
Linus Torvalds 已提交
1539
/**
1540
 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
L
Linus Torvalds 已提交
1541 1542
 *
 *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1543 1544 1545 1546 1547
 *	2.) schedule call cpufreq_update_policy() ASAP as interrupts are
 *	    restored. It will verify that the current freq is in sync with
 *	    what we believe it to be. This is a bit later than when it
 *	    should be, but nonethteless it's better than calling
 *	    cpufreq_driver->get() here which might re-enable interrupts...
1548 1549 1550
 *
 * This function is only executed for the boot CPU.  The other CPUs have not
 * been turned on yet.
L
Linus Torvalds 已提交
1551
 */
1552
static void cpufreq_bp_resume(void)
L
Linus Torvalds 已提交
1553
{
1554
	int ret = 0;
1555

1556
	int cpu = smp_processor_id();
1557
	struct cpufreq_policy *policy;
L
Linus Torvalds 已提交
1558

1559
	pr_debug("resuming cpu %u\n", cpu);
L
Linus Torvalds 已提交
1560

1561
	/* If there's no policy for the boot CPU, we have nothing to do. */
1562 1563
	policy = cpufreq_cpu_get(cpu);
	if (!policy)
1564
		return;
L
Linus Torvalds 已提交
1565

1566
	if (cpufreq_driver->resume) {
1567
		ret = cpufreq_driver->resume(policy);
L
Linus Torvalds 已提交
1568 1569
		if (ret) {
			printk(KERN_ERR "cpufreq: resume failed in ->resume "
1570
					"step on CPU %u\n", policy->cpu);
1571
			goto fail;
L
Linus Torvalds 已提交
1572 1573 1574
		}
	}

1575
	schedule_work(&policy->update);
1576

1577
fail:
1578
	cpufreq_cpu_put(policy);
L
Linus Torvalds 已提交
1579 1580
}

1581 1582 1583
/* Syscore hooks: boot-CPU suspend/resume handling for cpufreq. */
static struct syscore_ops cpufreq_syscore_ops = {
	.suspend	= cpufreq_bp_suspend,
	.resume		= cpufreq_bp_resume,
};

1586 1587 1588 1589 1590 1591 1592 1593
/**
 *	cpufreq_get_current_driver - return current driver's name
 *
 *	Return the name string of the currently loaded cpufreq driver
 *	or NULL, if none.
 */
const char *cpufreq_get_current_driver(void)
{
1594 1595 1596 1597
	if (cpufreq_driver)
		return cpufreq_driver->name;

	return NULL;
1598 1599
}
EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);
L
Linus Torvalds 已提交
1600 1601 1602 1603 1604 1605 1606 1607 1608 1609

/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 *	cpufreq_register_notifier - register a driver with cpufreq
 *	@nb: notifier function to register
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
1610
 *	Add a driver to one of two lists: either a list of drivers that
L
Linus Torvalds 已提交
1611 1612 1613 1614 1615
 *      are notified about clock rate changes (once before and once after
 *      the transition), or a list of drivers that are notified about
 *      changes in cpufreq policy.
 *
 *	This function may sleep, and has the same return conditions as
1616
 *	blocking_notifier_chain_register.
L
Linus Torvalds 已提交
1617 1618 1619 1620 1621
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

1622 1623 1624
	if (cpufreq_disabled())
		return -EINVAL;

1625 1626
	WARN_ON(!init_cpufreq_transition_notifier_list_called);

L
Linus Torvalds 已提交
1627 1628
	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
1629
		ret = srcu_notifier_chain_register(
1630
				&cpufreq_transition_notifier_list, nb);
L
Linus Torvalds 已提交
1631 1632
		break;
	case CPUFREQ_POLICY_NOTIFIER:
1633 1634
		ret = blocking_notifier_chain_register(
				&cpufreq_policy_notifier_list, nb);
L
Linus Torvalds 已提交
1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);

/**
 *	cpufreq_unregister_notifier - unregister a driver with cpufreq
 *	@nb: notifier block to be unregistered
1647
 *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
L
Linus Torvalds 已提交
1648 1649 1650 1651
 *
 *	Remove a driver from the CPU frequency notifier list.
 *
 *	This function may sleep, and has the same return conditions as
1652
 *	blocking_notifier_chain_unregister.
L
Linus Torvalds 已提交
1653 1654 1655 1656 1657
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

1658 1659 1660
	if (cpufreq_disabled())
		return -EINVAL;

L
Linus Torvalds 已提交
1661 1662
	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
1663
		ret = srcu_notifier_chain_unregister(
1664
				&cpufreq_transition_notifier_list, nb);
L
Linus Torvalds 已提交
1665 1666
		break;
	case CPUFREQ_POLICY_NOTIFIER:
1667 1668
		ret = blocking_notifier_chain_unregister(
				&cpufreq_policy_notifier_list, nb);
L
Linus Torvalds 已提交
1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/

int __cpufreq_driver_target(struct cpufreq_policy *policy,
			    unsigned int target_freq,
			    unsigned int relation)
{
	int retval = -EINVAL;
1688
	unsigned int old_target_freq = target_freq;
1689

1690 1691 1692
	if (cpufreq_disabled())
		return -ENODEV;

1693 1694 1695 1696 1697 1698 1699 1700
	/* Make sure that target_freq is within supported range */
	if (target_freq > policy->max)
		target_freq = policy->max;
	if (target_freq < policy->min)
		target_freq = policy->min;

	pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
			policy->cpu, target_freq, relation, old_target_freq);
1701

1702 1703 1704 1705 1706 1707
	/*
	 * This might look like a redundant call as we are checking it again
	 * after finding index. But it is left intentionally for cases where
	 * exactly same freq is called again and so we can save on few function
	 * calls.
	 */
1708 1709 1710
	if (target_freq == policy->cur)
		return 0;

1711 1712
	if (cpufreq_driver->target)
		retval = cpufreq_driver->target(policy, target_freq, relation);
1713 1714 1715
	else if (cpufreq_driver->target_index) {
		struct cpufreq_frequency_table *freq_table;
		int index;
1716

1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736
		freq_table = cpufreq_frequency_get_table(policy->cpu);
		if (unlikely(!freq_table)) {
			pr_err("%s: Unable to find freq_table\n", __func__);
			goto out;
		}

		retval = cpufreq_frequency_table_target(policy, freq_table,
				target_freq, relation, &index);
		if (unlikely(retval)) {
			pr_err("%s: Unable to find matching freq\n", __func__);
			goto out;
		}

		if (freq_table[index].frequency == policy->cur)
			retval = 0;
		else
			retval = cpufreq_driver->target_index(policy, index);
	}

out:
L
Linus Torvalds 已提交
1737 1738 1739 1740 1741 1742 1743 1744
	return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
			  unsigned int target_freq,
			  unsigned int relation)
{
1745
	int ret = -EINVAL;
L
Linus Torvalds 已提交
1746

1747
	lock_policy_rwsem_write(policy->cpu);
L
Linus Torvalds 已提交
1748 1749 1750

	ret = __cpufreq_driver_target(policy, target_freq, relation);

1751
	unlock_policy_rwsem_write(policy->cpu);
L
Linus Torvalds 已提交
1752 1753 1754 1755 1756

	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);

1757 1758 1759
/*
 * when "event" is CPUFREQ_GOV_LIMITS
 */
L
Linus Torvalds 已提交
1760

1761 1762
static int __cpufreq_governor(struct cpufreq_policy *policy,
					unsigned int event)
L
Linus Torvalds 已提交
1763
{
1764
	int ret;
1765 1766 1767 1768 1769 1770 1771 1772 1773 1774

	/* Only must be defined when default governor is known to have latency
	   restrictions, like e.g. conservative or ondemand.
	   That this is the case is already ensured in Kconfig
	*/
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
	struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
	struct cpufreq_governor *gov = NULL;
#endif
1775 1776 1777 1778

	if (policy->governor->max_transition_latency &&
	    policy->cpuinfo.transition_latency >
	    policy->governor->max_transition_latency) {
1779 1780 1781 1782 1783 1784 1785 1786 1787 1788
		if (!gov)
			return -EINVAL;
		else {
			printk(KERN_WARNING "%s governor failed, too long"
			       " transition latency of HW, fallback"
			       " to %s governor\n",
			       policy->governor->name,
			       gov->name);
			policy->governor = gov;
		}
1789
	}
L
Linus Torvalds 已提交
1790

1791 1792 1793
	if (event == CPUFREQ_GOV_POLICY_INIT)
		if (!try_module_get(policy->governor->owner))
			return -EINVAL;
L
Linus Torvalds 已提交
1794

1795
	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
1796
						policy->cpu, event);
1797 1798

	mutex_lock(&cpufreq_governor_lock);
1799
	if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
1800 1801
	    || (!policy->governor_enabled
	    && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) {
1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812
		mutex_unlock(&cpufreq_governor_lock);
		return -EBUSY;
	}

	if (event == CPUFREQ_GOV_STOP)
		policy->governor_enabled = false;
	else if (event == CPUFREQ_GOV_START)
		policy->governor_enabled = true;

	mutex_unlock(&cpufreq_governor_lock);

L
Linus Torvalds 已提交
1813 1814
	ret = policy->governor->governor(policy, event);

1815 1816 1817 1818 1819
	if (!ret) {
		if (event == CPUFREQ_GOV_POLICY_INIT)
			policy->governor->initialized++;
		else if (event == CPUFREQ_GOV_POLICY_EXIT)
			policy->governor->initialized--;
1820 1821 1822 1823 1824 1825 1826 1827
	} else {
		/* Restore original values */
		mutex_lock(&cpufreq_governor_lock);
		if (event == CPUFREQ_GOV_STOP)
			policy->governor_enabled = true;
		else if (event == CPUFREQ_GOV_START)
			policy->governor_enabled = false;
		mutex_unlock(&cpufreq_governor_lock);
1828
	}
1829

1830 1831
	if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) ||
			((event == CPUFREQ_GOV_POLICY_EXIT) && !ret))
L
Linus Torvalds 已提交
1832 1833 1834 1835 1836 1837 1838
		module_put(policy->governor->owner);

	return ret;
}

int cpufreq_register_governor(struct cpufreq_governor *governor)
{
1839
	int err;
L
Linus Torvalds 已提交
1840 1841 1842 1843

	if (!governor)
		return -EINVAL;

1844 1845 1846
	if (cpufreq_disabled())
		return -ENODEV;

1847
	mutex_lock(&cpufreq_governor_mutex);
1848

1849
	governor->initialized = 0;
1850 1851 1852 1853
	err = -EBUSY;
	if (__find_governor(governor->name) == NULL) {
		err = 0;
		list_add(&governor->governor_list, &cpufreq_governor_list);
L
Linus Torvalds 已提交
1854 1855
	}

1856
	mutex_unlock(&cpufreq_governor_mutex);
1857
	return err;
L
Linus Torvalds 已提交
1858 1859 1860 1861 1862
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);

void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
1863 1864 1865 1866
#ifdef CONFIG_HOTPLUG_CPU
	int cpu;
#endif

L
Linus Torvalds 已提交
1867 1868 1869
	if (!governor)
		return;

1870 1871 1872
	if (cpufreq_disabled())
		return;

1873 1874 1875 1876 1877 1878 1879 1880 1881
#ifdef CONFIG_HOTPLUG_CPU
	for_each_present_cpu(cpu) {
		if (cpu_online(cpu))
			continue;
		if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
			strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
	}
#endif

1882
	mutex_lock(&cpufreq_governor_mutex);
L
Linus Torvalds 已提交
1883
	list_del(&governor->governor_list);
1884
	mutex_unlock(&cpufreq_governor_mutex);
L
Linus Torvalds 已提交
1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895
	return;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);


/*********************************************************************
 *                          POLICY INTERFACE                         *
 *********************************************************************/

/**
 * cpufreq_get_policy - get the current cpufreq_policy
1896 1897
 * @policy: struct cpufreq_policy into which the current cpufreq_policy
 *	is written
L
Linus Torvalds 已提交
1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910
 *
 * Reads the current cpufreq policy.
 */
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
{
	struct cpufreq_policy *cpu_policy;
	if (!policy)
		return -EINVAL;

	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return -EINVAL;

1911
	memcpy(policy, cpu_policy, sizeof(*policy));
L
Linus Torvalds 已提交
1912 1913 1914 1915 1916 1917

	cpufreq_cpu_put(cpu_policy);
	return 0;
}
EXPORT_SYMBOL(cpufreq_get_policy);

1918
/*
1919 1920
 * policy : current policy.
 * new_policy: policy to be set.
1921
 */
1922
static int cpufreq_set_policy(struct cpufreq_policy *policy,
1923
				struct cpufreq_policy *new_policy)
L
Linus Torvalds 已提交
1924
{
1925
	int ret = 0, failed = 1;
L
Linus Torvalds 已提交
1926

1927 1928
	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", new_policy->cpu,
		new_policy->min, new_policy->max);
L
Linus Torvalds 已提交
1929

1930
	memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
L
Linus Torvalds 已提交
1931

1932
	if (new_policy->min > policy->max || new_policy->max < policy->min) {
1933 1934 1935 1936
		ret = -EINVAL;
		goto error_out;
	}

L
Linus Torvalds 已提交
1937
	/* verify the cpu speed can be set within this limit */
1938
	ret = cpufreq_driver->verify(new_policy);
L
Linus Torvalds 已提交
1939 1940 1941 1942
	if (ret)
		goto error_out;

	/* adjust if necessary - all reasons */
1943
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1944
			CPUFREQ_ADJUST, new_policy);
L
Linus Torvalds 已提交
1945 1946

	/* adjust if necessary - hardware incompatibility*/
1947
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1948
			CPUFREQ_INCOMPATIBLE, new_policy);
L
Linus Torvalds 已提交
1949

1950 1951 1952 1953
	/*
	 * verify the cpu speed can be set within this limit, which might be
	 * different to the first one
	 */
1954
	ret = cpufreq_driver->verify(new_policy);
1955
	if (ret)
L
Linus Torvalds 已提交
1956 1957 1958
		goto error_out;

	/* notification of the new policy */
1959
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1960
			CPUFREQ_NOTIFY, new_policy);
L
Linus Torvalds 已提交
1961

1962 1963
	policy->min = new_policy->min;
	policy->max = new_policy->max;
L
Linus Torvalds 已提交
1964

1965
	pr_debug("new min and max freqs are %u - %u kHz\n",
1966
					policy->min, policy->max);
L
Linus Torvalds 已提交
1967

1968
	if (cpufreq_driver->setpolicy) {
1969
		policy->policy = new_policy->policy;
1970
		pr_debug("setting range\n");
1971
		ret = cpufreq_driver->setpolicy(new_policy);
L
Linus Torvalds 已提交
1972
	} else {
1973
		if (new_policy->governor != policy->governor) {
L
Linus Torvalds 已提交
1974
			/* save old, working values */
1975
			struct cpufreq_governor *old_gov = policy->governor;
L
Linus Torvalds 已提交
1976

1977
			pr_debug("governor switch\n");
L
Linus Torvalds 已提交
1978 1979

			/* end old governor */
1980 1981 1982 1983
			if (policy->governor) {
				__cpufreq_governor(policy, CPUFREQ_GOV_STOP);
				unlock_policy_rwsem_write(new_policy->cpu);
				__cpufreq_governor(policy,
1984
						CPUFREQ_GOV_POLICY_EXIT);
1985
				lock_policy_rwsem_write(new_policy->cpu);
1986
			}
L
Linus Torvalds 已提交
1987 1988

			/* start new governor */
1989 1990 1991
			policy->governor = new_policy->governor;
			if (!__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) {
				if (!__cpufreq_governor(policy, CPUFREQ_GOV_START)) {
1992
					failed = 0;
1993
				} else {
1994 1995
					unlock_policy_rwsem_write(new_policy->cpu);
					__cpufreq_governor(policy,
1996
							CPUFREQ_GOV_POLICY_EXIT);
1997
					lock_policy_rwsem_write(new_policy->cpu);
1998
				}
1999 2000 2001
			}

			if (failed) {
L
Linus Torvalds 已提交
2002
				/* new governor failed, so re-start old one */
2003
				pr_debug("starting governor %s failed\n",
2004
							policy->governor->name);
L
Linus Torvalds 已提交
2005
				if (old_gov) {
2006 2007
					policy->governor = old_gov;
					__cpufreq_governor(policy,
2008
							CPUFREQ_GOV_POLICY_INIT);
2009
					__cpufreq_governor(policy,
2010
							   CPUFREQ_GOV_START);
L
Linus Torvalds 已提交
2011 2012 2013 2014 2015 2016
				}
				ret = -EINVAL;
				goto error_out;
			}
			/* might be a policy change, too, so fall through */
		}
2017
		pr_debug("governor: change or update limits\n");
2018
		ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
L
Linus Torvalds 已提交
2019 2020
	}

2021
error_out:
L
Linus Torvalds 已提交
2022 2023 2024 2025 2026 2027 2028
	return ret;
}

/**
 *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
 *	@cpu: CPU which shall be re-evaluated
 *
L
Lucas De Marchi 已提交
2029
 *	Useful for policy notifiers which have different necessities
L
Linus Torvalds 已提交
2030 2031 2032 2033
 *	at different times.
 */
int cpufreq_update_policy(unsigned int cpu)
{
2034 2035
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct cpufreq_policy new_policy;
2036
	int ret;
L
Linus Torvalds 已提交
2037

2038
	if (!policy) {
2039 2040 2041
		ret = -ENODEV;
		goto no_policy;
	}
L
Linus Torvalds 已提交
2042

2043
	lock_policy_rwsem_write(cpu);
L
Linus Torvalds 已提交
2044

2045
	pr_debug("updating policy for CPU %u\n", cpu);
2046
	memcpy(&new_policy, policy, sizeof(*policy));
2047 2048 2049 2050
	new_policy.min = policy->user_policy.min;
	new_policy.max = policy->user_policy.max;
	new_policy.policy = policy->user_policy.policy;
	new_policy.governor = policy->user_policy.governor;
L
Linus Torvalds 已提交
2051

2052 2053 2054 2055
	/*
	 * BIOS might change freq behind our back
	 * -> ask driver for current freq and notify governors about a change
	 */
2056
	if (cpufreq_driver->get) {
2057 2058
		new_policy.cur = cpufreq_driver->get(cpu);
		if (!policy->cur) {
2059
			pr_debug("Driver did not initialize current freq");
2060
			policy->cur = new_policy.cur;
2061
		} else {
2062
			if (policy->cur != new_policy.cur && has_target())
2063 2064
				cpufreq_out_of_sync(cpu, policy->cur,
								new_policy.cur);
2065
		}
2066 2067
	}

2068
	ret = cpufreq_set_policy(policy, &new_policy);
L
Linus Torvalds 已提交
2069

2070 2071
	unlock_policy_rwsem_write(cpu);

2072
	cpufreq_cpu_put(policy);
2073
no_policy:
L
Linus Torvalds 已提交
2074 2075 2076 2077
	return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);

2078
static int cpufreq_cpu_callback(struct notifier_block *nfb,
2079 2080 2081
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
2082
	struct device *dev;
2083
	bool frozen = false;
2084

2085 2086
	dev = get_cpu_device(cpu);
	if (dev) {
2087 2088 2089 2090 2091

		if (action & CPU_TASKS_FROZEN)
			frozen = true;

		switch (action & ~CPU_TASKS_FROZEN) {
2092
		case CPU_ONLINE:
2093
			__cpufreq_add_dev(dev, NULL, frozen);
2094
			cpufreq_update_policy(cpu);
2095
			break;
2096

2097
		case CPU_DOWN_PREPARE:
2098
			__cpufreq_remove_dev_prepare(dev, NULL, frozen);
2099 2100 2101
			break;

		case CPU_POST_DEAD:
2102
			__cpufreq_remove_dev_finish(dev, NULL, frozen);
2103
			break;
2104

2105
		case CPU_DOWN_FAILED:
2106
			__cpufreq_add_dev(dev, NULL, frozen);
2107 2108 2109 2110 2111 2112
			break;
		}
	}
	return NOTIFY_OK;
}

2113
/* Notifier block that routes CPU notifier events to cpufreq_cpu_callback(). */
static struct notifier_block __refdata cpufreq_cpu_notifier = {
	.notifier_call = cpufreq_cpu_callback,
};
L
Linus Torvalds 已提交
2116 2117 2118 2119 2120 2121 2122 2123 2124 2125

/*********************************************************************
 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
 *********************************************************************/

/**
 * cpufreq_register_driver - register a CPU Frequency driver
 * @driver_data: A struct cpufreq_driver containing the values#
 * submitted by the CPU Frequency driver.
 *
2126
 * Registers a CPU Frequency driver to this core code. This code
L
Linus Torvalds 已提交
2127
 * returns zero on success, -EBUSY when another driver got here first
2128
 * (and isn't unregistered in the meantime).
L
Linus Torvalds 已提交
2129 2130
 *
 */
2131
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
L
Linus Torvalds 已提交
2132 2133 2134 2135
{
	unsigned long flags;
	int ret;

2136 2137 2138
	if (cpufreq_disabled())
		return -ENODEV;

L
Linus Torvalds 已提交
2139
	if (!driver_data || !driver_data->verify || !driver_data->init ||
2140 2141
	    !(driver_data->setpolicy || driver_data->target_index ||
		    driver_data->target))
L
Linus Torvalds 已提交
2142 2143
		return -EINVAL;

2144
	pr_debug("trying to register driver %s\n", driver_data->name);
L
Linus Torvalds 已提交
2145 2146 2147 2148

	if (driver_data->setpolicy)
		driver_data->flags |= CPUFREQ_CONST_LOOPS;

2149
	write_lock_irqsave(&cpufreq_driver_lock, flags);
2150
	if (cpufreq_driver) {
2151
		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2152
		return -EEXIST;
L
Linus Torvalds 已提交
2153
	}
2154
	cpufreq_driver = driver_data;
2155
	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
L
Linus Torvalds 已提交
2156

2157
	ret = subsys_interface_register(&cpufreq_interface);
2158 2159
	if (ret)
		goto err_null_driver;
L
Linus Torvalds 已提交
2160

2161
	if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
L
Linus Torvalds 已提交
2162 2163 2164 2165
		int i;
		ret = -ENODEV;

		/* check for at least one working CPU */
2166 2167
		for (i = 0; i < nr_cpu_ids; i++)
			if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
L
Linus Torvalds 已提交
2168
				ret = 0;
2169 2170
				break;
			}
L
Linus Torvalds 已提交
2171 2172 2173

		/* if all ->init() calls failed, unregister */
		if (ret) {
2174
			pr_debug("no CPU initialized for driver %s\n",
2175
							driver_data->name);
2176
			goto err_if_unreg;
L
Linus Torvalds 已提交
2177 2178 2179
		}
	}

2180
	register_hotcpu_notifier(&cpufreq_cpu_notifier);
2181
	pr_debug("driver %s up and running\n", driver_data->name);
L
Linus Torvalds 已提交
2182

2183
	return 0;
2184 2185
err_if_unreg:
	subsys_interface_unregister(&cpufreq_interface);
2186
err_null_driver:
2187
	write_lock_irqsave(&cpufreq_driver_lock, flags);
2188
	cpufreq_driver = NULL;
2189
	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
D
Dave Jones 已提交
2190
	return ret;
L
Linus Torvalds 已提交
2191 2192 2193 2194 2195 2196
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);

/**
 * cpufreq_unregister_driver - unregister the current CPUFreq driver
 *
2197
 * Unregister the current CPUFreq driver. Only call this if you have
L
Linus Torvalds 已提交
2198 2199 2200 2201
 * the right to do so, i.e. if you have succeeded in initialising before!
 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
 * currently not initialised.
 */
2202
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
L
Linus Torvalds 已提交
2203 2204 2205
{
	unsigned long flags;

2206
	if (!cpufreq_driver || (driver != cpufreq_driver))
L
Linus Torvalds 已提交
2207 2208
		return -EINVAL;

2209
	pr_debug("unregistering driver %s\n", driver->name);
L
Linus Torvalds 已提交
2210

2211
	subsys_interface_unregister(&cpufreq_interface);
2212
	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
L
Linus Torvalds 已提交
2213

2214
	down_write(&cpufreq_rwsem);
2215
	write_lock_irqsave(&cpufreq_driver_lock, flags);
2216

2217
	cpufreq_driver = NULL;
2218

2219
	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2220
	up_write(&cpufreq_rwsem);
L
Linus Torvalds 已提交
2221 2222 2223 2224

	return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
2225 2226 2227 2228 2229

/*
 * cpufreq_core_init - one-time initialisation of the cpufreq core
 *
 * Initialises the per-CPU policy rwsem of every possible CPU, creates the
 * global cpufreq kobject and registers the cpufreq syscore ops. Run as a
 * core_initcall.
 *
 * Returns -ENODEV if cpufreq is disabled, 0 otherwise. Failure to create the
 * global kobject is treated as fatal (BUG).
 */
static int __init cpufreq_core_init(void)
{
	int cpu;

	if (cpufreq_disabled())
		return -ENODEV;

	for_each_possible_cpu(cpu)
		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));

	cpufreq_global_kobject = kobject_create();
	BUG_ON(!cpufreq_global_kobject);
	register_syscore_ops(&cpufreq_syscore_ops);

	return 0;
}
core_initcall(cpufreq_core_init);