main.c 21.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
/*  Generic MTRR (Memory Type Range Register) driver.

    Copyright (C) 1997-2000  Richard Gooch
    Copyright (c) 2002	     Patrick Mochel

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
    The postal address is:
      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.

    Source: "Pentium Pro Family Developer's Manual, Volume 3:
    Operating System Writer's Guide" (Intel document number 242692),
    section 11.11.7

    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
    on 6-7 March 2002.
    Source: Intel Architecture Software Developers Manual, Volume 3:
    System Programming Guide; Section 9.11. (1997 edition - PPro).
*/

34 35 36 37
#define DEBUG

#include <linux/types.h> /* FIXME: kvm_para.h needs this */

38
#include <linux/stop_machine.h>
39 40
#include <linux/kvm_para.h>
#include <linux/uaccess.h>
L
Linus Torvalds 已提交
41
#include <linux/module.h>
42
#include <linux/mutex.h>
L
Linus Torvalds 已提交
43
#include <linux/init.h>
44 45
#include <linux/sort.h>
#include <linux/cpu.h>
L
Linus Torvalds 已提交
46 47 48
#include <linux/pci.h>
#include <linux/smp.h>

49
#include <asm/processor.h>
50
#include <asm/e820.h>
L
Linus Torvalds 已提交
51 52
#include <asm/mtrr.h>
#include <asm/msr.h>
53

L
Linus Torvalds 已提交
54 55
#include "mtrr.h"

56
u32 num_var_ranges;
L
Linus Torvalds 已提交
57

58
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
59
static DEFINE_MUTEX(mtrr_mutex);
L
Linus Torvalds 已提交
60

61
u64 size_or_mask, size_and_mask;
62
static bool mtrr_aps_delayed_init;
L
Linus Torvalds 已提交
63

64
static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
L
Linus Torvalds 已提交
65

66
const struct mtrr_ops *mtrr_if;
L
Linus Torvalds 已提交
67 68 69 70

static void set_mtrr(unsigned int reg, unsigned long base,
		     unsigned long size, mtrr_type type);

71
void set_mtrr_ops(const struct mtrr_ops *ops)
L
Linus Torvalds 已提交
72 73 74 75 76 77 78 79 80
{
	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
		mtrr_ops[ops->vendor] = ops;
}

/*  Returns non-zero if we have the write-combining memory type  */
static int have_wrcomb(void)
{
	struct pci_dev *dev;
81
	u8 rev;
82 83 84 85 86 87 88 89

	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
	if (dev != NULL) {
		/*
		 * ServerWorks LE chipsets < rev 6 have problems with
		 * write-combining. Don't allow it and leave room for other
		 * chipsets to be tagged
		 */
L
Linus Torvalds 已提交
90 91
		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
92 93
			pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
			if (rev <= 5) {
94
				pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
95 96 97
				pci_dev_put(dev);
				return 0;
			}
L
Linus Torvalds 已提交
98
		}
99 100 101 102
		/*
		 * Intel 450NX errata # 23. Non ascending cacheline evictions to
		 * write combining memory may resulting in data corruption
		 */
L
Linus Torvalds 已提交
103 104
		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
105
			pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
L
Linus Torvalds 已提交
106 107 108 109
			pci_dev_put(dev);
			return 0;
		}
		pci_dev_put(dev);
110 111
	}
	return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
L
Linus Torvalds 已提交
112 113 114 115 116 117 118
}

/*
 * Work out the number of variable MTRRs and store it in num_var_ranges.
 *
 * With the Intel-style interface the value comes from the low byte of
 * MSR_MTRRcap; AMD uses a fixed 2 and Cyrix/Centaur a fixed 8. Anything
 * else ends up with 0.
 */
static void __init set_num_var_ranges(void)
{
	unsigned long cap = 0, unused_hi;

	if (use_intel()) {
		rdmsr(MSR_MTRRcap, cap, unused_hi);
	} else if (is_cpu(AMD)) {
		cap = 2;
	} else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) {
		cap = 8;
	}

	num_var_ranges = cap & 0xff;
}

/* Give each of the num_var_ranges registers an initial usage count of 1. */
static void __init init_table(void)
{
	int reg;
	int limit = num_var_ranges;

	for (reg = 0; reg != limit; reg++)
		mtrr_usage_table[reg] = 1;
}

struct set_mtrr_data {
	atomic_t	count;
	atomic_t	gate;
	unsigned long	smp_base;
	unsigned long	smp_size;
	unsigned int	smp_reg;
	mtrr_type	smp_type;
};

147 148
static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work);

149
/**
 * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs.
 * @info: pointer to mtrr configuration data (a struct set_mtrr_data)
 *
 * Walks through the count/gate handshake driven by set_mtrr() and
 * programs this CPU's MTRRs in the middle of it. Without CONFIG_SMP the
 * whole body is compiled out.
 *
 * Always returns 0.
 */
static int mtrr_work_handler(void *info)
{
#ifdef CONFIG_SMP
	struct set_mtrr_data *sync = info;
	unsigned long irq_flags;

	/* Announce that the handler has started, then wait to be let in */
	atomic_dec(&sync->count);
	while (!atomic_read(&sync->gate))
		cpu_relax();

	local_irq_save(irq_flags);

	/* Interrupts are off now: check in and wait for the gate to drop */
	atomic_dec(&sync->count);
	while (atomic_read(&sync->gate))
		cpu_relax();

	/*  The master has cleared me to execute  */
	if (sync->smp_reg == ~0U) {
		/*
		 * Initialize the MTRRs in addition to the synchronisation.
		 */
		if (mtrr_aps_delayed_init)
			mtrr_if->set_all();
	} else {
		mtrr_if->set(sync->smp_reg, sync->smp_base,
			     sync->smp_size, sync->smp_type);
	}

	/* Done updating: check in and wait for the gate to be raised again */
	atomic_dec(&sync->count);
	while (!atomic_read(&sync->gate))
		cpu_relax();

	/* Final check-in; set_mtrr() waits for this before it returns */
	atomic_dec(&sync->count);
	local_irq_restore(irq_flags);
#endif
	return 0;
}
L
Linus Torvalds 已提交
191

192 193
/*
 * Two memory types are compatible when either of them is uncachable, or
 * when they form the write-through/write-back pair in either order.
 * Returns 1 if compatible, 0 otherwise.
 */
static inline int types_compatible(mtrr_type type1, mtrr_type type2)
{
	if (type1 == MTRR_TYPE_UNCACHABLE)
		return 1;
	if (type2 == MTRR_TYPE_UNCACHABLE)
		return 1;
	if (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK)
		return 1;
	if (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH)
		return 1;

	return 0;
}

L
Linus Torvalds 已提交
200 201 202 203 204 205 206 207
/**
 * set_mtrr - update mtrrs on all processors
 * @reg:	mtrr in question
 * @base:	mtrr base
 * @size:	mtrr size
 * @type:	mtrr type
 *
 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
208
 *
209
 * 1. Queue work to do the following on all processors:
L
Linus Torvalds 已提交
210
 * 2. Disable Interrupts
211
 * 3. Wait for all procs to do so
L
Linus Torvalds 已提交
212 213 214 215 216 217 218 219 220
 * 4. Enter no-fill cache mode
 * 5. Flush caches
 * 6. Clear PGE bit
 * 7. Flush all TLBs
 * 8. Disable all range registers
 * 9. Update the MTRRs
 * 10. Enable all range registers
 * 11. Flush all TLBs and caches again
 * 12. Enter normal cache mode and reenable caching
221
 * 13. Set PGE
L
Linus Torvalds 已提交
222 223
 * 14. Wait for buddies to catch up
 * 15. Enable interrupts.
224
 *
L
Linus Torvalds 已提交
225
 * What does that mean for us? Well, first we set data.count to the number
226 227 228 229 230 231
 * of CPUs. As each CPU announces that it started the rendezvous handler by
 * decrementing the count, We reset data.count and set the data.gate flag
 * allowing all the cpu's to proceed with the work. As each cpu disables
 * interrupts, it'll decrement data.count once. We wait until it hits 0 and
 * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they
 * are waiting for that flag to be cleared. Once it's cleared, each
232 233 234 235
 * CPU goes through the transition of updating MTRRs.
 * The CPU vendors may each do it differently,
 * so we call mtrr_if->set() callback and let them take care of it.
 * When they're done, they again decrement data->count and wait for data.gate
236
 * to be set.
237
 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
L
Linus Torvalds 已提交
238 239 240 241 242
 * Everyone then enables interrupts and we all continue on.
 *
 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 * becomes nops.
 */
243 244
static void
set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
L
Linus Torvalds 已提交
245 246 247
{
	struct set_mtrr_data data;
	unsigned long flags;
248 249 250
	int cpu;

	preempt_disable();
L
Linus Torvalds 已提交
251 252 253 254 255 256

	data.smp_reg = reg;
	data.smp_base = base;
	data.smp_size = size;
	data.smp_type = type;
	atomic_set(&data.count, num_booting_cpus() - 1);
257 258

	/* Make sure data.count is visible before unleashing other CPUs */
259
	smp_wmb();
260
	atomic_set(&data.gate, 0);
L
Linus Torvalds 已提交
261

262
	/* Start the ball rolling on other CPUs */
263 264 265 266 267 268 269 270
	for_each_online_cpu(cpu) {
		struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu);

		if (cpu == smp_processor_id())
			continue;

		stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work);
	}
L
Linus Torvalds 已提交
271 272


273
	while (atomic_read(&data.count))
L
Linus Torvalds 已提交
274 275
		cpu_relax();

276
	/* Ok, reset count and toggle gate */
L
Linus Torvalds 已提交
277
	atomic_set(&data.count, num_booting_cpus() - 1);
278
	smp_wmb();
279
	atomic_set(&data.gate, 1);
L
Linus Torvalds 已提交
280

281 282 283 284 285 286 287 288 289 290
	local_irq_save(flags);

	while (atomic_read(&data.count))
		cpu_relax();

	/* Ok, reset count and toggle gate */
	atomic_set(&data.count, num_booting_cpus() - 1);
	smp_wmb();
	atomic_set(&data.gate, 0);

291
	/* Do our MTRR business */
L
Linus Torvalds 已提交
292

293 294
	/*
	 * HACK!
L
Linus Torvalds 已提交
295 296
	 * We use this same function to initialize the mtrrs on boot.
	 * The state of the boot cpu's mtrrs has been saved, and we want
297
	 * to replicate across all the APs.
L
Linus Torvalds 已提交
298 299
	 * If we're doing that @reg is set to something special...
	 */
300 301
	if (reg != ~0U)
		mtrr_if->set(reg, base, size, type);
302 303
	else if (!mtrr_aps_delayed_init)
		mtrr_if->set_all();
L
Linus Torvalds 已提交
304

305 306
	/* Wait for the others */
	while (atomic_read(&data.count))
L
Linus Torvalds 已提交
307 308 309
		cpu_relax();

	atomic_set(&data.count, num_booting_cpus() - 1);
310
	smp_wmb();
311
	atomic_set(&data.gate, 1);
L
Linus Torvalds 已提交
312 313 314 315 316

	/*
	 * Wait here for everyone to have seen the gate change
	 * So we're the last ones to touch 'data'
	 */
317
	while (atomic_read(&data.count))
L
Linus Torvalds 已提交
318 319 320
		cpu_relax();

	local_irq_restore(flags);
321
	preempt_enable();
L
Linus Torvalds 已提交
322 323 324
}

/**
325 326 327 328 329
 * mtrr_add_page - Add a memory type region
 * @base: Physical base address of region in pages (in units of 4 kB!)
 * @size: Physical size of region in pages (4 kB)
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
L
Linus Torvalds 已提交
330
 *
331 332 333 334 335 336
 * Memory type region registers control the caching on newer Intel and
 * non Intel processors. This function allows drivers to request an
 * MTRR is added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
L
Linus Torvalds 已提交
337
 *
338 339 340 341
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
L
Linus Torvalds 已提交
342
 *
343 344
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
L
Linus Torvalds 已提交
345
 *
346
 * The available types are
L
Linus Torvalds 已提交
347
 *
348
 * %MTRR_TYPE_UNCACHABLE - No caching
L
Linus Torvalds 已提交
349
 *
350
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
L
Linus Torvalds 已提交
351
 *
352
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
L
Linus Torvalds 已提交
353
 *
354
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
L
Linus Torvalds 已提交
355
 *
356 357
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
L
Linus Torvalds 已提交
358
 */
359
int mtrr_add_page(unsigned long base, unsigned long size,
360
		  unsigned int type, bool increment)
L
Linus Torvalds 已提交
361
{
362
	unsigned long lbase, lsize;
J
Jan Beulich 已提交
363
	int i, replace, error;
L
Linus Torvalds 已提交
364 365 366 367
	mtrr_type ltype;

	if (!mtrr_if)
		return -ENXIO;
368 369 370

	error = mtrr_if->validate_add_page(base, size, type);
	if (error)
L
Linus Torvalds 已提交
371 372 373
		return error;

	if (type >= MTRR_NUM_TYPES) {
374
		pr_warning("mtrr: type: %u invalid\n", type);
L
Linus Torvalds 已提交
375 376 377
		return -EINVAL;
	}

378
	/* If the type is WC, check that this processor supports it */
L
Linus Torvalds 已提交
379
	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
380
		pr_warning("mtrr: your processor doesn't support write-combining\n");
L
Linus Torvalds 已提交
381 382 383
		return -ENOSYS;
	}

J
Jan Beulich 已提交
384
	if (!size) {
385
		pr_warning("mtrr: zero sized request\n");
J
Jan Beulich 已提交
386 387 388
		return -EINVAL;
	}

L
Linus Torvalds 已提交
389
	if (base & size_or_mask || size & size_or_mask) {
390
		pr_warning("mtrr: base or size exceeds the MTRR width\n");
L
Linus Torvalds 已提交
391 392 393 394
		return -EINVAL;
	}

	error = -EINVAL;
J
Jan Beulich 已提交
395
	replace = -1;
L
Linus Torvalds 已提交
396

S
Shaohua Li 已提交
397
	/* No CPU hotplug when we change MTRR entries */
398
	get_online_cpus();
399 400

	/* Search for existing MTRR  */
401
	mutex_lock(&mtrr_mutex);
L
Linus Torvalds 已提交
402 403
	for (i = 0; i < num_var_ranges; ++i) {
		mtrr_if->get(i, &lbase, &lsize, &ltype);
404 405
		if (!lsize || base > lbase + lsize - 1 ||
		    base + size - 1 < lbase)
L
Linus Torvalds 已提交
406
			continue;
407 408 409 410
		/*
		 * At this point we know there is some kind of
		 * overlap/enclosure
		 */
J
Jan Beulich 已提交
411
		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
412 413
			if (base <= lbase &&
			    base + size - 1 >= lbase + lsize - 1) {
J
Jan Beulich 已提交
414 415 416 417
				/*  New region encloses an existing region  */
				if (type == ltype) {
					replace = replace == -1 ? i : -2;
					continue;
418
				} else if (types_compatible(type, ltype))
J
Jan Beulich 已提交
419 420
					continue;
			}
421 422 423
			pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
				" 0x%lx000,0x%lx000\n", base, size, lbase,
				lsize);
L
Linus Torvalds 已提交
424 425
			goto out;
		}
426
		/* New region is enclosed by an existing region */
L
Linus Torvalds 已提交
427
		if (ltype != type) {
J
Jan Beulich 已提交
428
			if (types_compatible(type, ltype))
L
Linus Torvalds 已提交
429
				continue;
430 431 432
			pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
				base, size, mtrr_attrib_to_str(ltype),
				mtrr_attrib_to_str(type));
L
Linus Torvalds 已提交
433 434 435
			goto out;
		}
		if (increment)
436
			++mtrr_usage_table[i];
L
Linus Torvalds 已提交
437 438 439
		error = i;
		goto out;
	}
440
	/* Search for an empty MTRR */
J
Jan Beulich 已提交
441
	i = mtrr_if->get_free_region(base, size, replace);
L
Linus Torvalds 已提交
442 443
	if (i >= 0) {
		set_mtrr(i, base, size, type);
444 445 446 447
		if (likely(replace < 0)) {
			mtrr_usage_table[i] = 1;
		} else {
			mtrr_usage_table[i] = mtrr_usage_table[replace];
448
			if (increment)
449
				mtrr_usage_table[i]++;
J
Jan Beulich 已提交
450 451
			if (unlikely(replace != i)) {
				set_mtrr(replace, 0, 0, 0);
452
				mtrr_usage_table[replace] = 0;
J
Jan Beulich 已提交
453 454
			}
		}
455 456 457
	} else {
		pr_info("mtrr: no more MTRRs available\n");
	}
L
Linus Torvalds 已提交
458 459
	error = i;
 out:
460
	mutex_unlock(&mtrr_mutex);
461
	put_online_cpus();
L
Linus Torvalds 已提交
462 463 464
	return error;
}

465 466 467
/*
 * Check that @base and @size are both multiples of PAGE_SIZE.
 * Returns 0 if so; otherwise logs the problem, dumps the stack and
 * returns -1.
 */
static int mtrr_check(unsigned long base, unsigned long size)
{
	/* A low bit set in either value shows up in their OR */
	if (!((base | size) & (PAGE_SIZE - 1)))
		return 0;

	pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
	pr_debug("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
	dump_stack();
	return -1;
}

L
Linus Torvalds 已提交
476
/**
477 478 479 480 481
 * mtrr_add - Add a memory type region
 * @base: Physical base address of region
 * @size: Physical size of region
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
L
Linus Torvalds 已提交
482
 *
483 484 485 486 487 488
 * Memory type region registers control the caching on newer Intel and
 * non Intel processors. This function allows drivers to request an
 * MTRR is added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
L
Linus Torvalds 已提交
489
 *
490 491 492 493
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
L
Linus Torvalds 已提交
494
 *
495 496
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
L
Linus Torvalds 已提交
497
 *
498
 * The available types are
L
Linus Torvalds 已提交
499
 *
500
 * %MTRR_TYPE_UNCACHABLE - No caching
L
Linus Torvalds 已提交
501
 *
502
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
L
Linus Torvalds 已提交
503
 *
504
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
L
Linus Torvalds 已提交
505
 *
506
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
L
Linus Torvalds 已提交
507
 *
508 509
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
L
Linus Torvalds 已提交
510
 */
511 512
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
	     bool increment)
L
Linus Torvalds 已提交
513
{
514
	if (mtrr_check(base, size))
L
Linus Torvalds 已提交
515 516 517 518
		return -EINVAL;
	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
			     increment);
}
519
EXPORT_SYMBOL(mtrr_add);
L
Linus Torvalds 已提交
520 521

/**
522 523 524 525
 * mtrr_del_page - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
L
Linus Torvalds 已提交
526
 *
527 528
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
L
Linus Torvalds 已提交
529
 *
530 531 532 533
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
L
Linus Torvalds 已提交
534 535 536 537 538
 */
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
	int i, max;
	mtrr_type ltype;
J
Jan Beulich 已提交
539
	unsigned long lbase, lsize;
L
Linus Torvalds 已提交
540 541 542 543 544 545
	int error = -EINVAL;

	if (!mtrr_if)
		return -ENXIO;

	max = num_var_ranges;
S
Shaohua Li 已提交
546
	/* No CPU hotplug when we change MTRR entries */
547
	get_online_cpus();
548
	mutex_lock(&mtrr_mutex);
L
Linus Torvalds 已提交
549 550 551 552 553 554 555 556 557 558
	if (reg < 0) {
		/*  Search for existing MTRR  */
		for (i = 0; i < max; ++i) {
			mtrr_if->get(i, &lbase, &lsize, &ltype);
			if (lbase == base && lsize == size) {
				reg = i;
				break;
			}
		}
		if (reg < 0) {
559 560
			pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
				 base, size);
L
Linus Torvalds 已提交
561 562 563 564
			goto out;
		}
	}
	if (reg >= max) {
565
		pr_warning("mtrr: register: %d too big\n", reg);
L
Linus Torvalds 已提交
566 567 568 569
		goto out;
	}
	mtrr_if->get(reg, &lbase, &lsize, &ltype);
	if (lsize < 1) {
570
		pr_warning("mtrr: MTRR %d not used\n", reg);
L
Linus Torvalds 已提交
571 572
		goto out;
	}
573
	if (mtrr_usage_table[reg] < 1) {
574
		pr_warning("mtrr: reg: %d has count=0\n", reg);
L
Linus Torvalds 已提交
575 576
		goto out;
	}
577
	if (--mtrr_usage_table[reg] < 1)
L
Linus Torvalds 已提交
578 579 580
		set_mtrr(reg, 0, 0, 0);
	error = reg;
 out:
581
	mutex_unlock(&mtrr_mutex);
582
	put_online_cpus();
L
Linus Torvalds 已提交
583 584
	return error;
}
585

L
Linus Torvalds 已提交
586
/**
587 588 589 590
 * mtrr_del - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
L
Linus Torvalds 已提交
591
 *
592 593
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
L
Linus Torvalds 已提交
594
 *
595 596 597 598
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
L
Linus Torvalds 已提交
599
 */
600
int mtrr_del(int reg, unsigned long base, unsigned long size)
L
Linus Torvalds 已提交
601
{
602
	if (mtrr_check(base, size))
L
Linus Torvalds 已提交
603 604 605 606 607
		return -EINVAL;
	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}
EXPORT_SYMBOL(mtrr_del);

608 609
/*
 * HACK ALERT!
 * The vendor init routines below should be called implicitly, but we
 * can't do that yet until all the initcall stuff is done...
 *
 * They are only built in when CONFIG_X86_64 is not set.
 */
static void __init init_ifs(void)
{
#ifndef CONFIG_X86_64
	amd_init_mtrr();
	cyrix_init_mtrr();
	centaur_init_mtrr();
#endif
}

S
Shaohua Li 已提交
622 623 624
/* The suspend/resume methods are only for CPU without MTRR. CPU using generic
 * MTRR driver doesn't require this
 */
L
Linus Torvalds 已提交
625 626 627
struct mtrr_value {
	mtrr_type	ltype;
	unsigned long	lbase;
J
Jan Beulich 已提交
628
	unsigned long	lsize;
L
Linus Torvalds 已提交
629 630
};

631
static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
L
Linus Torvalds 已提交
632

633
/*
 * Suspend hook: snapshot base, size and type of every variable range
 * into mtrr_value[]. Neither argument is used. Always returns 0.
 */
static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
{
	int reg;

	for (reg = 0; reg < num_var_ranges; reg++) {
		struct mtrr_value *slot = &mtrr_value[reg];

		mtrr_if->get(reg, &slot->lbase, &slot->lsize, &slot->ltype);
	}

	return 0;
}

645
static int mtrr_restore(struct sys_device *sysdev)
L
Linus Torvalds 已提交
646 647 648 649
{
	int i;

	for (i = 0; i < num_var_ranges; i++) {
650 651 652 653 654
		if (mtrr_value[i].lsize) {
			set_mtrr(i, mtrr_value[i].lbase,
				    mtrr_value[i].lsize,
				    mtrr_value[i].ltype);
		}
L
Linus Torvalds 已提交
655 656 657 658 659 660 661 662 663 664 665
	}
	return 0;
}



/* Suspend/resume hooks; registered from mtrr_init_finialize(). */
static struct sysdev_driver mtrr_sysdev_driver = {
	.suspend	= mtrr_save,
	.resume		= mtrr_restore,
};

666
int __initdata changed_by_mtrr_cleanup;
L
Linus Torvalds 已提交
667 668

/**
S
Shaohua Li 已提交
669
 * mtrr_bp_init - initialize mtrrs on the boot CPU
L
Linus Torvalds 已提交
670
 *
671
 * This needs to be called early; before any of the other CPUs are
L
Linus Torvalds 已提交
672
 * initialized (i.e. before smp_init()).
673
 *
L
Linus Torvalds 已提交
674
 */
675
void __init mtrr_bp_init(void)
L
Linus Torvalds 已提交
676
{
677
	u32 phys_addr;
678

L
Linus Torvalds 已提交
679 680
	init_ifs();

681 682
	phys_addr = 32;

L
Linus Torvalds 已提交
683 684
	if (cpu_has_mtrr) {
		mtrr_if = &generic_mtrr_ops;
685
		size_or_mask = 0xff000000;			/* 36 bits */
L
Linus Torvalds 已提交
686
		size_and_mask = 0x00f00000;
687
		phys_addr = 36;
688

689 690 691 692 693
		/*
		 * This is an AMD specific MSR, but we assume(hope?) that
		 * Intel will implement it to when they extend the address
		 * bus of the Xeon.
		 */
694 695
		if (cpuid_eax(0x80000000) >= 0x80000008) {
			phys_addr = cpuid_eax(0x80000008) & 0xff;
696 697 698 699 700 701 702 703
			/* CPUID workaround for Intel 0F33/0F34 CPU */
			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
			    boot_cpu_data.x86 == 0xF &&
			    boot_cpu_data.x86_model == 0x3 &&
			    (boot_cpu_data.x86_mask == 0x3 ||
			     boot_cpu_data.x86_mask == 0x4))
				phys_addr = 36;

704 705
			size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
			size_and_mask = ~size_or_mask & 0xfffff00000ULL;
706 707
		} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
			   boot_cpu_data.x86 == 6) {
708 709 710 711 712
			/*
			 * VIA C* family have Intel style MTRRs,
			 * but don't support PAE
			 */
			size_or_mask = 0xfff00000;		/* 32 bits */
713
			size_and_mask = 0;
714
			phys_addr = 32;
L
Linus Torvalds 已提交
715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747
		}
	} else {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (cpu_has_k6_mtrr) {
				/* Pre-Athlon (K6) AMD CPU MTRRs */
				mtrr_if = mtrr_ops[X86_VENDOR_AMD];
				size_or_mask = 0xfff00000;	/* 32 bits */
				size_and_mask = 0;
			}
			break;
		case X86_VENDOR_CENTAUR:
			if (cpu_has_centaur_mcr) {
				mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
				size_or_mask = 0xfff00000;	/* 32 bits */
				size_and_mask = 0;
			}
			break;
		case X86_VENDOR_CYRIX:
			if (cpu_has_cyrix_arr) {
				mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
				size_or_mask = 0xfff00000;	/* 32 bits */
				size_and_mask = 0;
			}
			break;
		default:
			break;
		}
	}

	if (mtrr_if) {
		set_num_var_ranges();
		init_table();
748
		if (use_intel()) {
S
Shaohua Li 已提交
749
			get_mtrr_state();
750

751 752
			if (mtrr_cleanup(phys_addr)) {
				changed_by_mtrr_cleanup = 1;
753
				mtrr_if->set_all();
754
			}
755
		}
L
Linus Torvalds 已提交
756 757 758
	}
}

S
Shaohua Li 已提交
759 760
void mtrr_ap_init(void)
{
761
	if (!use_intel() || mtrr_aps_delayed_init)
S
Shaohua Li 已提交
762 763
		return;
	/*
764 765 766 767 768 769 770 771 772 773 774
	 * Ideally we should hold mtrr_mutex here to avoid mtrr entries
	 * changed, but this routine will be called in cpu boot time,
	 * holding the lock breaks it.
	 *
	 * This routine is called in two cases:
	 *
	 *   1. very earily time of software resume, when there absolutely
	 *      isn't mtrr entry changes;
	 *
	 *   2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
	 *      lock to prevent mtrr entry changes
S
Shaohua Li 已提交
775
	 */
776
	set_mtrr(~0U, 0, 0, 0);
S
Shaohua Li 已提交
777 778
}

779 780 781 782 783
/**
 * Save current fixed-range MTRR state of the BSP
 */
void mtrr_save_state(void)
{
784
	smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
785 786
}

787 788 789 790 791
void set_mtrr_aps_delayed_init(void)
{
	if (!use_intel())
		return;

792
	mtrr_aps_delayed_init = true;
793 794 795 796 797 798 799 800 801 802 803
}

/*
 * MTRR initialization for all AP's
 */
void mtrr_aps_init(void)
{
	if (!use_intel())
		return;

	set_mtrr(~0U, 0, 0, 0);
804
	mtrr_aps_delayed_init = false;
805 806 807 808 809 810 811 812 813 814
}

/*
 * Reprogram all MTRRs on the calling CPU through mtrr_if->set_all().
 * Only acts when the Intel-style interface is in use.
 */
void mtrr_bp_restore(void)
{
	if (use_intel())
		mtrr_if->set_all();
}

S
Shaohua Li 已提交
815 816 817 818
/*
 * Late MTRR setup, run as a subsys initcall. Always returns 0.
 *
 * With the Intel-style interface it only emits the MTRR state warning,
 * and only if mtrr_cleanup() did not change anything. For the other
 * drivers it registers the suspend/resume hooks.
 */
static int __init mtrr_init_finialize(void)
{
	if (!mtrr_if)
		return 0;

	if (!use_intel()) {
		/*
		 * The CPU has no MTRR and seems to not support SMP. They
		 * have specific drivers, we use a tricky method to support
		 * suspend/resume for them.
		 *
		 * TBD: is there any system with such CPU which supports
		 * suspend/resume? If no, we should remove the code.
		 */
		sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver);
		return 0;
	}

	if (!changed_by_mtrr_cleanup)
		mtrr_state_warn();

	return 0;
}
subsys_initcall(mtrr_init_finialize);