intel_cacheinfo.c 25.8 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
2
 *	Routines to identify caches on Intel CPU.
L
Linus Torvalds 已提交
3
 *
4 5
 *	Changes:
 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
A
Alan Cox 已提交
6
 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7
 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
L
Linus Torvalds 已提交
8 9 10
 */

#include <linux/slab.h>
11
#include <linux/cacheinfo.h>
L
Linus Torvalds 已提交
12
#include <linux/cpu.h>
T
Tim Schmielau 已提交
13
#include <linux/sched.h>
14
#include <linux/sysfs.h>
15
#include <linux/pci.h>
L
Linus Torvalds 已提交
16 17

#include <asm/processor.h>
18
#include <asm/amd_nb.h>
19
#include <asm/smp.h>
L
Linus Torvalds 已提交
20 21 22 23 24 25 26

/* Cache kinds stored in the cache_type field of cache_table[] entries. */
#define LVL_1_INST	1	/* L1 instruction cache */
#define LVL_1_DATA	2	/* L1 data cache */
#define LVL_2		3	/* L2 cache */
#define LVL_3		4	/* L3 cache */
#define LVL_TRACE	5	/* trace cache */

A
Alan Cox 已提交
27
/*
 * One row of the CPUID(2) descriptor lookup table: ties a one-byte
 * descriptor to the kind of cache it describes and to that cache's size.
 */
struct _cache_table {
	unsigned char descriptor;	/* descriptor byte looked up by init_intel_cacheinfo() */
	char cache_type;		/* one of the LVL_* constants */
	short size;			/* size as stored in cache_table[] (KB; MB() scales by 1024) */
};

D
Dave Jones 已提交
33 34
#define MB(x)	((x) * 1024)	/* x megabytes expressed in the KB unit used by cache_table[] */

A
Alan Cox 已提交
35 36 37
/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

38
static const struct _cache_table cache_table[] =
L
Linus Torvalds 已提交
39 40 41
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
42
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
L
Linus Torvalds 已提交
43 44
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
45
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
46
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
47
	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
L
Linus Torvalds 已提交
48
	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
D
Dave Jones 已提交
49 50 51
	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
L
Linus Torvalds 已提交
52 53 54
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
55
	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
L
Linus Torvalds 已提交
56 57
	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
58 59
	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
60
	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
L
Linus Torvalds 已提交
61 62 63
	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
D
Dave Jones 已提交
64 65 66 67
	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
68
	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
D
Dave Jones 已提交
69 70 71 72 73 74
	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
L
Linus Torvalds 已提交
75 76 77 78 79 80 81
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
82
	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
D
Dave Jones 已提交
83 84 85 86 87 88 89
	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
90
	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
D
Dave Jones 已提交
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
L
Linus Torvalds 已提交
112 113 114 115
	{ 0x00, 0, 0}
};


A
Alan Cox 已提交
116
/*
 * Cache type as encoded in the type field of union _cpuid4_leaf_eax.
 * CTYPE_NULL marks "no more caches" and ends leaf enumeration.
 */
enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

/*
 * EAX of a cache-parameters CPUID leaf (leaf 4, or the value synthesized
 * by amd_cpuid4()): .full is the raw register, .split its decoded fields.
 */
union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;	/* CTYPE_NULL means no cache at this leaf */
		unsigned int		level:3;	/* cache level (1, 2, 3) */
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;	/* stored minus one; readers add 1 */
		unsigned int		num_cores_on_die:6;	/* stored minus one */
	} split;
	u32 full;
};

/*
 * EBX of a cache-parameters CPUID leaf. Every field is stored minus one;
 * the code in this file adds 1 to each before using it.
 */
union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

/* ECX of a cache-parameters CPUID leaf: number of sets, stored minus one. */
union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};

152
struct _cpuid4_info_regs {
L
Linus Torvalds 已提交
153 154 155 156
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
157
	struct amd_northbridge *nb;
158 159
};

160 161 162 163
/* Number of cache leaves; written by init_amd_cacheinfo() and init_intel_cacheinfo(). */
unsigned short			num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user.  This makes some assumptions about the machine:
   L2 not shared, no SMT etc. that is currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy").
   Maybe later */
/*
 * One L1 cache descriptor register as read by amd_cpuid4() from
 * CPUID 0x80000005 (ECX for L1 data, EDX for L1 instruction).
 */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;	/* raw register value; 0 is treated as "cache not present" */
};

/* L2 descriptor register as read by amd_cpuid4() from CPUID 0x80000006 ECX. */
union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;	/* encoded; decoded through assocs[] */
		unsigned size_in_kb:16;
	};
	unsigned val;	/* raw register value; 0 is treated as "cache not present" */
};

188 189
/* L3 descriptor register as read by amd_cpuid4() from CPUID 0x80000006 EDX. */
union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;		/* encoded; decoded through assocs[] */
		unsigned res:2;
		unsigned size_encoded:14;	/* amd_cpuid4() multiplies this by 512 to get KB */
	};
	unsigned val;	/* raw register value; 0 is treated as "cache not present" */
};

199
/*
 * Decode table for the 4-bit L2/L3 associativity field of
 * CPUID 0x80000006: index is the encoded value, entry is the number of
 * ways. Encodings without an entry (0, 7, 9) decode to 0.
 * 3 (3-way) and 5 (6-way) are valid encodings and must be present,
 * otherwise such caches are reported with 0 ways.
 */
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 4,
	[5] = 6,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

213 214
/* Both tables are indexed by the emulated leaf number in amd_cpuid4(): 0 = L1d, 1 = L1i, 2 = L2, 3 = L3. */
static const unsigned char levels[] = { 1, 1, 2, 3 };	/* cache level reported for each leaf */
static const unsigned char types[] = { 1, 2, 3, 3 };	/* enum _cache_type value reported for each leaf */
215

216 217 218 219 220 221 222
/* Translates the CPUID-level enum _cache_type into the enum cache_type stored in struct cacheinfo (see ci_leaf_init()). */
static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

223
static void
224 225 226
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
227 228 229 230 231
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
232 233
	union l3_cache l3;
	union l1_cache *l1 = &l1d;
234 235 236 237 238 239

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
240
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
241

242 243 244 245 246 247
	switch (leaf) {
	case 1:
		l1 = &l1i;
	case 0:
		if (!l1->val)
			return;
248
		assoc = assocs[l1->assoc];
249 250 251
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
252 253 254 255
		break;
	case 2:
		if (!l2.val)
			return;
256
		assoc = assocs[l2.assoc];
257 258 259
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
260
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
261 262 263 264
		break;
	case 3:
		if (!l3.val)
			return;
265
		assoc = assocs[l3.assoc];
266 267 268
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
269 270 271 272
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
273 274 275
		break;
	default:
		return;
276 277
	}

278 279 280
	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
281
	eax->split.num_threads_sharing = 0;
282
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
283 284


285
	if (assoc == 0xffff)
286 287
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
288
	ebx->split.ways_of_associativity = assoc - 1;
289 290 291 292
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}
L
Linus Torvalds 已提交
293

294
#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
295

296 297 298
/*
 * L3 cache descriptors
 */
299
static void amd_calc_l3_indices(struct amd_northbridge *nb)
300
{
301
	struct amd_l3_cache *l3 = &nb->l3_cache;
302
	unsigned int sc0, sc1, sc2, sc3;
303
	u32 val = 0;
304

305
	pci_read_config_dword(nb->misc, 0x1C4, &val);
306 307

	/* calculate subcache sizes */
308 309
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));
310 311 312 313 314 315

	if (boot_cpu_data.x86 == 0x15) {
		l3->subcaches[0] = sc0 += !(val & BIT(1));
		l3->subcaches[1] = sc1 += !(val & BIT(5));
	}

316 317 318
	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

319
	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
320 321
}

322 323 324 325 326 327 328
/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @l3: L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
329
int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
330 331 332
{
	unsigned int reg = 0;

333
	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
334 335 336 337 338 339 340 341

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

342
static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
343
				  unsigned int slot)
344
{
345
	int index;
346
	struct amd_northbridge *nb = this_leaf->priv;
347

348
	index = amd_get_l3_disable_slot(nb, slot);
349 350
	if (index >= 0)
		return sprintf(buf, "%d\n", index);
351

352
	return sprintf(buf, "FREE\n");
353 354
}

355
#define SHOW_CACHE_DISABLE(slot)					\
356
static ssize_t								\
357 358
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
359
{									\
360
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
361
	return show_cache_disable(this_leaf, buf, slot);		\
362 363 364 365
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

366
static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
367 368 369 370 371 372 373 374 375 376 377 378
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 *  disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

379
		if (!nb->l3_cache.subcaches[i])
380 381
			continue;

382
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
383 384 385 386 387 388 389 390 391

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache which indices we disable therefore a simple wbinvd()
		 * is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
392
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
393 394 395
	}
}

396 397 398 399 400 401 402 403 404 405
/*
 * disable a L3 cache index by using a disable-slot
 *
 * @l3:    L3 cache descriptor
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
406
int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
407
			    unsigned long index)
408
{
409
	int ret = 0;
410

411
	/*  check if @slot is already used or the index is already disabled */
412
	ret = amd_get_l3_disable_slot(nb, slot);
413
	if (ret >= 0)
414
		return -EEXIST;
415

416
	if (index > nb->l3_cache.indices)
417 418
		return -EINVAL;

419
	/* check whether the other slot has disabled the same index already */
420
	if (index == amd_get_l3_disable_slot(nb, !slot))
421
		return -EEXIST;
422

423
	amd_l3_disable_index(nb, cpu, slot, index);
424 425 426 427

	return 0;
}

428 429 430
/*
 * Parse a decimal L3 index from @buf and disable it through slot @slot.
 *
 * Returns @count on success, -EPERM without CAP_SYS_ADMIN, -EINVAL when
 * @buf is not a valid number, or the error from amd_set_l3_disable_slot().
 */
static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;
	struct amd_northbridge *nb = this_leaf->priv;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* any CPU sharing this leaf will do for the flush */
	cpu = cpumask_first(&this_leaf->shared_cpu_map);

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			pr_warning("L3 slot %d in use/index already disabled!\n",
				   slot);
		return err;
	}
	return count;
}

454
#define STORE_CACHE_DISABLE(slot)					\
455
static ssize_t								\
456 457 458
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
459
{									\
460
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
461
	return store_cache_disable(this_leaf, buf, count, slot);	\
462
}
463 464 465
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

466 467
static ssize_t subcaches_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
468
{
469 470
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
471 472 473 474

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

475 476 477
/*
 * Parse a hexadecimal value from @buf and pass it to amd_set_subcaches()
 * for the first CPU sharing this cache leaf.
 *
 * Returns @count on success, -EPERM without CAP_SYS_ADMIN, -EINVAL when
 * parsing fails or amd_set_subcaches() reports an error.
 */
static ssize_t subcaches_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}

495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
/*
 * Device attributes referenced below as dev_attr_cache_disable_0,
 * dev_attr_cache_disable_1 and dev_attr_subcaches; presumably bound by
 * DEVICE_ATTR_RW to the matching <name>_show/<name>_store handlers above.
 */
static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);

/*
 * Visibility callback for cache_private_group.
 *
 * Returns the attribute's own mode when the leaf has private data and the
 * northbridge feature backing that attribute is available, 0 otherwise:
 *   - subcaches        needs AMD_NB_L3_PARTITIONING
 *   - cache_disable_0/1 need AMD_NB_L3_INDEX_DISABLE
 */
static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
			       struct attribute *attr, int unused)
{
	struct cacheinfo *leaf = dev_get_drvdata(kobj_to_dev(kobj));
	umode_t visible_mode = attr->mode;

	if (!leaf->priv)
		return 0;

	if (attr == &dev_attr_subcaches.attr)
		return amd_nb_has_feature(AMD_NB_L3_PARTITIONING) ?
			visible_mode : 0;

	if (attr != &dev_attr_cache_disable_0.attr &&
	    attr != &dev_attr_cache_disable_1.attr)
		return 0;

	return amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE) ? visible_mode : 0;
}

/*
 * Attribute group returned by cache_get_priv_group(); its .attrs array is
 * filled in later by init_amd_l3_attrs().
 */
static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};

static void init_amd_l3_attrs(void)
{
	int n = 1;
	static struct attribute **amd_l3_attrs;

	if (amd_l3_attrs) /* already initialized */
		return;

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
	if (!amd_l3_attrs)
		return;

	n = 0;
	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
	}
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
550

551 552 553 554 555 556 557 558
	cache_private_group.attrs = amd_l3_attrs;
}

const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
	struct amd_northbridge *nb = this_leaf->priv;

559
	if (this_leaf->level < 3 || !nb)
560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580
		return NULL;

	if (nb && nb->l3_cache.indices)
		init_amd_l3_attrs();

	return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3)
		return;

	node = amd_get_nb_id(smp_processor_id());
	this_leaf->nb = node_to_amd_nb(node);
	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
		amd_calc_l3_indices(this_leaf->nb);
}
581
#else
582
#define amd_init_l3_cache(x, y)	/* expands to nothing unless both CONFIG_AMD_NB and CONFIG_SYSFS are set */
583
#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
584

585
static int
586
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
L
Linus Torvalds 已提交
587
{
588 589 590
	union _cpuid4_leaf_eax	eax;
	union _cpuid4_leaf_ebx	ebx;
	union _cpuid4_leaf_ecx	ecx;
591
	unsigned		edx;
L
Linus Torvalds 已提交
592

593
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
594 595 596 597 598
		if (cpu_has_topoext)
			cpuid_count(0x8000001d, index, &eax.full,
				    &ebx.full, &ecx.full, &edx);
		else
			amd_cpuid4(index, &eax, &ebx, &ecx);
599
		amd_init_l3_cache(this_leaf, index);
600 601 602 603
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

604
	if (eax.split.type == CTYPE_NULL)
605
		return -EIO; /* better error ? */
L
Linus Torvalds 已提交
606

607 608 609
	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
610 611 612 613
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
L
Linus Torvalds 已提交
614 615 616
	return 0;
}

617
static int find_num_cache_leaves(struct cpuinfo_x86 *c)
L
Linus Torvalds 已提交
618
{
619
	unsigned int		eax, ebx, ecx, edx, op;
L
Linus Torvalds 已提交
620
	union _cpuid4_leaf_eax	cache_eax;
621
	int 			i = -1;
L
Linus Torvalds 已提交
622

623 624 625 626 627
	if (c->x86_vendor == X86_VENDOR_AMD)
		op = 0x8000001d;
	else
		op = 4;

628 629
	do {
		++i;
630 631
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
L
Linus Torvalds 已提交
632
		cache_eax.full = eax;
633
	} while (cache_eax.split.type != CTYPE_NULL);
634
	return i;
L
Linus Torvalds 已提交
635 636
}

637
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
638 639 640 641 642 643 644 645 646 647 648 649
{

	if (cpu_has_topoext) {
		num_cache_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
		if (cpuid_edx(0x80000006) & 0xf000)
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}
}

650
unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
L
Linus Torvalds 已提交
651
{
A
Alan Cox 已提交
652 653
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
L
Linus Torvalds 已提交
654 655
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
656
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
657
#ifdef CONFIG_X86_HT
658
	unsigned int cpu = c->cpu_index;
659
#endif
L
Linus Torvalds 已提交
660

661
	if (c->cpuid_level > 3) {
L
Linus Torvalds 已提交
662 663 664 665
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
666
			num_cache_leaves = find_num_cache_leaves(c);
L
Linus Torvalds 已提交
667 668 669 670 671 672 673 674
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
675
			struct _cpuid4_info_regs this_leaf = {};
L
Linus Torvalds 已提交
676 677
			int retval;

678
			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
679 680 681 682 683
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
684
				if (this_leaf.eax.split.type == CTYPE_DATA)
685
					new_l1d = this_leaf.size/1024;
686
				else if (this_leaf.eax.split.type == CTYPE_INST)
687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			default:
				break;
L
Linus Torvalds 已提交
703 704 705
			}
		}
	}
706 707 708 709 710
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
L
Linus Torvalds 已提交
711
		/* supports eax=2  call */
712 713
		int j, n;
		unsigned int regs[4];
L
Linus Torvalds 已提交
714
		unsigned char *dp = (unsigned char *)regs;
715 716 717 718
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;
L
Linus Torvalds 已提交
719 720 721 722

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

A
Alan Cox 已提交
723
		for (i = 0 ; i < n ; i++) {
L
Linus Torvalds 已提交
724 725 726
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
A
Alan Cox 已提交
727 728 729
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;
L
Linus Torvalds 已提交
730 731

			/* Byte 0 is level count, not a descriptor */
A
Alan Cox 已提交
732
			for (j = 1 ; j < 16 ; j++) {
L
Linus Torvalds 已提交
733 734 735 736
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
A
Alan Cox 已提交
737
				while (cache_table[k].descriptor != 0) {
L
Linus Torvalds 已提交
738
					if (cache_table[k].descriptor == des) {
739 740
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
L
Linus Torvalds 已提交
741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
766
	}
L
Linus Torvalds 已提交
767

768 769
	if (new_l1d)
		l1d = new_l1d;
L
Linus Torvalds 已提交
770

771 772
	if (new_l1i)
		l1i = new_l1i;
L
Linus Torvalds 已提交
773

774 775
	if (new_l2) {
		l2 = new_l2;
776
#ifdef CONFIG_X86_HT
777
		per_cpu(cpu_llc_id, cpu) = l2_id;
778
#endif
779
	}
L
Linus Torvalds 已提交
780

781 782
	if (new_l3) {
		l3 = new_l3;
783
#ifdef CONFIG_X86_HT
784
		per_cpu(cpu_llc_id, cpu) = l3_id;
785
#endif
L
Linus Torvalds 已提交
786 787
	}

788 789 790 791 792 793 794 795 796 797 798 799
#ifdef CONFIG_X86_HT
	/*
	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
	 * turns means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->phys_proc_id.
	 */
	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif

800 801
	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

L
Linus Torvalds 已提交
802 803 804
	return l2;
}

805 806
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    struct _cpuid4_info_regs *base)
L
Linus Torvalds 已提交
807
{
808 809
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf;
810
	int i, sibling;
L
Linus Torvalds 已提交
811

812 813 814
	if (cpu_has_topoext) {
		unsigned int apicid, nshared, first, last;

815 816
		this_leaf = this_cpu_ci->info_list + index;
		nshared = base->eax.split.num_threads_sharing + 1;
817 818 819 820 821
		apicid = cpu_data(cpu).apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

		for_each_online_cpu(i) {
822 823 824 825
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

826 827 828
			apicid = cpu_data(i).apicid;
			if ((apicid < first) || (apicid > last))
				continue;
829 830

			this_leaf = this_cpu_ci->info_list + index;
831 832 833 834

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).apicid;
				if ((apicid < first) || (apicid > last))
835
					continue;
836 837
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
838
			}
839
		}
840 841
	} else if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
842 843
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
844
				continue;
845
			this_leaf = this_cpu_ci->info_list + index;
846
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
847 848
				if (!cpu_online(sibling))
					continue;
849 850
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
851 852
			}
		}
853 854
	} else
		return 0;
855

856
	return 1;
857 858
}

859 860
static void __cache_cpumap_setup(unsigned int cpu, int index,
				 struct _cpuid4_info_regs *base)
861
{
862 863
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf, *sibling_leaf;
864 865 866 867 868
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD) {
869
		if (__cache_amd_cpumap_setup(cpu, index, base))
870 871 872
			return;
	}

873 874
	this_leaf = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
L
Linus Torvalds 已提交
875

876
	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
L
Linus Torvalds 已提交
877
	if (num_threads_sharing == 1)
878
		return;
879

880
	index_msb = get_count_order(num_threads_sharing);
881

882 883 884
	for_each_online_cpu(i)
		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
885

886 887 888 889 890
			if (i == cpu || !sib_cpu_ci->info_list)
				continue;/* skip if itself or no cacheinfo */
			sibling_leaf = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
891
		}
892 893
}

894 895
static void ci_leaf_init(struct cacheinfo *this_leaf,
			 struct _cpuid4_info_regs *base)
896
{
897 898 899 900 901 902 903 904 905 906 907
	this_leaf->level = base->eax.split.level;
	this_leaf->type = cache_type_map[base->eax.split.type];
	this_leaf->coherency_line_size =
				base->ebx.split.coherency_line_size + 1;
	this_leaf->ways_of_associativity =
				base->ebx.split.ways_of_associativity + 1;
	this_leaf->size = base->size;
	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
	this_leaf->physical_line_partition =
				base->ebx.split.physical_line_partition + 1;
	this_leaf->priv = base->nb;
L
Linus Torvalds 已提交
908 909
}

910
static int __init_cache_level(unsigned int cpu)
911
{
912
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
913

914
	if (!num_cache_leaves)
L
Linus Torvalds 已提交
915
		return -ENOENT;
916 917 918 919
	if (!this_cpu_ci)
		return -EINVAL;
	this_cpu_ci->num_levels = 3;
	this_cpu_ci->num_leaves = num_cache_leaves;
L
Linus Torvalds 已提交
920 921 922
	return 0;
}

923
static int __populate_cache_leaves(unsigned int cpu)
L
Linus Torvalds 已提交
924
{
925 926 927 928
	unsigned int idx, ret;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct _cpuid4_info_regs id4_regs = {};
L
Linus Torvalds 已提交
929

930 931 932 933 934 935
	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
		if (ret)
			return ret;
		ci_leaf_init(this_leaf++, &id4_regs);
		__cache_cpumap_setup(cpu, idx, &id4_regs);
L
Linus Torvalds 已提交
936
	}
937
	return 0;
L
Linus Torvalds 已提交
938 939
}

940 941
/*
 * NOTE(review): the macro is defined outside this file; presumably it
 * generates init_cache_level()/populate_cache_leaves() wrappers that run
 * the __-prefixed helpers above on the requested CPU - confirm against
 * <linux/cacheinfo.h>.
 */
DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)