topology.c 10.0 KB
Newer Older
1
/*
 *    Copyright IBM Corp. 2007,2011
 *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

6 7 8
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

9
#include <linux/workqueue.h>
10
#include <linux/bootmem.h>
11 12 13
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/kernel.h>
14
#include <linux/sched.h>
15 16
#include <linux/init.h>
#include <linux/delay.h>
17 18
#include <linux/cpu.h>
#include <linux/smp.h>
19
#include <linux/mm.h>
20

H
Heiko Carstens 已提交
21 22 23
/*
 * Function codes handed to ptf(). HORIZONTAL and VERTICAL are used by
 * topology_set_cpu_management(); CHECK is used by the topology timer, where
 * a non-zero result schedules a topology update.
 */
#define PTF_HORIZONTAL	(0UL)
#define PTF_VERTICAL	(1UL)
#define PTF_CHECK	(2UL)
24

25 26
struct mask_info {
	struct mask_info *next;
27
	unsigned char id;
28 29 30
	cpumask_t mask;
};

H
Heiko Carstens 已提交
31
static int topology_enabled = 1;
32
static void topology_work_fn(struct work_struct *work);
33
static struct sysinfo_15_1_x *tl_info;
34 35
static void set_topology_timer(void);
static DECLARE_WORK(topology_work, topology_work_fn);
H
Heiko Carstens 已提交
36 37
/* topology_lock protects the core linked list */
static DEFINE_SPINLOCK(topology_lock);
38

39
static struct mask_info core_info;
40
cpumask_t cpu_core_map[NR_CPUS];
41
unsigned char cpu_core_id[NR_CPUS];
42

43 44 45
static struct mask_info book_info;
cpumask_t cpu_book_map[NR_CPUS];
unsigned char cpu_book_id[NR_CPUS];
46 47 48

/* smp_cpu_state_mutex must be held when accessing this array */
int cpu_polarization[NR_CPUS];
49 50

static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
51 52 53
{
	cpumask_t mask;

54
	cpumask_clear(&mask);
55 56 57 58
	if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) {
		cpumask_copy(&mask, cpumask_of(cpu));
		return mask;
	}
59
	while (info) {
60
		if (cpumask_test_cpu(cpu, &info->mask)) {
61
			mask = info->mask;
62 63
			break;
		}
64
		info = info->next;
65
	}
66 67
	if (cpumask_empty(&mask))
		cpumask_copy(&mask, cpumask_of(cpu));
68 69 70
	return mask;
}

71 72 73
static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu,
					  struct mask_info *book,
					  struct mask_info *core,
74
					  int one_core_per_cpu)
75 76 77
{
	unsigned int cpu;

78 79 80
	for (cpu = find_first_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS);
	     cpu < TOPOLOGY_CPU_BITS;
	     cpu = find_next_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS, cpu + 1))
81 82 83
	{
		unsigned int rcpu, lcpu;

84
		rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin;
85
		for_each_present_cpu(lcpu) {
86 87
			if (cpu_logical_map(lcpu) != rcpu)
				continue;
88
			cpumask_set_cpu(lcpu, &book->mask);
89
			cpu_book_id[lcpu] = book->id;
90
			cpumask_set_cpu(lcpu, &core->mask);
91
			if (one_core_per_cpu) {
92 93 94 95 96
				cpu_core_id[lcpu] = rcpu;
				core = core->next;
			} else {
				cpu_core_id[lcpu] = core->id;
			}
97
			cpu_set_polarization(lcpu, tl_cpu->pp);
98 99
		}
	}
100
	return core;
101 102
}

103
static void clear_masks(void)
104
{
105
	struct mask_info *info;
106

107 108
	info = &core_info;
	while (info) {
109
		cpumask_clear(&info->mask);
110 111 112 113
		info = info->next;
	}
	info = &book_info;
	while (info) {
114
		cpumask_clear(&info->mask);
115
		info = info->next;
116 117 118
	}
}

119
static union topology_entry *next_tle(union topology_entry *tle)
120
{
121 122 123
	if (!tle->nl)
		return (union topology_entry *)((struct topology_cpu *)tle + 1);
	return (union topology_entry *)((struct topology_container *)tle + 1);
124 125
}

126
static void __tl_to_cores_generic(struct sysinfo_15_1_x *info)
127
{
128
	struct mask_info *core = &core_info;
129
	struct mask_info *book = &book_info;
130
	union topology_entry *tle, *end;
131

H
Heiko Carstens 已提交
132
	tle = info->tle;
133
	end = (union topology_entry *)((unsigned long)info + info->length);
134 135 136
	while (tle < end) {
		switch (tle->nl) {
		case 2:
137 138
			book = book->next;
			book->id = tle->container.id;
139 140 141
			break;
		case 1:
			core = core->next;
142
			core->id = tle->container.id;
143 144
			break;
		case 0:
145
			add_cpus_to_mask(&tle->cpu, book, core, 0);
146 147
			break;
		default:
148
			clear_masks();
149
			return;
150 151 152
		}
		tle = next_tle(tle);
	}
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
}

/*
 * Variant of the topology walk used for machine types 0x2097 and 0x2098.
 *
 * Here nesting level 1 starts a new book and level 0 lists CPUs; every CPU
 * is placed into a core list element of its own (one_core_per_cpu = 1).
 * Any other nesting level clears all masks and aborts the walk.
 */
static void __tl_to_cores_z10(struct sysinfo_15_1_x *info)
{
	struct mask_info *cur_book = &book_info;
	struct mask_info *cur_core = &core_info;
	union topology_entry *entry, *limit;

	limit = (union topology_entry *)((unsigned long)info + info->length);
	for (entry = info->tle; entry < limit; entry = next_tle(entry)) {
		switch (entry->nl) {
		case 0:
			cur_core = add_cpus_to_mask(&entry->cpu, cur_book,
						    cur_core, 1);
			break;
		case 1:
			cur_book = cur_book->next;
			cur_book->id = entry->container.id;
			break;
		default:
			clear_masks();
			return;
		}
	}
}

static void tl_to_cores(struct sysinfo_15_1_x *info)
{
	struct cpuid cpu_id;

	get_cpu_id(&cpu_id);
	spin_lock_irq(&topology_lock);
	clear_masks();
	switch (cpu_id.machine) {
	case 0x2097:
	case 0x2098:
		__tl_to_cores_z10(info);
		break;
	default:
		__tl_to_cores_generic(info);
	}
H
Heiko Carstens 已提交
195
	spin_unlock_irq(&topology_lock);
196 197
}

H
Heiko Carstens 已提交
198 199 200 201 202
static void topology_update_polarization_simple(void)
{
	int cpu;

	mutex_lock(&smp_cpu_state_mutex);
203
	for_each_possible_cpu(cpu)
204
		cpu_set_polarization(cpu, POLARIZATION_HRZ);
H
Heiko Carstens 已提交
205 206 207 208
	mutex_unlock(&smp_cpu_state_mutex);
}

static int ptf(unsigned long fc)
209 210 211 212 213 214 215 216
{
	int rc;

	asm volatile(
		"	.insn	rre,0xb9a20000,%1,%1\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (rc)
H
Heiko Carstens 已提交
217 218 219 220 221 222
		: "d" (fc)  : "cc");
	return rc;
}

int topology_set_cpu_management(int fc)
{
223
	int cpu, rc;
H
Heiko Carstens 已提交
224

225
	if (!MACHINE_HAS_TOPOLOGY)
H
Heiko Carstens 已提交
226 227 228 229 230 231 232
		return -EOPNOTSUPP;
	if (fc)
		rc = ptf(PTF_VERTICAL);
	else
		rc = ptf(PTF_HORIZONTAL);
	if (rc)
		return -EBUSY;
233
	for_each_possible_cpu(cpu)
234
		cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
235 236 237
	return rc;
}

238 239
static void update_cpu_core_map(void)
{
240
	unsigned long flags;
241 242
	int cpu;

243 244 245 246 247 248 249 250
	spin_lock_irqsave(&topology_lock, flags);
	for_each_possible_cpu(cpu) {
		cpu_core_map[cpu] = cpu_group_map(&core_info, cpu);
		cpu_book_map[cpu] = cpu_group_map(&book_info, cpu);
	}
	spin_unlock_irqrestore(&topology_lock, flags);
}

251
void store_topology(struct sysinfo_15_1_x *info)
252 253 254 255 256 257 258
{
	int rc;

	rc = stsi(info, 15, 1, 3);
	if (rc != -ENOSYS)
		return;
	stsi(info, 15, 1, 2);
259 260
}

261
int arch_update_cpu_topology(void)
262
{
263
	struct sysinfo_15_1_x *info = tl_info;
264
	struct device *dev;
265 266
	int cpu;

267
	if (!MACHINE_HAS_TOPOLOGY) {
268
		update_cpu_core_map();
H
Heiko Carstens 已提交
269
		topology_update_polarization_simple();
270
		return 0;
H
Heiko Carstens 已提交
271
	}
272
	store_topology(info);
273
	tl_to_cores(info);
274
	update_cpu_core_map();
275
	for_each_online_cpu(cpu) {
276 277
		dev = get_cpu_device(cpu);
		kobject_uevent(&dev->kobj, KOBJ_CHANGE);
278
	}
279
	return 1;
280 281
}

282 283
/*
 * Handler of topology_work: asks the scheduler to rebuild its domains.
 * The @work argument is not used.
 */
static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

H
Heiko Carstens 已提交
287 288 289 290 291
/*
 * Queue topology_work via schedule_work(); its handler topology_work_fn()
 * then calls rebuild_sched_domains().
 */
void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}

292 293
static void topology_timer_fn(unsigned long ignored)
{
H
Heiko Carstens 已提交
294 295
	if (ptf(PTF_CHECK))
		topology_schedule_update();
296 297 298
	set_topology_timer();
}

299 300 301 302 303
/* Timer that periodically runs topology_timer_fn(); armed by set_topology_timer(). */
static struct timer_list topology_timer =
	TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0);

/*
 * Number of remaining short (HZ / 10) timer intervals. Decremented by
 * set_topology_timer() and raised by topology_expect_change().
 */
static atomic_t topology_poll = ATOMIC_INIT(0);

304 305
static void set_topology_timer(void)
{
306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
	if (atomic_add_unless(&topology_poll, -1, 0))
		mod_timer(&topology_timer, jiffies + HZ / 10);
	else
		mod_timer(&topology_timer, jiffies + HZ * 60);
}

/*
 * Announce that a topology change is expected soon: add 60 short polling
 * intervals to topology_poll and re-arm the timer. Does nothing without
 * topology support or when more than 60 short intervals are still pending.
 */
void topology_expect_change(void)
{
	if (!MACHINE_HAS_TOPOLOGY)
		return;
	/*
	 * The read/add sequence below is racy, but that does not matter
	 * since it is only a heuristic. Worst case is that we poll at the
	 * higher frequency for a bit longer.
	 */
	if (atomic_read(&topology_poll) <= 60) {
		atomic_add(60, &topology_poll);
		set_topology_timer();
	}
}

325
static int __init early_parse_topology(char *p)
326
{
H
Heiko Carstens 已提交
327
	if (strncmp(p, "off", 3))
328
		return 0;
H
Heiko Carstens 已提交
329
	topology_enabled = 0;
330
	return 0;
331
}
332
early_param("topology", early_parse_topology);
333

334 335
/*
 * Append list elements to @mask, one per group that can exist at the
 * nesting level selected by @offset (1 for cores, 2 for books as used by
 * s390_init_cpu_topology()).
 *
 * The element count is the product of info->mag[TOPOLOGY_NR_MAG - offset]
 * and the (info->mnest - offset) magnitude fields preceding it, but at
 * least 1. Elements come from alloc_bootmem().
 */
static void __init alloc_masks(struct sysinfo_15_1_x *info,
			       struct mask_info *mask, int offset)
{
	int top = TOPOLOGY_NR_MAG - offset;
	int levels = info->mnest - offset;
	int count, i;

	count = info->mag[top];
	for (i = 1; i <= levels; i++)
		count *= info->mag[top - i];
	if (count < 1)
		count = 1;
	for (i = 0; i < count; i++) {
		mask->next = alloc_bootmem(sizeof(*mask));
		mask = mask->next;
	}
}

349 350
/*
 * Boot-time setup: allocate the page used as tl_info, read the topology
 * into it, log the magnitude fields and the nesting level, and allocate the
 * core and book list elements. Does nothing without topology support.
 */
void __init s390_init_cpu_topology(void)
{
	int level;

	if (!MACHINE_HAS_TOPOLOGY)
		return;
	tl_info = alloc_bootmem_pages(PAGE_SIZE);
	store_topology(tl_info);
	pr_info("The CPU configuration topology of the machine is:");
	for (level = 0; level < TOPOLOGY_NR_MAG; level++)
		printk(KERN_CONT " %d", tl_info->mag[level]);
	printk(KERN_CONT " / %d\n", tl_info->mnest);
	alloc_masks(tl_info, &core_info, 1);
	alloc_masks(tl_info, &book_info, 2);
}
366 367 368

/*
 * Value (0 or 1) last accepted through the "dispatching" attribute;
 * read and written under smp_cpu_state_mutex.
 */
static int cpu_management;

369 370
static ssize_t dispatching_show(struct device *dev,
				struct device_attribute *attr,
371 372 373 374 375 376 377 378 379 380
				char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", cpu_management);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

381 382
static ssize_t dispatching_store(struct device *dev,
				 struct device_attribute *attr,
383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
				 const char *buf,
				 size_t count)
{
	int val, rc;
	char delim;

	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	rc = 0;
	get_online_cpus();
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_management == val)
		goto out;
	rc = topology_set_cpu_management(val);
399 400 401 402
	if (rc)
		goto out;
	cpu_management = val;
	topology_expect_change();
403 404 405 406 407
out:
	mutex_unlock(&smp_cpu_state_mutex);
	put_online_cpus();
	return rc ? rc : count;
}
408
static DEVICE_ATTR(dispatching, 0644, dispatching_show,
409 410
			 dispatching_store);

411 412
static ssize_t cpu_polarization_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	switch (cpu_read_polarization(cpu)) {
	case POLARIZATION_HRZ:
		count = sprintf(buf, "horizontal\n");
		break;
	case POLARIZATION_VL:
		count = sprintf(buf, "vertical:low\n");
		break;
	case POLARIZATION_VM:
		count = sprintf(buf, "vertical:medium\n");
		break;
	case POLARIZATION_VH:
		count = sprintf(buf, "vertical:high\n");
		break;
	default:
		count = sprintf(buf, "unknown\n");
		break;
	}
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
438
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);
439 440

static struct attribute *topology_cpu_attrs[] = {
441
	&dev_attr_polarization.attr,
442 443 444 445 446 447 448 449 450
	NULL,
};

static struct attribute_group topology_cpu_attr_group = {
	.attrs = topology_cpu_attrs,
};

int topology_cpu_init(struct cpu *cpu)
{
451
	return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
452 453 454 455 456 457 458 459 460 461 462
}

/*
 * Device initcall: start the topology timer when topology support exists,
 * otherwise set all CPUs to horizontal polarization. In both cases the
 * core/book maps are refreshed and the "dispatching" attribute is created
 * at the cpu subsystem root. Returns the result of device_create_file().
 */
static int __init topology_init(void)
{
	if (MACHINE_HAS_TOPOLOGY)
		set_topology_timer();
	else
		topology_update_polarization_simple();
	update_cpu_core_map();
	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);