/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
 */

/*
 * Cross Partition Communication (XPC) partition support.
 *
 *	This is the part of XPC that detects the presence/absence of
 *	other partitions. It provides a heartbeat and monitors the
 *	heartbeats of other partitions.
 *
 */

#include <linux/device.h>
#include <linux/hardirq.h>
#include <linux/slab.h>
#include "xpc.h"
#include <asm/uv/uv_hub.h>

/* XPC is exiting flag */
int xpc_exiting;

/* this partition's reserved page pointers */
struct xpc_rsvd_page *xpc_rsvd_page;
static unsigned long *xpc_part_nasids;
unsigned long *xpc_mach_nasids;

static int xpc_nasid_mask_nbytes;	/* #of bytes in nasid mask */
int xpc_nasid_mask_nlongs;	/* #of longs in nasid mask */

struct xpc_partition *xpc_partitions;
/*
 * Guarantee that the kmalloc'd memory is cacheline aligned.
 */
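/*
 * N.B. the aligned pointer returned below may differ from *base; callers
 * must pass *base (not the returned pointer) to kfree().
 */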
void *
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
	/* see if kmalloc will give us cacheline aligned memory by default */
	*base = kmalloc(size, flags);
	if (*base == NULL)
		return NULL;

	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
		return *base;

	kfree(*base);

	/* nope, we'll have to do it ourselves */
	*base = kmalloc(size + L1_CACHE_BYTES, flags);
	if (*base == NULL)
		return NULL;

	return (void *)L1_CACHE_ALIGN((u64)*base);
}

/*
 * Given a nasid, get the physical address of the partition's reserved page
 * for that nasid. This function returns 0 on any error.
 */
static unsigned long
xpc_get_rsvd_page_pa(int nasid)
{
	enum xp_retval ret;
	u64 cookie = 0;
	unsigned long rp_pa = nasid;	/* seed with nasid */
	size_t len = 0;
	size_t buf_len = 0;
	void *buf = NULL;
	void *buf_base = NULL;
	enum xp_retval (*get_partition_rsvd_page_pa)
		(void *, u64 *, unsigned long *, size_t *) =
		xpc_arch_ops.get_partition_rsvd_page_pa;

	while (1) {
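		/* loop until the arch op stops returning xpNeedMoreInfo */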

		/* !!! rp_pa will need to be _gpa on UV.
		 * ??? So do we save it into the architecture specific parts
		 * ??? of the xpc_partition structure? Do we rename this
		 * ??? function or have two versions? Rename rp_pa for UV to
		 * ??? rp_gpa?
		 */
		ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len);

		dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
			"address=0x%016lx, len=0x%016lx\n", ret,
			(unsigned long)cookie, rp_pa, len);

		if (ret != xpNeedMoreInfo)
			break;

		/* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
		if (is_shub())
			len = L1_CACHE_ALIGN(len);

		if (len > buf_len) {
			kfree(buf_base);
			buf_len = L1_CACHE_ALIGN(len);
			buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
							    &buf_base);
			if (buf_base == NULL) {
				dev_err(xpc_part, "unable to kmalloc "
					"len=0x%016lx\n", buf_len);
				ret = xpNoMemory;
				break;
			}
		}

		ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len);
		if (ret != xpSuccess) {
			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
			break;
		}
	}

	kfree(buf_base);

	if (ret != xpSuccess)
		rp_pa = 0;

	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
	return rp_pa;
}

/*
 * Fill the partition reserved page with the information needed by
 * other partitions to discover we are alive and establish initial
 * communications.
 */
int
xpc_setup_rsvd_page(void)
{
	int ret;
	struct xpc_rsvd_page *rp;
	unsigned long rp_pa;
	unsigned long new_ts_jiffies;

	/* get the local reserved page's address */

	preempt_disable();
	rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
	preempt_enable();
	if (rp_pa == 0) {
		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
		return -ESRCH;
	}
	rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa));

	if (rp->SAL_version < 3) {
		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
		rp->SAL_partid &= 0xff;
	}
	BUG_ON(rp->SAL_partid != xp_partition_id);

	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
		dev_err(xpc_part, "the reserved page's partid of %d is outside "
			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
			xp_max_npartitions);
		return -EINVAL;
	}

	rp->version = XPC_RP_VERSION;
	rp->max_npartitions = xp_max_npartitions;

	/* establish the actual sizes of the nasid masks */
	if (rp->SAL_version == 1) {
		/* SAL_version 1 didn't set the nasids_size field */
		rp->SAL_nasids_size = 128;
	}
	xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
	xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
					      BITS_PER_BYTE);

	/* setup the pointers to the various items in the reserved page */
	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
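	/* (both masks live in the reserved page itself, after its header) */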

	ret = xpc_arch_ops.setup_rsvd_page(rp);
	if (ret != 0)
		return ret;

	/*
	 * Set timestamp of when reserved page was setup by XPC.
	 * This signifies to the remote partition that our reserved
	 * page is initialized. The timestamp is never allowed to be
	 * zero (zero means "not initialized") and is nudged whenever
	 * it would repeat the previous value, so remote partitions
	 * can also tell that we've restarted.
	 */
	new_ts_jiffies = jiffies;
	if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
		new_ts_jiffies++;
	rp->ts_jiffies = new_ts_jiffies;

	xpc_rsvd_page = rp;
	return 0;
}

void
xpc_teardown_rsvd_page(void)
{
	/* a zero timestamp indicates our rsvd page is not initialized */
	xpc_rsvd_page->ts_jiffies = 0;
}

/*
 * Get a copy of a portion of the remote partition's rsvd page.
 *
 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
 * is large enough to contain a copy of their reserved page header and
 * part_nasids mask.
 */
enum xp_retval
xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
		  struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
{
	int l;
	enum xp_retval ret;

	/* get the reserved page's physical address */

	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
	if (*remote_rp_pa == 0)
		return xpNoRsvdPageAddr;

	/* pull over the reserved page header and part_nasids mask */
	ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
			       XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
	if (ret != xpSuccess)
		return ret;

	if (discovered_nasids != NULL) {
		unsigned long *remote_part_nasids =
		    XPC_RP_PART_NASIDS(remote_rp);

		for (l = 0; l < xpc_nasid_mask_nlongs; l++)
			discovered_nasids[l] |= remote_part_nasids[l];
	}

	/* zero timestamp indicates the reserved page has not been setup */
	if (remote_rp->ts_jiffies == 0)
		return xpRsvdPageNotSet;

	if (XPC_VERSION_MAJOR(remote_rp->version) !=
	    XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
		return xpBadVersion;
	}

	/* check that both remote and local partids are valid for each side */
	if (remote_rp->SAL_partid < 0 ||
	    remote_rp->SAL_partid >= xp_max_npartitions ||
	    remote_rp->max_npartitions <= xp_partition_id) {
		return xpInvalidPartid;
	}

	if (remote_rp->SAL_partid == xp_partition_id)
		return xpLocalPartid;

	return xpSuccess;
}

/*
 * See if the other side has responded to a partition deactivate request
 * from us. Though we requested the remote partition to deactivate with regard
 * to us, we really only need to wait for the other side to disengage from us.
 */
int
xpc_partition_disengaged(struct xpc_partition *part)
{
	short partid = XPC_PARTID(part);
	int disengaged;

	disengaged = !xpc_arch_ops.partition_engaged(partid);
	if (part->disengage_timeout) {
		if (!disengaged) {
			if (time_is_after_jiffies(part->disengage_timeout)) {
				/* timelimit hasn't been reached yet */
				return 0;
			}

			/*
			 * Other side hasn't responded to our deactivate
			 * request in a timely fashion, so assume it's dead.
			 */

			dev_info(xpc_part, "deactivate request to remote "
				 "partition %d timed out\n", partid);
			xpc_disengage_timedout = 1;
			xpc_arch_ops.assume_partition_disengaged(partid);
			disengaged = 1;
		}
		part->disengage_timeout = 0;

		/*
		 * Cancel the timer function, provided it's not us; a
		 * synchronous cancel would deadlock if done from the
		 * timer's own (interrupt) context.
		 */
		if (!in_interrupt())
			del_singleshot_timer_sync(&part->disengage_timer);

		DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
			part->act_state != XPC_P_AS_INACTIVE);
		if (part->act_state != XPC_P_AS_INACTIVE)
			xpc_wakeup_channel_mgr(part);

		xpc_arch_ops.cancel_partition_deactivation_request(part);
	}
	return disengaged;
}

/*
 * Mark specified partition as active.
 */
enum xp_retval
xpc_mark_partition_active(struct xpc_partition *part)
{
	unsigned long irq_flags;
	enum xp_retval ret;

	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, irq_flags);
	if (part->act_state == XPC_P_AS_ACTIVATING) {
		part->act_state = XPC_P_AS_ACTIVE;
		ret = xpSuccess;
	} else {
		DBUG_ON(part->reason == xpSuccess);
		ret = part->reason;
	}
	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	return ret;
}

/*
 * Start the process of deactivating the specified partition.
 */
void
xpc_deactivate_partition(const int line, struct xpc_partition *part,
			 enum xp_retval reason)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&part->act_lock, irq_flags);

	if (part->act_state == XPC_P_AS_INACTIVE) {
		XPC_SET_REASON(part, reason, line);
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		if (reason == xpReactivating) {
			/* we interrupt ourselves to reactivate partition */
			xpc_arch_ops.request_partition_reactivation(part);
		}
		return;
	}
	if (part->act_state == XPC_P_AS_DEACTIVATING) {
		if ((part->reason == xpUnloading && reason != xpUnloading) ||
		    reason == xpReactivating) {
			XPC_SET_REASON(part, reason, line);
		}
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		return;
	}

	part->act_state = XPC_P_AS_DEACTIVATING;
	XPC_SET_REASON(part, reason, line);

	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	/* ask remote partition to deactivate with regard to us */
	xpc_arch_ops.request_partition_deactivation(part);

	/* set a timelimit on the disengage phase of the deactivation request */
	part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
	part->disengage_timer.expires = part->disengage_timeout;
	add_timer(&part->disengage_timer);

	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
		XPC_PARTID(part), reason);

	xpc_partition_going_down(part, reason);
}

/*
 * Mark specified partition as inactive.
 */
void
xpc_mark_partition_inactive(struct xpc_partition *part)
{
	unsigned long irq_flags;

	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
		XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, irq_flags);
	part->act_state = XPC_P_AS_INACTIVE;
	spin_unlock_irqrestore(&part->act_lock, irq_flags);
	part->remote_rp_pa = 0;
}

/*
 * SAL has provided a partition and machine mask.  The partition mask
 * contains a bit for each even nasid in our partition.  The machine
 * mask contains a bit for each even nasid in the entire machine.
 *
 * Using those two bit arrays, we can determine which nasids are
 * known in the machine.  Each should also have a reserved page
 * initialized if they are available for partitioning.
 */
void
xpc_discovery(void)
{
	void *remote_rp_base;
	struct xpc_rsvd_page *remote_rp;
	unsigned long remote_rp_pa;
	int region;
	int region_size;
	int max_regions;
	int nasid;
	struct xpc_rsvd_page *rp;
	unsigned long *discovered_nasids;
	enum xp_retval ret;

	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
						  xpc_nasid_mask_nbytes,
						  GFP_KERNEL, &remote_rp_base);
	if (remote_rp == NULL)
		return;

	discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
				    GFP_KERNEL);
	if (discovered_nasids == NULL) {
		kfree(remote_rp_base);
		return;
	}

	rp = (struct xpc_rsvd_page *)xpc_rsvd_page;

	/*
	 * The term 'region' in this context refers to the minimum number of
	 * nodes that can comprise an access protection grouping. The access
	 * protection is in regards to memory, IOI and IPI.
	 *
	 * Note that max_regions is expressed in units of 16-node regions;
	 * the switch below scales it up for the larger region sizes so that
	 * max_regions * region_size stays constant and the nasid loop below
	 * covers the same range.
	 */
	max_regions = 64;
	region_size = xp_region_size;

	switch (region_size) {
	case 128:
		max_regions *= 2;
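		/* fall through */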
	case 64:
		max_regions *= 2;
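		/* fall through */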
	case 32:
		max_regions *= 2;
		region_size = 16;
		DBUG_ON(!is_shub2());
	}

	for (region = 0; region < max_regions; region++) {

		if (xpc_exiting)
			break;

		dev_dbg(xpc_part, "searching region %d\n", region);

		/* only even numbered nasids exist, hence the steps of two */
		for (nasid = (region * region_size * 2);
		     nasid < ((region + 1) * region_size * 2); nasid += 2) {

			if (xpc_exiting)
				break;

			dev_dbg(xpc_part, "checking nasid %d\n", nasid);

			if (test_bit(nasid / 2, xpc_part_nasids)) {
				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
					"part of the local partition; skipping "
					"region\n", nasid);
				break;
			}

			if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
					"not on Numa-Link network at reset\n",
					nasid);
				continue;
			}

			if (test_bit(nasid / 2, discovered_nasids)) {
				dev_dbg(xpc_part, "Nasid %d is part of a "
					"partition which was previously "
					"discovered\n", nasid);
				continue;
			}

			/* pull over the rsvd page header & part_nasids mask */

			ret = xpc_get_remote_rp(nasid, discovered_nasids,
						remote_rp, &remote_rp_pa);
			if (ret != xpSuccess) {
				dev_dbg(xpc_part, "unable to get reserved page "
					"from nasid %d, reason=%d\n", nasid,
					ret);

				if (ret == xpLocalPartid)
					break;

				continue;
			}

			xpc_arch_ops.request_partition_activation(remote_rp,
							 remote_rp_pa, nasid);
		}
	}

	kfree(discovered_nasids);
	kfree(remote_rp_base);
}

/*
 * Given a partid, get the nasids owned by that partition from the
 * remote partition's reserved page.
 */
enum xp_retval
xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
{
	struct xpc_partition *part;
	unsigned long part_nasid_pa;

	part = &xpc_partitions[partid];
	if (part->remote_rp_pa == 0)
		return xpPartitionDown;

	memset(nasid_mask, 0, xpc_nasid_mask_nbytes);

	part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);

	return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
				xpc_nasid_mask_nbytes);
}