/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
 */

/*
 * Cross Partition Communication (XPC) partition support.
 *
 *	This is the part of XPC that detects the presence/absence of
 *	other partitions. It provides a heartbeat and monitors the
 *	heartbeats of other partitions.
 *
 */

#include <linux/kernel.h>
#include <linux/sysctl.h>
#include <linux/cache.h>
#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <asm/sn/intr.h>
#include <asm/sn/sn_sal.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/addrs.h>
#include "xpc.h"

/* XPC is exiting flag */
int xpc_exiting;

/* this partition's reserved page pointers */
struct xpc_rsvd_page *xpc_rsvd_page;
static u64 *xpc_part_nasids;
u64 *xpc_mach_nasids;

/* >>> next two variables should be 'xpc_' if they remain here */
static int xp_sizeof_nasid_mask;	/* actual size in bytes of nasid mask */
int xp_nasid_mask_words;	/* actual size in words of nasid mask */

struct xpc_partition *xpc_partitions;

/*
 * Generic buffer used to store a local copy of portions of a remote
 * partition's reserved page (either its header and part_nasids mask,
 * or its vars).
 */
char *xpc_remote_copy_buffer;
void *xpc_remote_copy_buffer_base;

/*
 * Guarantee that the kmalloc'd memory is cacheline aligned.
 */
void *
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
	/* see if kmalloc will give us cacheline aligned memory by default */
	*base = kmalloc(size, flags);
	if (*base == NULL)
		return NULL;

	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
		return *base;

	kfree(*base);

	/* nope, we'll have to do it ourselves */
	*base = kmalloc(size + L1_CACHE_BYTES, flags);
	if (*base == NULL)
		return NULL;

	return (void *)L1_CACHE_ALIGN((u64)*base);
}
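
/*
 * Illustrative sketch (not part of the original source): a caller keeps the
 * returned pointer for data access and the '*base' cookie for the eventual
 * kfree(), e.g.
 *
 *	void *base;
 *	void *buf = xpc_kmalloc_cacheline_aligned(len, GFP_KERNEL, &base);
 *	if (buf != NULL) {
 *		... use buf ...
 *		kfree(base);
 *	}
 *
 * xpc_get_rsvd_page_pa() below manages its scratch buffer this way.
 */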

/*
 * Given a nasid, get the physical address of the partition's reserved page
 * for that nasid. This function returns 0 on any error.
 */
static u64
xpc_get_rsvd_page_pa(int nasid)
{
	enum xp_retval ret;
	s64 status;
	u64 cookie = 0;
	u64 rp_pa = nasid;	/* seed with nasid */
	u64 len = 0;
	u64 buf = 0;	/* scratch buffer, allocated once SAL reports a size */
	u64 buf_len = 0;
	void *buf_base = NULL;

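	/*
	 * SAL may need multiple passes to locate the reserved page; it
	 * returns SALRET_MORE_PASSES until it is done and reports via 'len'
	 * how large a scratch buffer the next pass requires.
	 */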
	while (1) {

		status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
						       &len);

		dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
			"0x%016lx, address=0x%016lx, len=0x%016lx\n",
			status, cookie, rp_pa, len);

		if (status != SALRET_MORE_PASSES)
			break;

		/* >>> L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
		if (L1_CACHE_ALIGN(len) > buf_len) {
			kfree(buf_base);
			buf_len = L1_CACHE_ALIGN(len);
			buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
								 GFP_KERNEL,
								 &buf_base);
			if (buf_base == NULL) {
				dev_err(xpc_part, "unable to kmalloc "
					"len=0x%016lx\n", buf_len);
				status = SALRET_ERROR;
				break;
			}
		}

		ret = xp_remote_memcpy((void *)buf, (void *)rp_pa, buf_len);
		if (ret != xpSuccess) {
			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
			status = SALRET_ERROR;
			break;
		}
	}

	kfree(buf_base);

	if (status != SALRET_OK)
		rp_pa = 0;

	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
	return rp_pa;
}

/*
 * Fill the partition reserved page with the information needed by
 * other partitions to discover we are alive and establish initial
 * communications.
 */
struct xpc_rsvd_page *
xpc_setup_rsvd_page(void)
{
	struct xpc_rsvd_page *rp;
	u64 rp_pa;
	unsigned long new_stamp;

	/* get the local reserved page's address */

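	/* disable preemption so the cpuid-to-nasid lookup stays on one CPU */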
	preempt_disable();
	rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
	preempt_enable();
	if (rp_pa == 0) {
		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
		return NULL;
	}
	rp = (struct xpc_rsvd_page *)__va(rp_pa);

	if (rp->SAL_version < 3) {
		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
		rp->SAL_partid &= 0xff;
	}
	BUG_ON(rp->SAL_partid != sn_partition_id);

	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
		dev_err(xpc_part, "the reserved page's partid of %d is outside "
			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
			xp_max_npartitions);
		return NULL;
	}

	rp->version = XPC_RP_VERSION;
	rp->max_npartitions = xp_max_npartitions;

	/* establish the actual sizes of the nasid masks */
	if (rp->SAL_version == 1) {
		/* SAL_version 1 didn't set the nasids_size field */
		rp->SAL_nasids_size = 128;
	}
	xp_sizeof_nasid_mask = rp->SAL_nasids_size;
	xp_nasid_mask_words = DIV_ROUND_UP(xp_sizeof_nasid_mask,
					   BYTES_PER_WORD);

	/* setup the pointers to the various items in the reserved page */
	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);

	if (xpc_rsvd_page_init(rp) != xpSuccess)
		return NULL;

	/*
	 * Set timestamp of when reserved page was set up by XPC.
	 * This signifies to the remote partition that our reserved
	 * page is initialized.
	 */
	new_stamp = jiffies;
	if (new_stamp == 0 || new_stamp == rp->stamp)
		new_stamp++;	/* 0 means "not set up"; the stamp must change */
	rp->stamp = new_stamp;

	return rp;
}

/*
 * Get a copy of a portion of the remote partition's rsvd page.
 *
 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
 * is large enough to contain a copy of their reserved page header and
 * part_nasids mask.
 */
enum xp_retval
xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
		  struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
{
	int i;
	enum xp_retval ret;

	/* get the reserved page's physical address */

	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
	if (*remote_rp_pa == 0)
		return xpNoRsvdPageAddr;

	/* pull over the reserved page header and part_nasids mask */
	ret = xp_remote_memcpy(remote_rp, (void *)*remote_rp_pa,
			       XPC_RP_HEADER_SIZE + xp_sizeof_nasid_mask);
	if (ret != xpSuccess)
		return ret;

	if (discovered_nasids != NULL) {
		u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);

		for (i = 0; i < xp_nasid_mask_words; i++)
			discovered_nasids[i] |= remote_part_nasids[i];
	}

	/* see if the reserved page has been set up by XPC */
	if (remote_rp->stamp == 0)
		return xpRsvdPageNotSet;

	if (XPC_VERSION_MAJOR(remote_rp->version) !=
	    XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
		return xpBadVersion;
	}

	/* check that both remote and local partids are valid for each side */
	if (remote_rp->SAL_partid < 0 ||
	    remote_rp->SAL_partid >= xp_max_npartitions ||
	    remote_rp->max_npartitions <= sn_partition_id) {
		return xpInvalidPartid;
	}

	if (remote_rp->SAL_partid == sn_partition_id)
		return xpLocalPartid;

	return xpSuccess;
}

/*
 * See if the other side has responded to a partition deactivate request
 * from us. Though we requested the remote partition to deactivate with regard
 * to us, we really only need to wait for the other side to disengage from us.
 */
int
xpc_partition_disengaged(struct xpc_partition *part)
{
	short partid = XPC_PARTID(part);
	int disengaged;

	disengaged = !xpc_partition_engaged(partid);
	if (part->disengage_timeout) {
		if (!disengaged) {
			if (time_is_after_jiffies(part->disengage_timeout)) {
				/* timelimit hasn't been reached yet */
				return 0;
			}

			/*
			 * Other side hasn't responded to our deactivate
			 * request in a timely fashion, so assume it's dead.
			 */

			dev_info(xpc_part, "deactivate request to remote "
				 "partition %d timed out\n", partid);
			xpc_disengage_timedout = 1;
			xpc_assume_partition_disengaged(partid);
			disengaged = 1;
		}
		part->disengage_timeout = 0;

		/* cancel the timer function, provided it's not us */
		if (!in_interrupt())
			del_singleshot_timer_sync(&part->disengage_timer);

		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
			part->act_state != XPC_P_INACTIVE);
		if (part->act_state != XPC_P_INACTIVE)
			xpc_wakeup_channel_mgr(part);

		xpc_cancel_partition_deactivation_request(part);
	}
	return disengaged;
}

/*
 * Mark specified partition as active.
 */
enum xp_retval
xpc_mark_partition_active(struct xpc_partition *part)
{
	unsigned long irq_flags;
	enum xp_retval ret;

	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, irq_flags);
	if (part->act_state == XPC_P_ACTIVATING) {
		part->act_state = XPC_P_ACTIVE;
		ret = xpSuccess;
	} else {
		DBUG_ON(part->reason == xpSuccess);
		ret = part->reason;
	}
	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	return ret;
}

/*
 * Start the process of deactivating the specified partition. 'line' is the
 * caller's line number, recorded along with the reason via XPC_SET_REASON().
 */
void
xpc_deactivate_partition(const int line, struct xpc_partition *part,
			 enum xp_retval reason)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&part->act_lock, irq_flags);

	if (part->act_state == XPC_P_INACTIVE) {
		XPC_SET_REASON(part, reason, line);
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		if (reason == xpReactivating) {
			/* we interrupt ourselves to reactivate partition */
			xpc_request_partition_reactivation(part);
		}
		return;
	}
	if (part->act_state == XPC_P_DEACTIVATING) {
		if ((part->reason == xpUnloading && reason != xpUnloading) ||
		    reason == xpReactivating) {
			XPC_SET_REASON(part, reason, line);
		}
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		return;
	}

	part->act_state = XPC_P_DEACTIVATING;
	XPC_SET_REASON(part, reason, line);

	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	/* ask remote partition to deactivate with regard to us */
	xpc_request_partition_deactivation(part);

	/* set a timelimit on the disengage phase of the deactivation request */
	part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
	part->disengage_timer.expires = part->disengage_timeout;
	add_timer(&part->disengage_timer);

	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
		XPC_PARTID(part), reason);

	xpc_partition_going_down(part, reason);
}

/*
 * Mark specified partition as inactive.
 */
void
xpc_mark_partition_inactive(struct xpc_partition *part)
{
	unsigned long irq_flags;

	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
		XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, irq_flags);
	part->act_state = XPC_P_INACTIVE;
	spin_unlock_irqrestore(&part->act_lock, irq_flags);
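	/* a remote_rp_pa of 0 marks the remote partition as down */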
	part->remote_rp_pa = 0;
}

/*
 * SAL has provided a partition and machine mask.  The partition mask
 * contains a bit for each even nasid in our partition.  The machine
 * mask contains a bit for each even nasid in the entire machine.
 *
 * Using those two bit arrays, we can determine which nasids are
 * known in the machine.  Each should also have a reserved page
 * initialized if they are available for partitioning.
 */
void
xpc_discovery(void)
{
	void *remote_rp_base;
	struct xpc_rsvd_page *remote_rp;
	u64 remote_rp_pa;
	int region;
	int region_size;
	int max_regions;
	int nasid;
	struct xpc_rsvd_page *rp;
	u64 *discovered_nasids;
	enum xp_retval ret;

	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
						  xp_sizeof_nasid_mask,
						  GFP_KERNEL, &remote_rp_base);
	if (remote_rp == NULL)
		return;

	discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
				    GFP_KERNEL);
	if (discovered_nasids == NULL) {
		kfree(remote_rp_base);
		return;
	}

	rp = (struct xpc_rsvd_page *)xpc_rsvd_page;

	/*
	 * The term 'region' in this context refers to the minimum number of
	 * nodes that can comprise an access protection grouping. The access
	 * protection is in regards to memory, IOI and IPI.
	 */
	max_regions = 64;
	region_size = sn_region_size;

	switch (region_size) {
	case 128:
		max_regions *= 2;
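		/* fall through */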
	case 64:
		max_regions *= 2;
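		/* fall through */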
	case 32:
		max_regions *= 2;
		region_size = 16;
		DBUG_ON(!is_shub2());
	}

	for (region = 0; region < max_regions; region++) {

		if (xpc_exiting)
			break;

		dev_dbg(xpc_part, "searching region %d\n", region);

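		/* only even nasids are assigned, hence the stride of 2 */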
		for (nasid = (region * region_size * 2);
		     nasid < ((region + 1) * region_size * 2); nasid += 2) {

			if (xpc_exiting)
				break;

			dev_dbg(xpc_part, "checking nasid %d\n", nasid);

			if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
					"part of the local partition; skipping "
					"region\n", nasid);
				break;
			}

			if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
					"not on Numa-Link network at reset\n",
					nasid);
				continue;
			}

			if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
				dev_dbg(xpc_part, "Nasid %d is part of a "
					"partition which was previously "
					"discovered\n", nasid);
				continue;
			}

			/* pull over the rsvd page header & part_nasids mask */

			ret = xpc_get_remote_rp(nasid, discovered_nasids,
						remote_rp, &remote_rp_pa);
			if (ret != xpSuccess) {
				dev_dbg(xpc_part, "unable to get reserved page "
					"from nasid %d, reason=%d\n", nasid,
					ret);

				if (ret == xpLocalPartid)
					break;

				continue;
			}

			xpc_request_partition_activation(remote_rp,
							 remote_rp_pa, nasid);
		}
	}

	kfree(discovered_nasids);
	kfree(remote_rp_base);
}

/*
 * Given a partid, get the nasids owned by that partition from the
 * remote partition's reserved page.
 */
enum xp_retval
xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
{
	struct xpc_partition *part;
	u64 part_nasid_pa;

	part = &xpc_partitions[partid];
	if (part->remote_rp_pa == 0)
		return xpPartitionDown;

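	/* zero the whole mask; only xp_sizeof_nasid_mask bytes are copied */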
	memset(nasid_mask, 0, XP_NASID_MASK_BYTES);

	part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);

	return xp_remote_memcpy(nasid_mask, (void *)part_nasid_pa,
				xp_sizeof_nasid_mask);
}