/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
 */

/*
 * Cross Partition Communication (XPC) partition support.
 *
 *	This is the part of XPC that detects the presence/absence of
 *	other partitions. It provides a heartbeat and monitors the
 *	heartbeats of other partitions.
 *
 */

#include <linux/kernel.h>
#include <linux/sysctl.h>
#include <linux/cache.h>
#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <asm/sn/intr.h>
#include <asm/sn/sn_sal.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/addrs.h>
#include "xpc.h"

/* XPC is exiting flag */
int xpc_exiting;

/* this partition's reserved page pointers */
struct xpc_rsvd_page *xpc_rsvd_page;
/* pointer into the reserved page at the part_nasids mask */
static u64 *xpc_part_nasids;
/* pointer into the reserved page at the mach_nasids mask */
u64 *xpc_mach_nasids;

static int xpc_sizeof_nasid_mask;	/* actual size in bytes of nasid mask */
int xpc_nasid_mask_words;	/* actual size in words of nasid mask */

/* array of xp_max_npartitions partition structures; allocated elsewhere */
struct xpc_partition *xpc_partitions;

42 43 44
/*
 * Guarantee that the kmalloc'd memory is cacheline aligned.
 */
45
void *
46 47 48 49
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
	/* see if kmalloc will give us cachline aligned memory by default */
	*base = kmalloc(size, flags);
50
	if (*base == NULL)
51
		return NULL;
52 53

	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
54
		return *base;
55

56 57 58 59
	kfree(*base);

	/* nope, we'll have to do it ourselves */
	*base = kmalloc(size + L1_CACHE_BYTES, flags);
60
	if (*base == NULL)
61
		return NULL;
62

63
	return (void *)L1_CACHE_ALIGN((u64)*base);
64 65
}

66 67 68 69 70
/*
 * Given a nasid, get the physical address of the  partition's reserved page
 * for that nasid. This function returns 0 on any error.
 */
static u64
71
xpc_get_rsvd_page_pa(int nasid)
72
{
73
	enum xp_retval ret;
74 75 76 77
	s64 status;
	u64 cookie = 0;
	u64 rp_pa = nasid;	/* seed with nasid */
	u64 len = 0;
78 79 80
	u64 buf = buf;
	u64 buf_len = 0;
	void *buf_base = NULL;
81 82 83 84

	while (1) {

		status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
85
						       &len);
86 87 88 89 90

		dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
			"0x%016lx, address=0x%016lx, len=0x%016lx\n",
			status, cookie, rp_pa, len);

91
		if (status != SALRET_MORE_PASSES)
92 93
			break;

94
		/* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
95
		if (L1_CACHE_ALIGN(len) > buf_len) {
96
			kfree(buf_base);
97
			buf_len = L1_CACHE_ALIGN(len);
98 99 100
			buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
								 GFP_KERNEL,
								 &buf_base);
101 102 103 104 105 106
			if (buf_base == NULL) {
				dev_err(xpc_part, "unable to kmalloc "
					"len=0x%016lx\n", buf_len);
				status = SALRET_ERROR;
				break;
			}
107 108
		}

109 110 111
		ret = xp_remote_memcpy((void *)buf, (void *)rp_pa, buf_len);
		if (ret != xpSuccess) {
			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
112 113 114 115 116
			status = SALRET_ERROR;
			break;
		}
	}

117
	kfree(buf_base);
118

119
	if (status != SALRET_OK)
120
		rp_pa = 0;
121

122 123 124 125 126 127 128 129 130 131
	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
	return rp_pa;
}

/*
 * Fill the partition reserved page with the information needed by
 * other partitions to discover we are alive and establish initial
 * communications.
 */
struct xpc_rsvd_page *
132
xpc_setup_rsvd_page(void)
133 134
{
	struct xpc_rsvd_page *rp;
135
	u64 rp_pa;
136
	unsigned long new_stamp;
137 138 139

	/* get the local reserved page's address */

140 141 142
	preempt_disable();
	rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
	preempt_enable();
143 144 145 146
	if (rp_pa == 0) {
		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
		return NULL;
	}
147
	rp = (struct xpc_rsvd_page *)__va(rp_pa);
148

149 150 151 152 153 154 155 156 157 158
	if (rp->SAL_version < 3) {
		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
		rp->SAL_partid &= 0xff;
	}
	BUG_ON(rp->SAL_partid != sn_partition_id);

	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
		dev_err(xpc_part, "the reserved page's partid of %d is outside "
			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
			xp_max_npartitions);
159 160 161 162
		return NULL;
	}

	rp->version = XPC_RP_VERSION;
163
	rp->max_npartitions = xp_max_npartitions;
164

165 166 167
	/* establish the actual sizes of the nasid masks */
	if (rp->SAL_version == 1) {
		/* SAL_version 1 didn't set the nasids_size field */
168
		rp->SAL_nasids_size = 128;
169
	}
170 171 172
	xpc_sizeof_nasid_mask = rp->SAL_nasids_size;
	xpc_nasid_mask_words = DIV_ROUND_UP(xpc_sizeof_nasid_mask,
					    BYTES_PER_WORD);
173 174 175 176

	/* setup the pointers to the various items in the reserved page */
	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
177

178 179
	if (xpc_rsvd_page_init(rp) != xpSuccess)
		return NULL;
180 181

	/*
182
	 * Set timestamp of when reserved page was setup by XPC.
183 184 185
	 * This signifies to the remote partition that our reserved
	 * page is initialized.
	 */
186 187 188 189
	new_stamp = jiffies;
	if (new_stamp == 0 || new_stamp == rp->stamp)
		new_stamp++;
	rp->stamp = new_stamp;
190 191 192 193 194

	return rp;
}

/*
195
 * Get a copy of a portion of the remote partition's rsvd page.
196 197
 *
 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
198 199
 * is large enough to contain a copy of their reserved page header and
 * part_nasids mask.
200
 */
201
enum xp_retval
202
xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
203
		  struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
204
{
205 206
	int i;
	enum xp_retval ret;
207 208 209

	/* get the reserved page's physical address */

210
	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
211
	if (*remote_rp_pa == 0)
212
		return xpNoRsvdPageAddr;
213

214
	/* pull over the reserved page header and part_nasids mask */
215
	ret = xp_remote_memcpy(remote_rp, (void *)*remote_rp_pa,
216
			       XPC_RP_HEADER_SIZE + xpc_sizeof_nasid_mask);
217 218
	if (ret != xpSuccess)
		return ret;
219 220

	if (discovered_nasids != NULL) {
221 222
		u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);

223
		for (i = 0; i < xpc_nasid_mask_words; i++)
224
			discovered_nasids[i] |= remote_part_nasids[i];
225 226
	}

227 228
	/* see if the reserved page has been set up by XPC */
	if (remote_rp->stamp == 0)
229 230
		return xpRsvdPageNotSet;

231
	if (XPC_VERSION_MAJOR(remote_rp->version) !=
232
	    XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
233
		return xpBadVersion;
234 235
	}

236
	/* check that both remote and local partids are valid for each side */
237 238 239
	if (remote_rp->SAL_partid < 0 ||
	    remote_rp->SAL_partid >= xp_max_npartitions ||
	    remote_rp->max_npartitions <= sn_partition_id) {
240
		return xpInvalidPartid;
241 242 243 244
	}

	if (remote_rp->SAL_partid == sn_partition_id)
		return xpLocalPartid;
245

246
	return xpSuccess;
247 248
}

249
/*
250 251 252
 * See if the other side has responded to a partition deactivate request
 * from us. Though we requested the remote partition to deactivate with regard
 * to us, we really only need to wait for the other side to disengage from us.
253 254 255 256
 */
int
xpc_partition_disengaged(struct xpc_partition *part)
{
257
	short partid = XPC_PARTID(part);
258 259
	int disengaged;

260 261
	disengaged = !xpc_partition_engaged(partid);
	if (part->disengage_timeout) {
262
		if (!disengaged) {
263
			if (time_is_after_jiffies(part->disengage_timeout)) {
264 265 266 267 268
				/* timelimit hasn't been reached yet */
				return 0;
			}

			/*
269
			 * Other side hasn't responded to our deactivate
270 271 272
			 * request in a timely fashion, so assume it's dead.
			 */

273 274 275 276
			dev_info(xpc_part, "deactivate request to remote "
				 "partition %d timed out\n", partid);
			xpc_disengage_timedout = 1;
			xpc_assume_partition_disengaged(partid);
277 278
			disengaged = 1;
		}
279
		part->disengage_timeout = 0;
280 281

		/* cancel the timer function, provided it's not us */
282 283
		if (!in_interrupt())
			del_singleshot_timer_sync(&part->disengage_timer);
284 285

		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
286
			part->act_state != XPC_P_INACTIVE);
287
		if (part->act_state != XPC_P_INACTIVE)
288 289
			xpc_wakeup_channel_mgr(part);

290
		xpc_cancel_partition_deactivation_request(part);
291 292 293 294
	}
	return disengaged;
}

295 296 297
/*
 * Mark specified partition as active.
 */
298
enum xp_retval
299 300 301
xpc_mark_partition_active(struct xpc_partition *part)
{
	unsigned long irq_flags;
302
	enum xp_retval ret;
303 304 305 306 307 308

	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, irq_flags);
	if (part->act_state == XPC_P_ACTIVATING) {
		part->act_state = XPC_P_ACTIVE;
309
		ret = xpSuccess;
310
	} else {
311
		DBUG_ON(part->reason == xpSuccess);
312 313 314 315 316 317 318 319
		ret = part->reason;
	}
	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	return ret;
}

/*
320
 * Start the process of deactivating the specified partition.
321 322 323
 */
void
xpc_deactivate_partition(const int line, struct xpc_partition *part,
324
			 enum xp_retval reason)
325 326 327 328 329 330 331 332
{
	unsigned long irq_flags;

	spin_lock_irqsave(&part->act_lock, irq_flags);

	if (part->act_state == XPC_P_INACTIVE) {
		XPC_SET_REASON(part, reason, line);
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
333
		if (reason == xpReactivating) {
334
			/* we interrupt ourselves to reactivate partition */
335
			xpc_request_partition_reactivation(part);
336 337 338 339
		}
		return;
	}
	if (part->act_state == XPC_P_DEACTIVATING) {
340 341
		if ((part->reason == xpUnloading && reason != xpUnloading) ||
		    reason == xpReactivating) {
342 343 344 345 346 347 348 349 350 351 352
			XPC_SET_REASON(part, reason, line);
		}
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		return;
	}

	part->act_state = XPC_P_DEACTIVATING;
	XPC_SET_REASON(part, reason, line);

	spin_unlock_irqrestore(&part->act_lock, irq_flags);

353 354
	/* ask remote partition to deactivate with regard to us */
	xpc_request_partition_deactivation(part);
355

356 357 358 359
	/* set a timelimit on the disengage phase of the deactivation request */
	part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
	part->disengage_timer.expires = part->disengage_timeout;
	add_timer(&part->disengage_timer);
360

361 362
	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
		XPC_PARTID(part), reason);
363

364
	xpc_partition_going_down(part, reason);
365 366 367
}

/*
 * Mark specified partition as inactive.
 */
void
xpc_mark_partition_inactive(struct xpc_partition *part)
{
	unsigned long flags;

	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
		XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, flags);
	part->act_state = XPC_P_INACTIVE;
	spin_unlock_irqrestore(&part->act_lock, flags);

	/* forget where the remote reserved page was */
	part->remote_rp_pa = 0;
}

/*
 * SAL has provided a partition and machine mask.  The partition mask
 * contains a bit for each even nasid in our partition.  The machine
 * mask contains a bit for each even nasid in the entire machine.
 *
 * Using those two bit arrays, we can determine which nasids are
 * known in the machine.  Each should also have a reserved page
 * initialized if they are available for partitioning.
 */
void
xpc_discovery(void)
{
	void *remote_rp_base;
	struct xpc_rsvd_page *remote_rp;
398
	u64 remote_rp_pa;
399
	int region;
400
	int region_size;
401 402 403 404
	int max_regions;
	int nasid;
	struct xpc_rsvd_page *rp;
	u64 *discovered_nasids;
405
	enum xp_retval ret;
406

407
	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
408
						  xpc_sizeof_nasid_mask,
409
						  GFP_KERNEL, &remote_rp_base);
410
	if (remote_rp == NULL)
411
		return;
412

413
	discovered_nasids = kzalloc(sizeof(u64) * xpc_nasid_mask_words,
414
				    GFP_KERNEL);
415 416 417 418 419
	if (discovered_nasids == NULL) {
		kfree(remote_rp_base);
		return;
	}

420
	rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
421 422 423 424 425 426

	/*
	 * The term 'region' in this context refers to the minimum number of
	 * nodes that can comprise an access protection grouping. The access
	 * protection is in regards to memory, IOI and IPI.
	 */
427 428 429 430 431 432 433 434 435 436 437 438 439
	max_regions = 64;
	region_size = sn_region_size;

	switch (region_size) {
	case 128:
		max_regions *= 2;
	case 64:
		max_regions *= 2;
	case 32:
		max_regions *= 2;
		region_size = 16;
		DBUG_ON(!is_shub2());
	}
440 441 442

	for (region = 0; region < max_regions; region++) {

443
		if (xpc_exiting)
444 445 446 447
			break;

		dev_dbg(xpc_part, "searching region %d\n", region);

448
		for (nasid = (region * region_size * 2);
449
		     nasid < ((region + 1) * region_size * 2); nasid += 2) {
450

451
			if (xpc_exiting)
452 453 454 455
				break;

			dev_dbg(xpc_part, "checking nasid %d\n", nasid);

456
			if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
457 458 459 460 461 462
				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
					"part of the local partition; skipping "
					"region\n", nasid);
				break;
			}

463
			if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
464 465 466 467 468 469 470 471 472 473 474 475 476
				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
					"not on Numa-Link network at reset\n",
					nasid);
				continue;
			}

			if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
				dev_dbg(xpc_part, "Nasid %d is part of a "
					"partition which was previously "
					"discovered\n", nasid);
				continue;
			}

477
			/* pull over the rsvd page header & part_nasids mask */
478 479

			ret = xpc_get_remote_rp(nasid, discovered_nasids,
480
						remote_rp, &remote_rp_pa);
481
			if (ret != xpSuccess) {
482 483 484 485
				dev_dbg(xpc_part, "unable to get reserved page "
					"from nasid %d, reason=%d\n", nasid,
					ret);

486
				if (ret == xpLocalPartid)
487
					break;
488

489 490 491
				continue;
			}

492 493
			xpc_request_partition_activation(remote_rp,
							 remote_rp_pa, nasid);
494 495 496 497 498 499 500 501 502
		}
	}

	kfree(discovered_nasids);
	kfree(remote_rp_base);
}

/*
 * Given a partid, get the nasids owned by that partition from the
503
 * remote partition's reserved page.
504
 */
505
enum xp_retval
506
xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
507 508 509 510 511
{
	struct xpc_partition *part;
	u64 part_nasid_pa;

	part = &xpc_partitions[partid];
512
	if (part->remote_rp_pa == 0)
513
		return xpPartitionDown;
514

515
	memset(nasid_mask, 0, xpc_sizeof_nasid_mask);
516

517
	part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
518

519
	return xp_remote_memcpy(nasid_mask, (void *)part_nasid_pa,
520
				xpc_sizeof_nasid_mask);
521
}