/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
 */

/*
 * Cross Partition Communication (XPC) partition support.
 *
 *	This is the part of XPC that detects the presence/absence of
 *	other partitions. It provides a heartbeat and monitors the
 *	heartbeats of other partitions.
 *
 */

#include <linux/kernel.h>
#include <linux/sysctl.h>
#include <linux/cache.h>
#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <asm/sn/intr.h>
#include <asm/sn/sn_sal.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/addrs.h>
#include "xpc.h"

/* XPC is exiting flag */
int xpc_exiting;

/* SH_IPI_ACCESS shub register value on startup */
static u64 xpc_sh1_IPI_access;
static u64 xpc_sh2_IPI_access0;
static u64 xpc_sh2_IPI_access1;
static u64 xpc_sh2_IPI_access2;
static u64 xpc_sh2_IPI_access3;

/* original protection values for each node */
40
u64 xpc_prot_vec[MAX_NUMNODES];
41

42
/* this partition's reserved page pointers */
43
struct xpc_rsvd_page *xpc_rsvd_page;
44 45
static u64 *xpc_part_nasids;
static u64 *xpc_mach_nasids;
46

47 48 49
/* >>> next two variables should be 'xpc_' if they remain here */
static int xp_sizeof_nasid_mask;	/* actual size in bytes of nasid mask */
int xp_nasid_mask_words;	/* actual size in words of nasid mask */
50

51
struct xpc_partition *xpc_partitions;
52 53

/*
54 55 56
 * Generic buffer used to store a local copy of portions of a remote
 * partition's reserved page (either its header and part_nasids mask,
 * or its vars).
57
 */
58 59
char *xpc_remote_copy_buffer;
void *xpc_remote_copy_buffer_base;
60

61 62 63
/*
 * Guarantee that the kmalloc'd memory is cacheline aligned.
 */
64
void *
65 66 67 68
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
	/* see if kmalloc will give us cachline aligned memory by default */
	*base = kmalloc(size, flags);
69
	if (*base == NULL)
70
		return NULL;
71 72

	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
73
		return *base;
74

75 76 77 78
	kfree(*base);

	/* nope, we'll have to do it ourselves */
	*base = kmalloc(size + L1_CACHE_BYTES, flags);
79
	if (*base == NULL)
80
		return NULL;
81

82
	return (void *)L1_CACHE_ALIGN((u64)*base);
83 84
}

85 86 87 88 89
/*
 * Given a nasid, get the physical address of the  partition's reserved page
 * for that nasid. This function returns 0 on any error.
 */
static u64
90
xpc_get_rsvd_page_pa(int nasid)
91
{
92
	enum xp_retval ret;
93 94 95 96
	s64 status;
	u64 cookie = 0;
	u64 rp_pa = nasid;	/* seed with nasid */
	u64 len = 0;
97 98 99
	u64 buf = buf;
	u64 buf_len = 0;
	void *buf_base = NULL;
100 101 102 103

	while (1) {

		status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
104
						       &len);
105 106 107 108 109

		dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
			"0x%016lx, address=0x%016lx, len=0x%016lx\n",
			status, cookie, rp_pa, len);

110
		if (status != SALRET_MORE_PASSES)
111 112
			break;

113
		/* >>> L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
114
		if (L1_CACHE_ALIGN(len) > buf_len) {
115
			kfree(buf_base);
116
			buf_len = L1_CACHE_ALIGN(len);
117 118 119
			buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
								 GFP_KERNEL,
								 &buf_base);
120 121 122 123 124 125
			if (buf_base == NULL) {
				dev_err(xpc_part, "unable to kmalloc "
					"len=0x%016lx\n", buf_len);
				status = SALRET_ERROR;
				break;
			}
126 127
		}

128 129 130
		ret = xp_remote_memcpy((void *)buf, (void *)rp_pa, buf_len);
		if (ret != xpSuccess) {
			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
131 132 133 134 135
			status = SALRET_ERROR;
			break;
		}
	}

136
	kfree(buf_base);
137

138
	if (status != SALRET_OK)
139
		rp_pa = 0;
140

141 142 143 144 145 146 147 148 149 150
	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
	return rp_pa;
}

/*
 * Fill the partition reserved page with the information needed by
 * other partitions to discover we are alive and establish initial
 * communications.
 */
struct xpc_rsvd_page *
151
xpc_setup_rsvd_page(void)
152 153
{
	struct xpc_rsvd_page *rp;
154
	u64 rp_pa;
155 156 157

	/* get the local reserved page's address */

158 159 160
	preempt_disable();
	rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
	preempt_enable();
161 162 163 164
	if (rp_pa == 0) {
		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
		return NULL;
	}
165
	rp = (struct xpc_rsvd_page *)__va(rp_pa);
166

167 168 169 170 171 172 173 174 175 176
	if (rp->SAL_version < 3) {
		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
		rp->SAL_partid &= 0xff;
	}
	BUG_ON(rp->SAL_partid != sn_partition_id);

	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
		dev_err(xpc_part, "the reserved page's partid of %d is outside "
			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
			xp_max_npartitions);
177 178 179 180
		return NULL;
	}

	rp->version = XPC_RP_VERSION;
181
	rp->max_npartitions = xp_max_npartitions;
182

183 184 185
	/* establish the actual sizes of the nasid masks */
	if (rp->SAL_version == 1) {
		/* SAL_version 1 didn't set the nasids_size field */
186
		rp->SAL_nasids_size = 128;
187
	}
188 189 190
	xp_sizeof_nasid_mask = rp->SAL_nasids_size;
	xp_nasid_mask_words = DIV_ROUND_UP(xp_sizeof_nasid_mask,
					   BYTES_PER_WORD);
191 192 193 194

	/* setup the pointers to the various items in the reserved page */
	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
195

196 197
	if (xpc_rsvd_page_init(rp) != xpSuccess)
		return NULL;
198 199

	/*
200
	 * Set timestamp of when reserved page was setup by XPC.
201 202 203
	 * This signifies to the remote partition that our reserved
	 * page is initialized.
	 */
204
	rp->stamp = CURRENT_TIME;
205 206 207 208 209 210 211 212 213 214 215 216 217 218

	return rp;
}

/*
 * Change protections to allow IPI operations (and AMO operations on
 * Shub 1.1 systems).
 */
void
xpc_allow_IPI_ops(void)
{
	int node;
	int nasid;

219
	/* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
220 221 222

	if (is_shub2()) {
		xpc_sh2_IPI_access0 =
223
		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
224
		xpc_sh2_IPI_access1 =
225
		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
226
		xpc_sh2_IPI_access2 =
227
		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
228
		xpc_sh2_IPI_access3 =
229
		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
230 231 232

		for_each_online_node(node) {
			nasid = cnodeid_to_nasid(node);
233 234 235 236 237 238 239 240
			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
			      -1UL);
			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
			      -1UL);
			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
			      -1UL);
			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
			      -1UL);
241 242 243 244
		}

	} else {
		xpc_sh1_IPI_access =
245
		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
246 247 248

		for_each_online_node(node) {
			nasid = cnodeid_to_nasid(node);
249 250
			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
			      -1UL);
251 252 253 254 255 256 257

			/*
			 * Since the BIST collides with memory operations on
			 * SHUB 1.1 sn_change_memprotect() cannot be used.
			 */
			if (enable_shub_wars_1_1()) {
				/* open up everything */
258 259 260
				xpc_prot_vec[node] = (u64)HUB_L((u64 *)
								GLOBAL_MMR_ADDR
								(nasid,
261
						  SH1_MD_DQLP_MMR_DIR_PRIVEC0));
262 263
				HUB_S((u64 *)
				      GLOBAL_MMR_ADDR(nasid,
264
						   SH1_MD_DQLP_MMR_DIR_PRIVEC0),
265 266 267
				      -1UL);
				HUB_S((u64 *)
				      GLOBAL_MMR_ADDR(nasid,
268
						   SH1_MD_DQRP_MMR_DIR_PRIVEC0),
269
				      -1UL);
270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
			}
		}
	}
}

/*
 * Restrict protections to disallow IPI operations (and AMO operations on
 * Shub 1.1 systems).
 */
void
xpc_restrict_IPI_ops(void)
{
	int node;
	int nasid;

285
	/* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
286 287 288 289 290

	if (is_shub2()) {

		for_each_online_node(node) {
			nasid = cnodeid_to_nasid(node);
291 292 293 294 295 296 297 298
			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
			      xpc_sh2_IPI_access0);
			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
			      xpc_sh2_IPI_access1);
			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
			      xpc_sh2_IPI_access2);
			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
			      xpc_sh2_IPI_access3);
299 300 301 302 303 304
		}

	} else {

		for_each_online_node(node) {
			nasid = cnodeid_to_nasid(node);
305 306
			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
			      xpc_sh1_IPI_access);
307 308

			if (enable_shub_wars_1_1()) {
309
				HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
310
						   SH1_MD_DQLP_MMR_DIR_PRIVEC0),
311 312
				      xpc_prot_vec[node]);
				HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
313
						   SH1_MD_DQRP_MMR_DIR_PRIVEC0),
314
				      xpc_prot_vec[node]);
315 316 317 318 319 320 321 322 323 324 325 326 327 328
			}
		}
	}
}

/*
 * At periodic intervals, scan through all active partitions and ensure
 * their heartbeat is still active.  If not, the partition is deactivated.
 */
void
xpc_check_remote_hb(void)
{
	struct xpc_vars *remote_vars;
	struct xpc_partition *part;
329
	short partid;
330
	enum xp_retval ret;
331

332
	remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
333

334
	for (partid = 0; partid < xp_max_npartitions; partid++) {
335

336
		if (xpc_exiting)
337 338
			break;

339
		if (partid == sn_partition_id)
340 341 342 343 344
			continue;

		part = &xpc_partitions[partid];

		if (part->act_state == XPC_P_INACTIVE ||
345
		    part->act_state == XPC_P_DEACTIVATING) {
346 347 348 349
			continue;
		}

		/* pull the remote_hb cache line */
350 351 352 353 354
		ret = xp_remote_memcpy(remote_vars,
				       (void *)part->remote_vars_pa,
				       XPC_RP_VARS_SIZE);
		if (ret != xpSuccess) {
			XPC_DEACTIVATE_PARTITION(part, ret);
355 356 357 358
			continue;
		}

		dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
359 360 361
			" = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
			partid, remote_vars->heartbeat, part->last_heartbeat,
			remote_vars->heartbeat_offline,
362 363 364
			remote_vars->heartbeating_to_mask);

		if (((remote_vars->heartbeat == part->last_heartbeat) &&
365 366
		     (remote_vars->heartbeat_offline == 0)) ||
		    !xpc_hb_allowed(sn_partition_id, remote_vars)) {
367

368
			XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat);
369 370 371 372 373 374 375 376
			continue;
		}

		part->last_heartbeat = remote_vars->heartbeat;
	}
}

/*
377
 * Get a copy of a portion of the remote partition's rsvd page.
378 379
 *
 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
380 381
 * is large enough to contain a copy of their reserved page header and
 * part_nasids mask.
382
 */
383
static enum xp_retval
384
xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
385
		  struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
386
{
387 388
	int i;
	enum xp_retval ret;
389 390 391

	/* get the reserved page's physical address */

392
	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
393
	if (*remote_rp_pa == 0)
394
		return xpNoRsvdPageAddr;
395

396
	/* pull over the reserved page header and part_nasids mask */
397
	ret = xp_remote_memcpy(remote_rp, (void *)*remote_rp_pa,
398
			       XPC_RP_HEADER_SIZE + xp_sizeof_nasid_mask);
399 400
	if (ret != xpSuccess)
		return ret;
401 402

	if (discovered_nasids != NULL) {
403 404
		u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);

405
		for (i = 0; i < xp_nasid_mask_words; i++)
406
			discovered_nasids[i] |= remote_part_nasids[i];
407 408
	}

409
	/* check that the partid is valid and is for another partition */
410

411 412
	if (remote_rp->SAL_partid < 0 ||
	    remote_rp->SAL_partid >= xp_max_npartitions) {
413
		return xpInvalidPartid;
414
	}
415

416
	if (remote_rp->SAL_partid == sn_partition_id)
417
		return xpLocalPartid;
418

419 420 421 422
	/* see if the rest of the reserved page has been set up by XPC */
	if (timespec_equal(&remote_rp->stamp, &ZERO_STAMP))
		return xpRsvdPageNotSet;

423
	if (XPC_VERSION_MAJOR(remote_rp->version) !=
424
	    XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
425
		return xpBadVersion;
426 427
	}

428 429 430
	if (remote_rp->max_npartitions <= sn_partition_id)
		return xpInvalidPartid;

431
	return xpSuccess;
432 433 434
}

/*
435
 * Get a copy of the remote partition's XPC variables from the reserved page.
436 437
 *
 * remote_vars points to a buffer that is cacheline aligned for BTE copies and
438
 * assumed to be of size XPC_RP_VARS_SIZE.
439
 */
440
static enum xp_retval
441 442
xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
{
443
	enum xp_retval ret;
444

445
	if (remote_vars_pa == 0)
446
		return xpVarsNotSet;
447 448

	/* pull over the cross partition variables */
449 450 451 452
	ret = xp_remote_memcpy(remote_vars, (void *)remote_vars_pa,
			       XPC_RP_VARS_SIZE);
	if (ret != xpSuccess)
		return ret;
453 454

	if (XPC_VERSION_MAJOR(remote_vars->version) !=
455
	    XPC_VERSION_MAJOR(XPC_V_VERSION)) {
456
		return xpBadVersion;
457 458
	}

459
	return xpSuccess;
460 461 462
}

/*
 * Update the remote partition's info.
 *
 * Copies the reserved page version, stamp and physical address, the vars
 * physical address and selected fields of remote_vars into 'part', emitting
 * a debug line for every value stored.
 */
static void
xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
			  struct timespec *remote_rp_stamp, u64 remote_rp_pa,
			  u64 remote_vars_pa, struct xpc_vars *remote_vars)
{
	part->remote_rp_version = remote_rp_version;
	dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
		part->remote_rp_version);

	part->remote_rp_stamp = *remote_rp_stamp;
	dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx)\n",
		part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);

	part->remote_rp_pa = remote_rp_pa;
	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);

	part->remote_vars_pa = remote_vars_pa;
	dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
		part->remote_vars_pa);

	/* baseline that the next heartbeat check compares against */
	part->last_heartbeat = remote_vars->heartbeat;
	dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
		part->last_heartbeat);

/* >>> remote_vars_part_pa and vars_part_pa are sn2 only!!! */
	part->remote_vars_part_pa = remote_vars->vars_part_pa;
	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
		part->remote_vars_part_pa);

	part->remote_act_nasid = remote_vars->act_nasid;
	dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
		part->remote_act_nasid);

	part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
	dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
		part->remote_act_phys_cpuid);

	part->remote_amos_page_pa = remote_vars->amos_page_pa;
	dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
		part->remote_amos_page_pa);

	part->remote_vars_version = remote_vars->version;
	dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
		part->remote_vars_version);
}

/*
512
 * Prior code has determined the nasid which generated an IPI.  Inspect
513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
 * that nasid to determine if its partition needs to be activated or
 * deactivated.
 *
 * A partition is consider "awaiting activation" if our partition
 * flags indicate it is not active and it has a heartbeat.  A
 * partition is considered "awaiting deactivation" if our partition
 * flags indicate it is active but it has no heartbeat or it is not
 * sending its heartbeat to us.
 *
 * To determine the heartbeat, the remote nasid must have a properly
 * initialized reserved page.
 */
static void
xpc_identify_act_IRQ_req(int nasid)
{
	struct xpc_rsvd_page *remote_rp;
	struct xpc_vars *remote_vars;
530
	u64 remote_rp_pa;
531
	u64 remote_vars_pa;
532 533 534
	int remote_rp_version;
	int reactivate = 0;
	int stamp_diff;
535
	struct timespec remote_rp_stamp = { 0, 0 }; /*>>> ZERO_STAMP */
536
	short partid;
537
	struct xpc_partition *part;
538
	enum xp_retval ret;
539 540 541

	/* pull over the reserved page structure */

542
	remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;
543

544
	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
545
	if (ret != xpSuccess) {
546
		dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
547
			 "which sent interrupt, reason=%d\n", nasid, ret);
548 549 550
		return;
	}

551
	remote_vars_pa = remote_rp->sn.vars_pa;
552
	remote_rp_version = remote_rp->version;
553
	if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
554
		remote_rp_stamp = remote_rp->stamp;
555

556
	partid = remote_rp->SAL_partid;
557 558 559 560
	part = &xpc_partitions[partid];

	/* pull over the cross partition variables */

561
	remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
562 563

	ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
564
	if (ret != xpSuccess) {
565 566

		dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
567
			 "which sent interrupt, reason=%d\n", nasid, ret);
568 569 570 571 572 573 574 575

		XPC_DEACTIVATE_PARTITION(part, ret);
		return;
	}

	part->act_IRQ_rcvd++;

	dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
576
		"%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
577 578
		remote_vars->heartbeat, remote_vars->heartbeating_to_mask);

579 580
	if (xpc_partition_disengaged(part) &&
	    part->act_state == XPC_P_INACTIVE) {
581

582
		xpc_update_partition_info(part, remote_rp_version,
583 584
					  &remote_rp_stamp, remote_rp_pa,
					  remote_vars_pa, remote_vars);
585

586 587 588 589 590 591 592 593 594 595 596 597
		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
			if (xpc_partition_disengage_requested(1UL << partid)) {
				/*
				 * Other side is waiting on us to disengage,
				 * even though we already have.
				 */
				return;
			}
		} else {
			/* other side doesn't support disengage requests */
			xpc_clear_partition_disengage_request(1UL << partid);
		}
598

599 600 601
		xpc_activate_partition(part);
		return;
	}
602

603 604 605 606 607
	DBUG_ON(part->remote_rp_version == 0);
	DBUG_ON(part->remote_vars_version == 0);

	if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
		DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
608
						       remote_vars_version));
609 610 611

		if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
			DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
612
							       version));
613 614
			/* see if the other side rebooted */
			if (part->remote_amos_page_pa ==
615 616
			    remote_vars->amos_page_pa &&
			    xpc_hb_allowed(sn_partition_id, remote_vars)) {
617 618 619 620
				/* doesn't look that way, so ignore the IPI */
				return;
			}
		}
621

622 623 624 625
		/*
		 * Other side rebooted and previous XPC didn't support the
		 * disengage request, so we don't need to do anything special.
		 */
626

627
		xpc_update_partition_info(part, remote_rp_version,
628 629
					  &remote_rp_stamp, remote_rp_pa,
					  remote_vars_pa, remote_vars);
630
		part->reactivate_nasid = nasid;
631
		XPC_DEACTIVATE_PARTITION(part, xpReactivating);
632 633
		return;
	}
634

635
	DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
636

637 638
	if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
639

640 641 642 643
		/*
		 * Other side rebooted and previous XPC did support the
		 * disengage request, but the new one doesn't.
		 */
644

645 646
		xpc_clear_partition_engaged(1UL << partid);
		xpc_clear_partition_disengage_request(1UL << partid);
647

648
		xpc_update_partition_info(part, remote_rp_version,
649 650
					  &remote_rp_stamp, remote_rp_pa,
					  remote_vars_pa, remote_vars);
651 652 653 654
		reactivate = 1;

	} else {
		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
655

656
		stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
657
						&remote_rp_stamp);
658 659 660 661 662 663 664 665 666 667
		if (stamp_diff != 0) {
			DBUG_ON(stamp_diff >= 0);

			/*
			 * Other side rebooted and the previous XPC did support
			 * the disengage request, as does the new one.
			 */

			DBUG_ON(xpc_partition_engaged(1UL << partid));
			DBUG_ON(xpc_partition_disengage_requested(1UL <<
668
								  partid));
669 670

			xpc_update_partition_info(part, remote_rp_version,
671 672 673
						  &remote_rp_stamp,
						  remote_rp_pa, remote_vars_pa,
						  remote_vars);
674 675 676 677
			reactivate = 1;
		}
	}

678
	if (part->disengage_request_timeout > 0 &&
679
	    !xpc_partition_disengaged(part)) {
680 681 682 683 684
		/* still waiting on other side to disengage from us */
		return;
	}

	if (reactivate) {
685
		part->reactivate_nasid = nasid;
686
		XPC_DEACTIVATE_PARTITION(part, xpReactivating);
687 688

	} else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
689
		   xpc_partition_disengage_requested(1UL << partid)) {
690
		XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
691 692 693 694 695 696 697 698 699 700 701 702 703 704 705
	}
}

/*
 * Loop through the activation AMO variables and process any bits
 * which are set.  Each bit indicates a nasid sending a partition
 * activation or deactivation request.
 *
 * Return #of IRQs detected.
 */
int
xpc_identify_act_IRQ_sender(void)
{
	int word, bit;
	u64 nasid_mask;
706
	u64 nasid;		/* remote nasid */
707 708 709
	int n_IRQs_detected = 0;
	AMO_t *act_amos;

710
	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
711 712

	/* scan through act AMO variable looking for non-zero entries */
713
	for (word = 0; word < xp_nasid_mask_words; word++) {
714

715
		if (xpc_exiting)
716
			break;
717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732

		nasid_mask = xpc_IPI_receive(&act_amos[word]);
		if (nasid_mask == 0) {
			/* no IRQs from nasids in this variable */
			continue;
		}

		dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
			nasid_mask);

		/*
		 * If this nasid has been added to the machine since
		 * our partition was reset, this will retain the
		 * remote nasid in our reserved pages machine mask.
		 * This is used in the event of module reload.
		 */
733
		xpc_mach_nasids[word] |= nasid_mask;
734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749

		/* locate the nasid(s) which sent interrupts */

		for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
			if (nasid_mask & (1UL << bit)) {
				n_IRQs_detected++;
				nasid = XPC_NASID_FROM_W_B(word, bit);
				dev_dbg(xpc_part, "interrupt from nasid %ld\n",
					nasid);
				xpc_identify_act_IRQ_req(nasid);
			}
		}
	}
	return n_IRQs_detected;
}

750 751 752 753 754 755 756
/*
 * See if the other side has responded to a partition disengage request
 * from us.
 */
int
xpc_partition_disengaged(struct xpc_partition *part)
{
757
	short partid = XPC_PARTID(part);
758 759 760 761 762
	int disengaged;

	disengaged = (xpc_partition_engaged(1UL << partid) == 0);
	if (part->disengage_request_timeout) {
		if (!disengaged) {
763 764
			if (time_before(jiffies,
			    part->disengage_request_timeout)) {
765 766 767 768 769 770 771 772 773
				/* timelimit hasn't been reached yet */
				return 0;
			}

			/*
			 * Other side hasn't responded to our disengage
			 * request in a timely fashion, so assume it's dead.
			 */

774
			dev_info(xpc_part, "disengage from remote partition %d "
775
				 "timed out\n", partid);
776
			xpc_disengage_request_timedout = 1;
777 778 779 780 781 782 783 784
			xpc_clear_partition_engaged(1UL << partid);
			disengaged = 1;
		}
		part->disengage_request_timeout = 0;

		/* cancel the timer function, provided it's not us */
		if (!in_interrupt()) {
			del_singleshot_timer_sync(&part->
785
						  disengage_request_timer);
786 787 788
		}

		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
789
			part->act_state != XPC_P_INACTIVE);
790
		if (part->act_state != XPC_P_INACTIVE)
791 792
			xpc_wakeup_channel_mgr(part);

793
		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version))
794 795 796 797 798
			xpc_cancel_partition_disengage_request(part);
	}
	return disengaged;
}

799 800 801
/*
 * Mark specified partition as active.
 */
802
enum xp_retval
803 804 805
xpc_mark_partition_active(struct xpc_partition *part)
{
	unsigned long irq_flags;
806
	enum xp_retval ret;
807 808 809 810 811 812

	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, irq_flags);
	if (part->act_state == XPC_P_ACTIVATING) {
		part->act_state = XPC_P_ACTIVE;
813
		ret = xpSuccess;
814
	} else {
815
		DBUG_ON(part->reason == xpSuccess);
816 817 818 819 820 821 822 823 824 825 826 827
		ret = part->reason;
	}
	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	return ret;
}

/*
 * Notify XPC that the partition is down.
 */
void
xpc_deactivate_partition(const int line, struct xpc_partition *part,
828
			 enum xp_retval reason)
829 830 831 832 833 834 835 836
{
	unsigned long irq_flags;

	spin_lock_irqsave(&part->act_lock, irq_flags);

	if (part->act_state == XPC_P_INACTIVE) {
		XPC_SET_REASON(part, reason, line);
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
837
		if (reason == xpReactivating) {
838 839 840 841 842 843
			/* we interrupt ourselves to reactivate partition */
			xpc_IPI_send_reactivate(part);
		}
		return;
	}
	if (part->act_state == XPC_P_DEACTIVATING) {
844 845
		if ((part->reason == xpUnloading && reason != xpUnloading) ||
		    reason == xpReactivating) {
846 847 848 849 850 851 852 853 854 855 856
			XPC_SET_REASON(part, reason, line);
		}
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		return;
	}

	part->act_state = XPC_P_DEACTIVATING;
	XPC_SET_REASON(part, reason, line);

	spin_unlock_irqrestore(&part->act_lock, irq_flags);

857 858 859
	if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
		xpc_request_partition_disengage(part);
		xpc_IPI_send_disengage(part);
860

861 862
		/* set a timelimit on the disengage request */
		part->disengage_request_timeout = jiffies +
863
		    (xpc_disengage_request_timelimit * HZ);
864
		part->disengage_request_timer.expires =
865
		    part->disengage_request_timeout;
866 867
		add_timer(&part->disengage_request_timer);
	}
868

869 870
	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
		XPC_PARTID(part), reason);
871

872
	xpc_partition_going_down(part, reason);
873 874 875
}

/*
 * Mark specified partition as inactive.
 *
 * act_state is changed under act_lock; the cached reserved page address is
 * then zeroed, which makes xpc_initiate_partid_to_nasids() report the
 * partition as down.
 */
void
xpc_mark_partition_inactive(struct xpc_partition *part)
{
	unsigned long flags;

	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
		XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, flags);
	part->act_state = XPC_P_INACTIVE;
	spin_unlock_irqrestore(&part->act_lock, flags);

	/* cleared after the lock has been dropped */
	part->remote_rp_pa = 0;
}

/*
 * SAL has provided a partition and machine mask.  The partition mask
 * contains a bit for each even nasid in our partition.  The machine
 * mask contains a bit for each even nasid in the entire machine.
 *
 * Using those two bit arrays, we can determine which nasids are
 * known in the machine.  Each should also have a reserved page
 * initialized if they are available for partitioning.
 */
void
xpc_discovery(void)
{
	void *remote_rp_base;
	struct xpc_rsvd_page *remote_rp;
	struct xpc_vars *remote_vars;
907
	u64 remote_rp_pa;
908 909
	u64 remote_vars_pa;
	int region;
910
	int region_size;
911 912 913
	int max_regions;
	int nasid;
	struct xpc_rsvd_page *rp;
914
	short partid;
915 916
	struct xpc_partition *part;
	u64 *discovered_nasids;
917
	enum xp_retval ret;
918

919
	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
920
						  xp_sizeof_nasid_mask,
921
						  GFP_KERNEL, &remote_rp_base);
922
	if (remote_rp == NULL)
923
		return;
924

925
	remote_vars = (struct xpc_vars *)remote_rp;
926

927
	discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
928
				    GFP_KERNEL);
929 930 931 932 933
	if (discovered_nasids == NULL) {
		kfree(remote_rp_base);
		return;
	}

934
	rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
935 936 937 938 939 940

	/*
	 * The term 'region' in this context refers to the minimum number of
	 * nodes that can comprise an access protection grouping. The access
	 * protection is in regards to memory, IOI and IPI.
	 */
941 942 943 944 945 946 947 948 949 950 951 952 953
	max_regions = 64;
	region_size = sn_region_size;

	switch (region_size) {
	case 128:
		max_regions *= 2;
	case 64:
		max_regions *= 2;
	case 32:
		max_regions *= 2;
		region_size = 16;
		DBUG_ON(!is_shub2());
	}
954 955 956

	for (region = 0; region < max_regions; region++) {

957
		if (xpc_exiting)
958 959 960 961
			break;

		dev_dbg(xpc_part, "searching region %d\n", region);

962
		for (nasid = (region * region_size * 2);
963
		     nasid < ((region + 1) * region_size * 2); nasid += 2) {
964

965
			if (xpc_exiting)
966 967 968 969
				break;

			dev_dbg(xpc_part, "checking nasid %d\n", nasid);

970
			if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
971 972 973 974 975 976
				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
					"part of the local partition; skipping "
					"region\n", nasid);
				break;
			}

977
			if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993
				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
					"not on Numa-Link network at reset\n",
					nasid);
				continue;
			}

			if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
				dev_dbg(xpc_part, "Nasid %d is part of a "
					"partition which was previously "
					"discovered\n", nasid);
				continue;
			}

			/* pull over the reserved page structure */

			ret = xpc_get_remote_rp(nasid, discovered_nasids,
994
						remote_rp, &remote_rp_pa);
995
			if (ret != xpSuccess) {
996 997 998 999
				dev_dbg(xpc_part, "unable to get reserved page "
					"from nasid %d, reason=%d\n", nasid,
					ret);

1000
				if (ret == xpLocalPartid)
1001
					break;
1002

1003 1004 1005
				continue;
			}

1006
			remote_vars_pa = remote_rp->sn.vars_pa;
1007

1008
			partid = remote_rp->SAL_partid;
1009 1010 1011 1012 1013
			part = &xpc_partitions[partid];

			/* pull over the cross partition variables */

			ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
1014
			if (ret != xpSuccess) {
1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040
				dev_dbg(xpc_part, "unable to get XPC variables "
					"from nasid %d, reason=%d\n", nasid,
					ret);

				XPC_DEACTIVATE_PARTITION(part, ret);
				continue;
			}

			if (part->act_state != XPC_P_INACTIVE) {
				dev_dbg(xpc_part, "partition %d on nasid %d is "
					"already activating\n", partid, nasid);
				break;
			}

			/*
			 * Register the remote partition's AMOs with SAL so it
			 * can handle and cleanup errors within that address
			 * range should the remote partition go down. We don't
			 * unregister this range because it is difficult to
			 * tell when outstanding writes to the remote partition
			 * are finished and thus when it is thus safe to
			 * unregister. This should not result in wasted space
			 * in the SAL xp_addr_region table because we should
			 * get the same page for remote_act_amos_pa after
			 * module reloads and system reboots.
			 */
1041 1042 1043 1044
			if (sn_register_xp_addr_region
			    (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) {
				dev_dbg(xpc_part,
					"partition %d failed to "
1045 1046 1047
					"register xp_addr region 0x%016lx\n",
					partid, remote_vars->amos_page_pa);

1048
				XPC_SET_REASON(part, xpPhysAddrRegFailed,
1049
					       __LINE__);
1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063
				break;
			}

			/*
			 * The remote nasid is valid and available.
			 * Send an interrupt to that nasid to notify
			 * it that we are ready to begin activation.
			 */
			dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
				"nasid %d, phys_cpuid 0x%x\n",
				remote_vars->amos_page_pa,
				remote_vars->act_nasid,
				remote_vars->act_phys_cpuid);

1064
			if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
1065
							   version)) {
1066
				part->remote_amos_page_pa =
1067
				    remote_vars->amos_page_pa;
1068 1069 1070
				xpc_mark_partition_disengaged(part);
				xpc_cancel_partition_disengage_request(part);
			}
1071 1072 1073 1074 1075 1076 1077 1078 1079 1080
			xpc_IPI_send_activate(remote_vars);
		}
	}

	kfree(discovered_nasids);
	kfree(remote_rp_base);
}

/*
 * Given a partid, get the nasids owned by that partition from the
1081
 * remote partition's reserved page.
1082
 */
1083
enum xp_retval
1084
xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
1085 1086 1087 1088 1089
{
	struct xpc_partition *part;
	u64 part_nasid_pa;

	part = &xpc_partitions[partid];
1090
	if (part->remote_rp_pa == 0)
1091
		return xpPartitionDown;
1092

1093 1094
	memset(nasid_mask, 0, XP_NASID_MASK_BYTES);

1095
	part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
1096

1097
	return xp_remote_memcpy(nasid_mask, (void *)part_nasid_pa,
1098
				xp_sizeof_nasid_mask);
1099
}