xpc_partition.c 31.4 KB
Newer Older
1 2 3 4 5
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
6
 * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 */


/*
 * Cross Partition Communication (XPC) partition support.
 *
 *	This is the part of XPC that detects the presence/absence of
 *	other partitions. It provides a heartbeat and monitors the
 *	heartbeats of other partitions.
 *
 */


#include <linux/kernel.h>
#include <linux/sysctl.h>
#include <linux/cache.h>
#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <linux/jiffies.h>
#include <asm/uncached.h>
#include <asm/sn/bte.h>
#include <asm/sn/intr.h>
#include <asm/sn/sn_sal.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/addrs.h>
#include <asm/sn/xpc.h>
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46


/* XPC is exiting flag */
int xpc_exiting;


/* SH_IPI_ACCESS shub register value on startup */
static u64 xpc_sh1_IPI_access;
static u64 xpc_sh2_IPI_access0;
static u64 xpc_sh2_IPI_access1;
static u64 xpc_sh2_IPI_access2;
static u64 xpc_sh2_IPI_access3;


/* original protection values for each node */
47
u64 xpc_prot_vec[MAX_NUMNODES];
48 49


50
/* this partition's reserved page pointers */
51
struct xpc_rsvd_page *xpc_rsvd_page;
52 53
static u64 *xpc_part_nasids;
static u64 *xpc_mach_nasids;
54 55 56
struct xpc_vars *xpc_vars;
struct xpc_vars_part *xpc_vars_part;

57 58 59
static int xp_nasid_mask_bytes;	/* actual size in bytes of nasid mask */
static int xp_nasid_mask_words;	/* actual size in words of nasid mask */

60 61 62 63 64 65 66 67 68 69 70

/*
 * For performance reasons, each entry of xpc_partitions[] is cacheline
 * aligned. And xpc_partitions[] is padded with an additional entry at the
 * end so that the last legitimate entry doesn't share its cacheline with
 * another variable.
 */
struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];


/*
71 72 73
 * Generic buffer used to store a local copy of portions of a remote
 * partition's reserved page (either its header and part_nasids mask,
 * or its vars).
74 75 76 77 78
 *
 * xpc_discovery runs only once and is a separate thread that is
 * very likely going to be processing in parallel with receiving
 * interrupts.
 */
79 80
char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
							XP_NASID_MASK_BYTES];
81 82


83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
/*
 * Allocate 'size' bytes with kmalloc() such that the returned address is
 * L1 cacheline aligned.
 *
 * The pointer actually obtained from kmalloc() is stored in *base; that is
 * the pointer to hand to kfree() later. The return value is the aligned
 * address to use. If an allocation fails, both *base and the return value
 * are NULL.
 */
static void *
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
	void *raw;

	/* first attempt: kmalloc may hand back aligned memory by itself */
	raw = kmalloc(size, flags);
	*base = raw;
	if (raw == NULL) {
		return NULL;
	}
	if (L1_CACHE_ALIGN((u64) raw) == (u64) raw) {
		return raw;
	}
	kfree(raw);

	/* it did not, so over-allocate by one cacheline and round up */
	raw = kmalloc(size + L1_CACHE_BYTES, flags);
	*base = raw;
	if (raw == NULL) {
		return NULL;
	}
	return (void *) L1_CACHE_ALIGN((u64) raw);
}


108 109 110 111 112
/*
 * Given a nasid, get the physical address of the  partition's reserved page
 * for that nasid. This function returns 0 on any error.
 */
static u64
113
xpc_get_rsvd_page_pa(int nasid)
114 115 116 117 118 119
{
	bte_result_t bte_res;
	s64 status;
	u64 cookie = 0;
	u64 rp_pa = nasid;	/* seed with nasid */
	u64 len = 0;
120 121 122
	u64 buf = buf;
	u64 buf_len = 0;
	void *buf_base = NULL;
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137


	while (1) {

		status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
								&len);

		dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
			"0x%016lx, address=0x%016lx, len=0x%016lx\n",
			status, cookie, rp_pa, len);

		if (status != SALRET_MORE_PASSES) {
			break;
		}

138
		if (L1_CACHE_ALIGN(len) > buf_len) {
139
			kfree(buf_base);
140 141 142 143 144 145 146 147 148
			buf_len = L1_CACHE_ALIGN(len);
			buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len,
							GFP_KERNEL, &buf_base);
			if (buf_base == NULL) {
				dev_err(xpc_part, "unable to kmalloc "
					"len=0x%016lx\n", buf_len);
				status = SALRET_ERROR;
				break;
			}
149 150
		}

151
		bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_len,
152 153 154 155 156 157 158 159
					(BTE_NOTIFY | BTE_WACQUIRE), NULL);
		if (bte_res != BTE_SUCCESS) {
			dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
			status = SALRET_ERROR;
			break;
		}
	}

160
	kfree(buf_base);
161

162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
	if (status != SALRET_OK) {
		rp_pa = 0;
	}
	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
	return rp_pa;
}


/*
 * Fill the partition reserved page with the information needed by
 * other partitions to discover we are alive and establish initial
 * communications.
 */
struct xpc_rsvd_page *
xpc_rsvd_page_init(void)
{
	struct xpc_rsvd_page *rp;
	AMO_t *amos_page;
180
	u64 rp_pa, nasid_array = 0;
181 182 183 184 185
	int i, ret;


	/* get the local reserved page's address */

186 187 188
	preempt_disable();
	rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
	preempt_enable();
189 190 191 192 193 194 195 196 197 198 199 200 201 202
	if (rp_pa == 0) {
		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
		return NULL;
	}
	rp = (struct xpc_rsvd_page *) __va(rp_pa);

	if (rp->partid != sn_partition_id) {
		dev_err(xpc_part, "the reserved page's partid of %d should be "
			"%d\n", rp->partid, sn_partition_id);
		return NULL;
	}

	rp->version = XPC_RP_VERSION;

203 204 205 206 207 208 209 210 211 212 213 214 215
	/* establish the actual sizes of the nasid masks */
	if (rp->SAL_version == 1) {
		/* SAL_version 1 didn't set the nasids_size field */
		rp->nasids_size = 128;
	}
	xp_nasid_mask_bytes = rp->nasids_size;
	xp_nasid_mask_words = xp_nasid_mask_bytes / 8;

	/* setup the pointers to the various items in the reserved page */
	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
	xpc_vars = XPC_RP_VARS(rp);
	xpc_vars_part = XPC_RP_VARS_PART(rp);
216 217 218 219 220 221 222 223 224 225 226 227 228 229

	/*
	 * Before clearing xpc_vars, see if a page of AMOs had been previously
	 * allocated. If not we'll need to allocate one and set permissions
	 * so that cross-partition AMOs are allowed.
	 *
	 * The allocated AMO page needs MCA reporting to remain disabled after
	 * XPC has unloaded.  To make this work, we keep a copy of the pointer
	 * to this page (i.e., amos_page) in the struct xpc_vars structure,
	 * which is pointed to by the reserved page, and re-use that saved copy
	 * on subsequent loads of XPC. This AMO page is never freed, and its
	 * memory protections are never restricted.
	 */
	if ((amos_page = xpc_vars->amos_page) == NULL) {
J
Jes Sorensen 已提交
230
		amos_page = (AMO_t *) TO_AMO(uncached_alloc_page(0));
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
		if (amos_page == NULL) {
			dev_err(xpc_part, "can't allocate page of AMOs\n");
			return NULL;
		}

		/*
		 * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
		 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
		 */
		if (!enable_shub_wars_1_1()) {
			ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
					PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
					&nasid_array);
			if (ret != 0) {
				dev_err(xpc_part, "can't change memory "
					"protections\n");
J
Jes Sorensen 已提交
247 248
				uncached_free_page(__IA64_UNCACHED_OFFSET |
						   TO_PHYS((u64) amos_page));
249 250 251
				return NULL;
			}
		}
252 253 254 255 256 257 258 259 260 261 262 263 264
	} else if (!IS_AMO_ADDRESS((u64) amos_page)) {
		/*
		 * EFI's XPBOOT can also set amos_page in the reserved page,
		 * but it happens to leave it as an uncached physical address
		 * and we need it to be an uncached virtual, so we'll have to
		 * convert it.
		 */
		if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) {
			dev_err(xpc_part, "previously used amos_page address "
				"is bad = 0x%p\n", (void *) amos_page);
			return NULL;
		}
		amos_page = (AMO_t *) TO_AMO((u64) amos_page);
265 266
	}

267
	/* clear xpc_vars */
268 269 270 271 272
	memset(xpc_vars, 0, sizeof(struct xpc_vars));

	xpc_vars->version = XPC_V_VERSION;
	xpc_vars->act_nasid = cpuid_to_nasid(0);
	xpc_vars->act_phys_cpuid = cpu_physical_id(0);
273 274
	xpc_vars->vars_part_pa = __pa(xpc_vars_part);
	xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page);
275 276 277
	xpc_vars->amos_page = amos_page;  /* save for next load of XPC */


278 279 280 281
	/* clear xpc_vars_part */
	memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
							XP_MAX_PARTITIONS);

282
	/* initialize the activate IRQ related AMO variables */
283
	for (i = 0; i < xp_nasid_mask_words; i++) {
284
		(void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
285
	}
286 287 288 289 290

	/* initialize the engaged remote partitions related AMO variables */
	(void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
	(void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);

291
	/* timestamp of when reserved page was setup by XPC */
292
	rp->stamp = CURRENT_TIME;
293 294 295 296 297

	/*
	 * This signifies to the remote partition that our reserved
	 * page is initialized.
	 */
298
	rp->vars_pa = __pa(xpc_vars);
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431

	return rp;
}


/*
 * Change protections to allow IPI operations (and AMO operations on
 * Shub 1.1 systems).
 *
 * The current values of the local IPI access register(s) are saved in the
 * file-scope xpc_sh*_IPI_access variables before every online node's
 * register(s) are opened up with -1UL. On Shub 1.1 systems each node's
 * SH1_MD_DQLP_MMR_DIR_PRIVEC0 value is additionally saved in xpc_prot_vec[]
 * before both PRIVEC0 registers are opened up.
 */
void
xpc_allow_IPI_ops(void)
{
	int node;
	int nasid;


	/* >>> Change SH_IPI_ACCESS code to use SAL call once it is available. */

	if (is_shub2()) {
		/* remember the startup values, read from the local hub */
		xpc_sh2_IPI_access0 =
			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
		xpc_sh2_IPI_access1 =
			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
		xpc_sh2_IPI_access2 =
			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
		xpc_sh2_IPI_access3 =
			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));

		for_each_online_node(node) {
			nasid = cnodeid_to_nasid(node);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
								-1UL);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
								-1UL);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
								-1UL);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
								-1UL);
		}
		return;
	}

	/* not a shub2: there is a single IPI access register to deal with */
	xpc_sh1_IPI_access =
		(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));

	for_each_online_node(node) {
		nasid = cnodeid_to_nasid(node);
		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), -1UL);

		/*
		 * Since the BIST collides with memory operations on
		 * SHUB 1.1 sn_change_memprotect() cannot be used.
		 */
		if (!enable_shub_wars_1_1()) {
			continue;
		}

		/* open up everything */
		xpc_prot_vec[node] = (u64) HUB_L((u64 *)
				GLOBAL_MMR_ADDR(nasid,
				SH1_MD_DQLP_MMR_DIR_PRIVEC0));
		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
				SH1_MD_DQLP_MMR_DIR_PRIVEC0), -1UL);
		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
				SH1_MD_DQRP_MMR_DIR_PRIVEC0), -1UL);
	}
}


/*
 * Restrict protections to disallow IPI operations (and AMO operations on
 * Shub 1.1 systems).
 *
 * Every online node's IPI access register(s) are written back with the
 * values held in the file-scope xpc_sh*_IPI_access variables. On Shub 1.1
 * systems both PRIVEC0 registers of a node are written with that node's
 * xpc_prot_vec[] entry.
 */
void
xpc_restrict_IPI_ops(void)
{
	int node;
	int nasid;


	/* >>> Change SH_IPI_ACCESS code to use SAL call once it is available. */

	if (is_shub2()) {
		for_each_online_node(node) {
			nasid = cnodeid_to_nasid(node);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
							xpc_sh2_IPI_access0);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
							xpc_sh2_IPI_access1);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
							xpc_sh2_IPI_access2);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
							xpc_sh2_IPI_access3);
		}
		return;
	}

	/* not a shub2 */
	for_each_online_node(node) {
		nasid = cnodeid_to_nasid(node);
		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
							xpc_sh1_IPI_access);

		if (!enable_shub_wars_1_1()) {
			continue;
		}

		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
				SH1_MD_DQLP_MMR_DIR_PRIVEC0),
					xpc_prot_vec[node]);
		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
				SH1_MD_DQRP_MMR_DIR_PRIVEC0),
					xpc_prot_vec[node]);
	}
}


/*
 * At periodic intervals, scan through all active partitions and ensure
 * their heartbeat is still active.  If not, the partition is deactivated.
 */
void
xpc_check_remote_hb(void)
{
	struct xpc_vars *remote_vars;
	struct xpc_partition *part;
	partid_t partid;
	bte_result_t bres;


	remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;

	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
432 433 434 435 436

		if (xpc_exiting) {
			break;
		}

437 438 439 440 441 442 443 444 445 446 447 448 449 450
		if (partid == sn_partition_id) {
			continue;
		}

		part = &xpc_partitions[partid];

		if (part->act_state == XPC_P_INACTIVE ||
				part->act_state == XPC_P_DEACTIVATING) {
			continue;
		}

		/* pull the remote_hb cache line */
		bres = xp_bte_copy(part->remote_vars_pa,
					ia64_tpa((u64) remote_vars),
451
					XPC_RP_VARS_SIZE,
452 453 454 455 456 457 458 459
					(BTE_NOTIFY | BTE_WACQUIRE), NULL);
		if (bres != BTE_SUCCESS) {
			XPC_DEACTIVATE_PARTITION(part,
						xpc_map_bte_errors(bres));
			continue;
		}

		dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
460 461 462
			" = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
			partid, remote_vars->heartbeat, part->last_heartbeat,
			remote_vars->heartbeat_offline,
463 464 465
			remote_vars->heartbeating_to_mask);

		if (((remote_vars->heartbeat == part->last_heartbeat) &&
466
			(remote_vars->heartbeat_offline == 0)) ||
467
			     !xpc_hb_allowed(sn_partition_id, remote_vars)) {
468 469 470 471 472 473 474 475 476 477 478

			XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
			continue;
		}

		part->last_heartbeat = remote_vars->heartbeat;
	}
}


/*
 * Get a copy of a portion of the remote partition's rsvd page.
 *
 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
 * is large enough to contain a copy of their reserved page header and
 * part_nasids mask.
 *
 * On return *remote_rp_pa holds the physical address reported for the
 * remote reserved page (0 if it could not be obtained). When
 * discovered_nasids is non-NULL, the remote part_nasids mask is OR'ed
 * into it.
 *
 * Returns xpcSuccess, or the reason the remote reserved page is unusable.
 */
static enum xpc_retval
xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
		struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
{
	u64 *remote_part_nasids;
	int bres, i;


	/* get the reserved page's physical address */

	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
	if (*remote_rp_pa == 0) {
		return xpcNoRsvdPageAddr;
	}


	/* pull over the reserved page header and part_nasids mask */

	bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
				XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
				(BTE_NOTIFY | BTE_WACQUIRE), NULL);
	if (bres != BTE_SUCCESS) {
		return xpc_map_bte_errors(bres);
	}


	/* fold the remote's nasids into the caller's mask, if one was given */

	if (discovered_nasids != NULL) {
		remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);

		for (i = 0; i < xp_nasid_mask_words; i++) {
			discovered_nasids[i] |= remote_part_nasids[i];
		}
	}


	/* check that the partid is for another partition */

	if (remote_rp->partid < 1 ||
				remote_rp->partid >= XP_MAX_PARTITIONS) {
		return xpcInvalidPartid;
	}

	if (remote_rp->partid == sn_partition_id) {
		return xpcLocalPartid;
	}


	/* the major version of the remote reserved page must match ours */

	if (XPC_VERSION_MAJOR(remote_rp->version) !=
					XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
		return xpcBadVersion;
	}

	return xpcSuccess;
}


/*
 * Get a copy of the remote partition's XPC variables from the reserved page.
 *
 * remote_vars points to a buffer that is cacheline aligned for BTE copies and
 * assumed to be of size XPC_RP_VARS_SIZE.
 *
 * Returns xpcVarsNotSet when remote_vars_pa is 0, the mapped BTE error when
 * the copy fails, xpcBadVersion on a major version mismatch, and xpcSuccess
 * otherwise.
 */
static enum xpc_retval
xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
{
	int bres;


	/* a zero address means there is nothing to pull over */
	if (remote_vars_pa == 0) {
		return xpcVarsNotSet;
	}


	/* pull over the cross partition variables */

	bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
				XPC_RP_VARS_SIZE,
				(BTE_NOTIFY | BTE_WACQUIRE), NULL);
	if (bres != BTE_SUCCESS) {
		return xpc_map_bte_errors(bres);
	}

	if (XPC_VERSION_MAJOR(XPC_V_VERSION) !=
				XPC_VERSION_MAJOR(remote_vars->version)) {
		return xpcBadVersion;
	}

	return xpcSuccess;
}


/*
 * Update the remote partition's info.
 *
 * Records in 'part' the remote reserved page version, stamp and physical
 * address, the remote vars physical address, and the heartbeat,
 * vars_part_pa, act_nasid, act_phys_cpuid, amos_page_pa and version found
 * in 'remote_vars'. Each stored value is also emitted via dev_dbg().
 */
static void
xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
		struct timespec *remote_rp_stamp, u64 remote_rp_pa,
		u64 remote_vars_pa, struct xpc_vars *remote_vars)
{
	part->remote_rp_version = remote_rp_version;
	/*
	 * The version arrives here as a u8, so it is cast to match the %lx
	 * conversion instead of being passed as a promoted int.
	 */
	dev_dbg(xpc_part, "  remote_rp_version = 0x%016lx\n",
		(unsigned long) part->remote_rp_version);

	part->remote_rp_stamp = *remote_rp_stamp;
	dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
		part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);

	part->remote_rp_pa = remote_rp_pa;
	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);

	part->remote_vars_pa = remote_vars_pa;
	dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
		part->remote_vars_pa);

	part->last_heartbeat = remote_vars->heartbeat;
	dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
		part->last_heartbeat);

	part->remote_vars_part_pa = remote_vars->vars_part_pa;
	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
		part->remote_vars_part_pa);

	part->remote_act_nasid = remote_vars->act_nasid;
	dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
		part->remote_act_nasid);

	part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
	dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
		part->remote_act_phys_cpuid);

	part->remote_amos_page_pa = remote_vars->amos_page_pa;
	dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
		part->remote_amos_page_pa);

	part->remote_vars_version = remote_vars->version;
	dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
		part->remote_vars_version);
}


625
/*
626
 * Prior code has determined the nasid which generated an IPI.  Inspect
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
 * that nasid to determine if its partition needs to be activated or
 * deactivated.
 *
 * A partition is consider "awaiting activation" if our partition
 * flags indicate it is not active and it has a heartbeat.  A
 * partition is considered "awaiting deactivation" if our partition
 * flags indicate it is active but it has no heartbeat or it is not
 * sending its heartbeat to us.
 *
 * To determine the heartbeat, the remote nasid must have a properly
 * initialized reserved page.
 */
static void
xpc_identify_act_IRQ_req(int nasid)
{
	struct xpc_rsvd_page *remote_rp;
	struct xpc_vars *remote_vars;
644
	u64 remote_rp_pa;
645
	u64 remote_vars_pa;
646 647 648 649
	int remote_rp_version;
	int reactivate = 0;
	int stamp_diff;
	struct timespec remote_rp_stamp = { 0, 0 };
650 651 652 653 654 655 656 657 658
	partid_t partid;
	struct xpc_partition *part;
	enum xpc_retval ret;


	/* pull over the reserved page structure */

	remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;

659
	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
660 661 662 663 664 665 666
	if (ret != xpcSuccess) {
		dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
			"which sent interrupt, reason=%d\n", nasid, ret);
		return;
	}

	remote_vars_pa = remote_rp->vars_pa;
667 668 669 670
	remote_rp_version = remote_rp->version;
	if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
		remote_rp_stamp = remote_rp->stamp;
	}
671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695
	partid = remote_rp->partid;
	part = &xpc_partitions[partid];


	/* pull over the cross partition variables */

	remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;

	ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
	if (ret != xpcSuccess) {

		dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
			"which sent interrupt, reason=%d\n", nasid, ret);

		XPC_DEACTIVATE_PARTITION(part, ret);
		return;
	}


	part->act_IRQ_rcvd++;

	dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
		"%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
		remote_vars->heartbeat, remote_vars->heartbeating_to_mask);

696 697
	if (xpc_partition_disengaged(part) &&
					part->act_state == XPC_P_INACTIVE) {
698

699 700 701
		xpc_update_partition_info(part, remote_rp_version,
					&remote_rp_stamp, remote_rp_pa,
					remote_vars_pa, remote_vars);
702

703 704 705 706 707 708 709 710 711 712 713 714
		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
			if (xpc_partition_disengage_requested(1UL << partid)) {
				/*
				 * Other side is waiting on us to disengage,
				 * even though we already have.
				 */
				return;
			}
		} else {
			/* other side doesn't support disengage requests */
			xpc_clear_partition_disengage_request(1UL << partid);
		}
715

716 717 718
		xpc_activate_partition(part);
		return;
	}
719

720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738
	DBUG_ON(part->remote_rp_version == 0);
	DBUG_ON(part->remote_vars_version == 0);

	if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
		DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
							remote_vars_version));

		if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
			DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
								version));
			/* see if the other side rebooted */
			if (part->remote_amos_page_pa ==
				remote_vars->amos_page_pa &&
					xpc_hb_allowed(sn_partition_id,
								remote_vars)) {
				/* doesn't look that way, so ignore the IPI */
				return;
			}
		}
739

740 741 742 743
		/*
		 * Other side rebooted and previous XPC didn't support the
		 * disengage request, so we don't need to do anything special.
		 */
744

745 746 747 748 749 750 751
		xpc_update_partition_info(part, remote_rp_version,
						&remote_rp_stamp, remote_rp_pa,
						remote_vars_pa, remote_vars);
		part->reactivate_nasid = nasid;
		XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
		return;
	}
752

753
	DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
754

755 756
	if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
757

758 759 760 761
		/*
		 * Other side rebooted and previous XPC did support the
		 * disengage request, but the new one doesn't.
		 */
762

763 764
		xpc_clear_partition_engaged(1UL << partid);
		xpc_clear_partition_disengage_request(1UL << partid);
765

766 767 768 769 770 771 772
		xpc_update_partition_info(part, remote_rp_version,
						&remote_rp_stamp, remote_rp_pa,
						remote_vars_pa, remote_vars);
		reactivate = 1;

	} else {
		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
773

774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794
		stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
							&remote_rp_stamp);
		if (stamp_diff != 0) {
			DBUG_ON(stamp_diff >= 0);

			/*
			 * Other side rebooted and the previous XPC did support
			 * the disengage request, as does the new one.
			 */

			DBUG_ON(xpc_partition_engaged(1UL << partid));
			DBUG_ON(xpc_partition_disengage_requested(1UL <<
								partid));

			xpc_update_partition_info(part, remote_rp_version,
						&remote_rp_stamp, remote_rp_pa,
						remote_vars_pa, remote_vars);
			reactivate = 1;
		}
	}

795 796
	if (part->disengage_request_timeout > 0 &&
					!xpc_partition_disengaged(part)) {
797 798 799 800 801
		/* still waiting on other side to disengage from us */
		return;
	}

	if (reactivate) {
802 803
		part->reactivate_nasid = nasid;
		XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
804 805 806 807

	} else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
			xpc_partition_disengage_requested(1UL << partid)) {
		XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828
	}
}


/*
 * Loop through the activation AMO variables and process any bits
 * which are set.  Each bit indicates a nasid sending a partition
 * activation or deactivation request.
 *
 * Return #of IRQs detected.
 */
int
xpc_identify_act_IRQ_sender(void)
{
	int word, bit;
	u64 nasid_mask;
	u64 nasid;			/* remote nasid */
	int n_IRQs_detected = 0;
	AMO_t *act_amos;


829
	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
830 831 832


	/* scan through act AMO variable looking for non-zero entries */
833
	for (word = 0; word < xp_nasid_mask_words; word++) {
834

835 836 837
		if (xpc_exiting) {
			break;
		}
838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854

		nasid_mask = xpc_IPI_receive(&act_amos[word]);
		if (nasid_mask == 0) {
			/* no IRQs from nasids in this variable */
			continue;
		}

		dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
			nasid_mask);


		/*
		 * If this nasid has been added to the machine since
		 * our partition was reset, this will retain the
		 * remote nasid in our reserved pages machine mask.
		 * This is used in the event of module reload.
		 */
855
		xpc_mach_nasids[word] |= nasid_mask;
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873


		/* locate the nasid(s) which sent interrupts */

		for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
			if (nasid_mask & (1UL << bit)) {
				n_IRQs_detected++;
				nasid = XPC_NASID_FROM_W_B(word, bit);
				dev_dbg(xpc_part, "interrupt from nasid %ld\n",
					nasid);
				xpc_identify_act_IRQ_req(nasid);
			}
		}
	}
	return n_IRQs_detected;
}


874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897
/*
 * See if the other side has responded to a partition disengage request
 * from us.
 */
int
xpc_partition_disengaged(struct xpc_partition *part)
{
	partid_t partid = XPC_PARTID(part);
	int disengaged;


	disengaged = (xpc_partition_engaged(1UL << partid) == 0);
	if (part->disengage_request_timeout) {
		if (!disengaged) {
			if (jiffies < part->disengage_request_timeout) {
				/* timelimit hasn't been reached yet */
				return 0;
			}

			/*
			 * Other side hasn't responded to our disengage
			 * request in a timely fashion, so assume it's dead.
			 */

898 899 900
			dev_info(xpc_part, "disengage from remote partition %d "
				"timed out\n", partid);
			xpc_disengage_request_timedout = 1;
901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925
			xpc_clear_partition_engaged(1UL << partid);
			disengaged = 1;
		}
		part->disengage_request_timeout = 0;

		/* cancel the timer function, provided it's not us */
		if (!in_interrupt()) {
			del_singleshot_timer_sync(&part->
						      disengage_request_timer);
		}

		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
					part->act_state != XPC_P_INACTIVE);
		if (part->act_state != XPC_P_INACTIVE) {
			xpc_wakeup_channel_mgr(part);
		}

		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
			xpc_cancel_partition_disengage_request(part);
		}
	}
	return disengaged;
}


926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986
/*
 * Mark specified partition as active.
 *
 * Under part->act_lock, a partition in the XPC_P_ACTIVATING state is moved
 * to XPC_P_ACTIVE and xpcSuccess is returned. In any other state the
 * partition is left untouched and its recorded part->reason is returned.
 */
enum xpc_retval
xpc_mark_partition_active(struct xpc_partition *part)
{
	enum xpc_retval ret;
	unsigned long irq_flags;


	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, irq_flags);
	if (part->act_state != XPC_P_ACTIVATING) {
		/* not activating (any longer), so report why */
		DBUG_ON(part->reason == xpcSuccess);
		ret = part->reason;
	} else {
		part->act_state = XPC_P_ACTIVE;
		ret = xpcSuccess;
	}
	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	return ret;
}


/*
 * Notify XPC that the partition is down.
 */
void
xpc_deactivate_partition(const int line, struct xpc_partition *part,
				enum xpc_retval reason)
{
	unsigned long irq_flags;


	spin_lock_irqsave(&part->act_lock, irq_flags);

	if (part->act_state == XPC_P_INACTIVE) {
		XPC_SET_REASON(part, reason, line);
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		if (reason == xpcReactivating) {
			/* we interrupt ourselves to reactivate partition */
			xpc_IPI_send_reactivate(part);
		}
		return;
	}
	if (part->act_state == XPC_P_DEACTIVATING) {
		if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
					reason == xpcReactivating) {
			XPC_SET_REASON(part, reason, line);
		}
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		return;
	}

	part->act_state = XPC_P_DEACTIVATING;
	XPC_SET_REASON(part, reason, line);

	spin_unlock_irqrestore(&part->act_lock, irq_flags);

987 988 989
	if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
		xpc_request_partition_disengage(part);
		xpc_IPI_send_disengage(part);
990

991 992
		/* set a timelimit on the disengage request */
		part->disengage_request_timeout = jiffies +
993
					(xpc_disengage_request_timelimit * HZ);
994 995 996 997
		part->disengage_request_timer.expires =
					part->disengage_request_timeout;
		add_timer(&part->disengage_request_timer);
	}
998

999 1000
	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
		XPC_PARTID(part), reason);
1001

1002
	xpc_partition_going_down(part, reason);
1003 1004 1005 1006
}


/*
 * Put the specified partition into the INACTIVE state and forget the
 * physical address of its remote reserved page.
 */
void
xpc_mark_partition_inactive(struct xpc_partition *part)
{
	unsigned long flags;


	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
		XPC_PARTID(part));

	/* the state change itself is made under act_lock */
	spin_lock_irqsave(&part->act_lock, flags);
	part->act_state = XPC_P_INACTIVE;
	spin_unlock_irqrestore(&part->act_lock, flags);

	/* reset only after the lock has been released */
	part->remote_rp_pa = 0;
}


/*
 * SAL has provided a partition and machine mask.  The partition mask
 * contains a bit for each even nasid in our partition.  The machine
 * mask contains a bit for each even nasid in the entire machine.
 *
 * Using those two bit arrays, we can determine which nasids are
 * known in the machine.  Each should also have a reserved page
 * initialized if they are available for partitioning.
 */
void
xpc_discovery(void)
{
	void *remote_rp_base;
	struct xpc_rsvd_page *remote_rp;
	struct xpc_vars *remote_vars;
1040
	u64 remote_rp_pa;
1041 1042
	u64 remote_vars_pa;
	int region;
1043
	int region_size;
1044 1045 1046 1047 1048 1049 1050 1051 1052
	int max_regions;
	int nasid;
	struct xpc_rsvd_page *rp;
	partid_t partid;
	struct xpc_partition *part;
	u64 *discovered_nasids;
	enum xpc_retval ret;


1053 1054
	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
						xp_nasid_mask_bytes,
1055 1056 1057 1058 1059 1060 1061
						GFP_KERNEL, &remote_rp_base);
	if (remote_rp == NULL) {
		return;
	}
	remote_vars = (struct xpc_vars *) remote_rp;


1062
	discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075
							GFP_KERNEL);
	if (discovered_nasids == NULL) {
		kfree(remote_rp_base);
		return;
	}

	rp = (struct xpc_rsvd_page *) xpc_rsvd_page;

	/*
	 * The term 'region' in this context refers to the minimum number of
	 * nodes that can comprise an access protection grouping. The access
	 * protection is in regards to memory, IOI and IPI.
	 */
1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088
	max_regions = 64;
	region_size = sn_region_size;

	switch (region_size) {
	case 128:
		max_regions *= 2;
	case 64:
		max_regions *= 2;
	case 32:
		max_regions *= 2;
		region_size = 16;
		DBUG_ON(!is_shub2());
	}
1089 1090 1091 1092 1093 1094 1095 1096 1097

	for (region = 0; region < max_regions; region++) {

		if ((volatile int) xpc_exiting) {
			break;
		}

		dev_dbg(xpc_part, "searching region %d\n", region);

1098 1099
		for (nasid = (region * region_size * 2);
		     nasid < ((region + 1) * region_size * 2);
1100 1101 1102 1103 1104 1105 1106 1107 1108
		     nasid += 2) {

			if ((volatile int) xpc_exiting) {
				break;
			}

			dev_dbg(xpc_part, "checking nasid %d\n", nasid);


1109
			if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
1110 1111 1112 1113 1114 1115
				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
					"part of the local partition; skipping "
					"region\n", nasid);
				break;
			}

1116
			if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133
				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
					"not on Numa-Link network at reset\n",
					nasid);
				continue;
			}

			if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
				dev_dbg(xpc_part, "Nasid %d is part of a "
					"partition which was previously "
					"discovered\n", nasid);
				continue;
			}


			/* pull over the reserved page structure */

			ret = xpc_get_remote_rp(nasid, discovered_nasids,
1134
					      remote_rp, &remote_rp_pa);
1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204
			if (ret != xpcSuccess) {
				dev_dbg(xpc_part, "unable to get reserved page "
					"from nasid %d, reason=%d\n", nasid,
					ret);

				if (ret == xpcLocalPartid) {
					break;
				}
				continue;
			}

			remote_vars_pa = remote_rp->vars_pa;

			partid = remote_rp->partid;
			part = &xpc_partitions[partid];


			/* pull over the cross partition variables */

			ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
			if (ret != xpcSuccess) {
				dev_dbg(xpc_part, "unable to get XPC variables "
					"from nasid %d, reason=%d\n", nasid,
					ret);

				XPC_DEACTIVATE_PARTITION(part, ret);
				continue;
			}

			if (part->act_state != XPC_P_INACTIVE) {
				dev_dbg(xpc_part, "partition %d on nasid %d is "
					"already activating\n", partid, nasid);
				break;
			}

			/*
			 * Register the remote partition's AMOs with SAL so it
			 * can handle and cleanup errors within that address
			 * range should the remote partition go down. We don't
			 * unregister this range because it is difficult to
			 * tell when outstanding writes to the remote partition
			 * are finished and thus when it is thus safe to
			 * unregister. This should not result in wasted space
			 * in the SAL xp_addr_region table because we should
			 * get the same page for remote_act_amos_pa after
			 * module reloads and system reboots.
			 */
			if (sn_register_xp_addr_region(
					    remote_vars->amos_page_pa,
							PAGE_SIZE, 1) < 0) {
				dev_dbg(xpc_part, "partition %d failed to "
					"register xp_addr region 0x%016lx\n",
					partid, remote_vars->amos_page_pa);

				XPC_SET_REASON(part, xpcPhysAddrRegFailed,
						__LINE__);
				break;
			}

			/*
			 * The remote nasid is valid and available.
			 * Send an interrupt to that nasid to notify
			 * it that we are ready to begin activation.
			 */
			dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
				"nasid %d, phys_cpuid 0x%x\n",
				remote_vars->amos_page_pa,
				remote_vars->act_nasid,
				remote_vars->act_phys_cpuid);

1205 1206 1207 1208 1209 1210 1211
			if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
								version)) {
				part->remote_amos_page_pa =
						remote_vars->amos_page_pa;
				xpc_mark_partition_disengaged(part);
				xpc_cancel_partition_disengage_request(part);
			}
1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222
			xpc_IPI_send_activate(remote_vars);
		}
	}

	kfree(discovered_nasids);
	kfree(remote_rp_base);
}


/*
 * Given a partid, get the nasids owned by that partition from the
 * remote partition's reserved page.
 *
 * partid     - partition whose nasid mask is wanted
 * nasid_mask - caller-supplied buffer that receives the mask; the first
 *              XP_NASID_MASK_BYTES bytes are zeroed and then
 *              xp_nasid_mask_bytes bytes are copied in from the remote
 *              reserved page
 *
 * Returns xpcPartitionDown if no remote reserved page address is recorded
 * for the partition; otherwise the result of the BTE copy mapped through
 * xpc_map_bte_errors().
 */
enum xpc_retval
xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
{
	struct xpc_partition *part;
	u64 part_nasid_pa;
	int bte_res;


	part = &xpc_partitions[partid];
	if (part->remote_rp_pa == 0) {
		return xpcPartitionDown;
	}

	/* clear the whole buffer, as fewer bytes may be copied in below */
	memset(nasid_mask, 0, XP_NASID_MASK_BYTES);

	part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);

	bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
			xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);

	return xpc_map_bte_errors(bte_res);
}