/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
 */

/*
 * Cross Partition Communication (XPC) partition support.
 *
 *	This is the part of XPC that detects the presence/absence of
 *	other partitions. It provides a heartbeat and monitors the
 *	heartbeats of other partitions.
 *
 */

#include <linux/kernel.h>
#include <linux/sysctl.h>
#include <linux/cache.h>
#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <asm/sn/intr.h>
#include <asm/sn/sn_sal.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/addrs.h>
#include "xpc.h"

/* XPC is exiting flag */
int xpc_exiting;

32
/* this partition's reserved page pointers */
33
struct xpc_rsvd_page *xpc_rsvd_page;
34 35
static unsigned long *xpc_part_nasids;
unsigned long *xpc_mach_nasids;
36

37 38
static int xpc_nasid_mask_nbytes;	/* #of bytes in nasid mask */
int xpc_nasid_mask_nlongs;	/* #of longs in nasid mask */
39

40
struct xpc_partition *xpc_partitions;
41

42 43 44
/*
 * Guarantee that the kmalloc'd memory is cacheline aligned.
 */
45
void *
46 47 48 49
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
	/* see if kmalloc will give us cachline aligned memory by default */
	*base = kmalloc(size, flags);
50
	if (*base == NULL)
51
		return NULL;
52 53

	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
54
		return *base;
55

56 57 58 59
	kfree(*base);

	/* nope, we'll have to do it ourselves */
	*base = kmalloc(size + L1_CACHE_BYTES, flags);
60
	if (*base == NULL)
61
		return NULL;
62

63
	return (void *)L1_CACHE_ALIGN((u64)*base);
64 65
}

66 67 68 69 70
/*
 * Given a nasid, get the physical address of the  partition's reserved page
 * for that nasid. This function returns 0 on any error.
 */
static u64
71
xpc_get_rsvd_page_pa(int nasid)
72
{
73
	enum xp_retval ret;
74 75 76 77
	s64 status;
	u64 cookie = 0;
	u64 rp_pa = nasid;	/* seed with nasid */
	u64 len = 0;
78 79 80
	u64 buf = buf;
	u64 buf_len = 0;
	void *buf_base = NULL;
81 82 83 84

	while (1) {

		status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
85
						       &len);
86 87 88 89 90

		dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
			"0x%016lx, address=0x%016lx, len=0x%016lx\n",
			status, cookie, rp_pa, len);

91
		if (status != SALRET_MORE_PASSES)
92 93
			break;

94
		/* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
95
		if (L1_CACHE_ALIGN(len) > buf_len) {
96
			kfree(buf_base);
97
			buf_len = L1_CACHE_ALIGN(len);
98 99 100
			buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
								 GFP_KERNEL,
								 &buf_base);
101 102 103 104 105 106
			if (buf_base == NULL) {
				dev_err(xpc_part, "unable to kmalloc "
					"len=0x%016lx\n", buf_len);
				status = SALRET_ERROR;
				break;
			}
107 108
		}

109 110 111
		ret = xp_remote_memcpy((void *)buf, (void *)rp_pa, buf_len);
		if (ret != xpSuccess) {
			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
112 113 114 115 116
			status = SALRET_ERROR;
			break;
		}
	}

117
	kfree(buf_base);
118

119
	if (status != SALRET_OK)
120
		rp_pa = 0;
121

122 123 124 125 126 127 128 129 130 131
	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
	return rp_pa;
}

/*
 * Fill the partition reserved page with the information needed by
 * other partitions to discover we are alive and establish initial
 * communications.
 */
struct xpc_rsvd_page *
132
xpc_setup_rsvd_page(void)
133 134
{
	struct xpc_rsvd_page *rp;
135
	u64 rp_pa;
136
	unsigned long new_ts_jiffies;
137 138 139

	/* get the local reserved page's address */

140 141 142
	preempt_disable();
	rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
	preempt_enable();
143 144 145 146
	if (rp_pa == 0) {
		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
		return NULL;
	}
147
	rp = (struct xpc_rsvd_page *)__va(rp_pa);
148

149 150 151 152 153 154 155 156 157 158
	if (rp->SAL_version < 3) {
		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
		rp->SAL_partid &= 0xff;
	}
	BUG_ON(rp->SAL_partid != sn_partition_id);

	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
		dev_err(xpc_part, "the reserved page's partid of %d is outside "
			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
			xp_max_npartitions);
159 160 161 162
		return NULL;
	}

	rp->version = XPC_RP_VERSION;
163
	rp->max_npartitions = xp_max_npartitions;
164

165 166 167
	/* establish the actual sizes of the nasid masks */
	if (rp->SAL_version == 1) {
		/* SAL_version 1 didn't set the nasids_size field */
168
		rp->SAL_nasids_size = 128;
169
	}
170 171 172
	xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
	xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
					      BITS_PER_BYTE);
173 174 175 176

	/* setup the pointers to the various items in the reserved page */
	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
177

178 179
	if (xpc_rsvd_page_init(rp) != xpSuccess)
		return NULL;
180 181

	/*
182
	 * Set timestamp of when reserved page was setup by XPC.
183 184 185
	 * This signifies to the remote partition that our reserved
	 * page is initialized.
	 */
186 187 188 189
	new_ts_jiffies = jiffies;
	if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
		new_ts_jiffies++;
	rp->ts_jiffies = new_ts_jiffies;
190 191 192 193 194

	return rp;
}

/*
195
 * Get a copy of a portion of the remote partition's rsvd page.
196 197
 *
 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
198 199
 * is large enough to contain a copy of their reserved page header and
 * part_nasids mask.
200
 */
201
enum xp_retval
202
xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
203
		  struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
204
{
205
	int l;
206
	enum xp_retval ret;
207 208 209

	/* get the reserved page's physical address */

210
	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
211
	if (*remote_rp_pa == 0)
212
		return xpNoRsvdPageAddr;
213

214
	/* pull over the reserved page header and part_nasids mask */
215
	ret = xp_remote_memcpy(remote_rp, (void *)*remote_rp_pa,
216
			       XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
217 218
	if (ret != xpSuccess)
		return ret;
219 220

	if (discovered_nasids != NULL) {
221 222
		unsigned long *remote_part_nasids =
		    XPC_RP_PART_NASIDS(remote_rp);
223

224 225
		for (l = 0; l < xpc_nasid_mask_nlongs; l++)
			discovered_nasids[l] |= remote_part_nasids[l];
226 227
	}

228 229
	/* zero timestamp indicates the reserved page has not been setup */
	if (remote_rp->ts_jiffies == 0)
230 231
		return xpRsvdPageNotSet;

232
	if (XPC_VERSION_MAJOR(remote_rp->version) !=
233
	    XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
234
		return xpBadVersion;
235 236
	}

237
	/* check that both remote and local partids are valid for each side */
238 239 240
	if (remote_rp->SAL_partid < 0 ||
	    remote_rp->SAL_partid >= xp_max_npartitions ||
	    remote_rp->max_npartitions <= sn_partition_id) {
241
		return xpInvalidPartid;
242 243 244 245
	}

	if (remote_rp->SAL_partid == sn_partition_id)
		return xpLocalPartid;
246

247
	return xpSuccess;
248 249
}

250
/*
251 252 253
 * See if the other side has responded to a partition deactivate request
 * from us. Though we requested the remote partition to deactivate with regard
 * to us, we really only need to wait for the other side to disengage from us.
254 255 256 257
 */
int
xpc_partition_disengaged(struct xpc_partition *part)
{
258
	short partid = XPC_PARTID(part);
259 260
	int disengaged;

261 262
	disengaged = !xpc_partition_engaged(partid);
	if (part->disengage_timeout) {
263
		if (!disengaged) {
264
			if (time_is_after_jiffies(part->disengage_timeout)) {
265 266 267 268 269
				/* timelimit hasn't been reached yet */
				return 0;
			}

			/*
270
			 * Other side hasn't responded to our deactivate
271 272 273
			 * request in a timely fashion, so assume it's dead.
			 */

274 275 276 277
			dev_info(xpc_part, "deactivate request to remote "
				 "partition %d timed out\n", partid);
			xpc_disengage_timedout = 1;
			xpc_assume_partition_disengaged(partid);
278 279
			disengaged = 1;
		}
280
		part->disengage_timeout = 0;
281 282

		/* cancel the timer function, provided it's not us */
283 284
		if (!in_interrupt())
			del_singleshot_timer_sync(&part->disengage_timer);
285 286

		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
287
			part->act_state != XPC_P_INACTIVE);
288
		if (part->act_state != XPC_P_INACTIVE)
289 290
			xpc_wakeup_channel_mgr(part);

291
		xpc_cancel_partition_deactivation_request(part);
292 293 294 295
	}
	return disengaged;
}

296 297 298
/*
 * Mark specified partition as active.
 */
299
enum xp_retval
300 301 302
xpc_mark_partition_active(struct xpc_partition *part)
{
	unsigned long irq_flags;
303
	enum xp_retval ret;
304 305 306 307 308 309

	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, irq_flags);
	if (part->act_state == XPC_P_ACTIVATING) {
		part->act_state = XPC_P_ACTIVE;
310
		ret = xpSuccess;
311
	} else {
312
		DBUG_ON(part->reason == xpSuccess);
313 314 315 316 317 318 319 320
		ret = part->reason;
	}
	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	return ret;
}

/*
321
 * Start the process of deactivating the specified partition.
322 323 324
 */
void
xpc_deactivate_partition(const int line, struct xpc_partition *part,
325
			 enum xp_retval reason)
326 327 328 329 330 331 332 333
{
	unsigned long irq_flags;

	spin_lock_irqsave(&part->act_lock, irq_flags);

	if (part->act_state == XPC_P_INACTIVE) {
		XPC_SET_REASON(part, reason, line);
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
334
		if (reason == xpReactivating) {
335
			/* we interrupt ourselves to reactivate partition */
336
			xpc_request_partition_reactivation(part);
337 338 339 340
		}
		return;
	}
	if (part->act_state == XPC_P_DEACTIVATING) {
341 342
		if ((part->reason == xpUnloading && reason != xpUnloading) ||
		    reason == xpReactivating) {
343 344 345 346 347 348 349 350 351 352 353
			XPC_SET_REASON(part, reason, line);
		}
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		return;
	}

	part->act_state = XPC_P_DEACTIVATING;
	XPC_SET_REASON(part, reason, line);

	spin_unlock_irqrestore(&part->act_lock, irq_flags);

354 355
	/* ask remote partition to deactivate with regard to us */
	xpc_request_partition_deactivation(part);
356

357 358 359 360
	/* set a timelimit on the disengage phase of the deactivation request */
	part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
	part->disengage_timer.expires = part->disengage_timeout;
	add_timer(&part->disengage_timer);
361

362 363
	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
		XPC_PARTID(part), reason);
364

365
	xpc_partition_going_down(part, reason);
366 367 368
}

/*
369
 * Mark specified partition as inactive.
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
 */
void
xpc_mark_partition_inactive(struct xpc_partition *part)
{
	unsigned long irq_flags;

	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
		XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, irq_flags);
	part->act_state = XPC_P_INACTIVE;
	spin_unlock_irqrestore(&part->act_lock, irq_flags);
	part->remote_rp_pa = 0;
}

/*
 * SAL has provided a partition and machine mask.  The partition mask
 * contains a bit for each even nasid in our partition.  The machine
 * mask contains a bit for each even nasid in the entire machine.
 *
 * Using those two bit arrays, we can determine which nasids are
 * known in the machine.  Each should also have a reserved page
 * initialized if they are available for partitioning.
 */
void
xpc_discovery(void)
{
	void *remote_rp_base;
	struct xpc_rsvd_page *remote_rp;
399
	u64 remote_rp_pa;
400
	int region;
401
	int region_size;
402 403 404
	int max_regions;
	int nasid;
	struct xpc_rsvd_page *rp;
405
	unsigned long *discovered_nasids;
406
	enum xp_retval ret;
407

408
	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
409
						  xpc_nasid_mask_nbytes,
410
						  GFP_KERNEL, &remote_rp_base);
411
	if (remote_rp == NULL)
412
		return;
413

414
	discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
415
				    GFP_KERNEL);
416 417 418 419 420
	if (discovered_nasids == NULL) {
		kfree(remote_rp_base);
		return;
	}

421
	rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
422 423 424 425 426 427

	/*
	 * The term 'region' in this context refers to the minimum number of
	 * nodes that can comprise an access protection grouping. The access
	 * protection is in regards to memory, IOI and IPI.
	 */
428 429 430 431 432 433 434 435 436 437 438 439 440
	max_regions = 64;
	region_size = sn_region_size;

	switch (region_size) {
	case 128:
		max_regions *= 2;
	case 64:
		max_regions *= 2;
	case 32:
		max_regions *= 2;
		region_size = 16;
		DBUG_ON(!is_shub2());
	}
441 442 443

	for (region = 0; region < max_regions; region++) {

444
		if (xpc_exiting)
445 446 447 448
			break;

		dev_dbg(xpc_part, "searching region %d\n", region);

449
		for (nasid = (region * region_size * 2);
450
		     nasid < ((region + 1) * region_size * 2); nasid += 2) {
451

452
			if (xpc_exiting)
453 454 455 456
				break;

			dev_dbg(xpc_part, "checking nasid %d\n", nasid);

457
			if (test_bit(nasid / 2, xpc_part_nasids)) {
458 459 460 461 462 463
				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
					"part of the local partition; skipping "
					"region\n", nasid);
				break;
			}

464
			if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
465 466 467 468 469 470
				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
					"not on Numa-Link network at reset\n",
					nasid);
				continue;
			}

471
			if (test_bit(nasid / 2, discovered_nasids)) {
472 473 474 475 476 477
				dev_dbg(xpc_part, "Nasid %d is part of a "
					"partition which was previously "
					"discovered\n", nasid);
				continue;
			}

478
			/* pull over the rsvd page header & part_nasids mask */
479 480

			ret = xpc_get_remote_rp(nasid, discovered_nasids,
481
						remote_rp, &remote_rp_pa);
482
			if (ret != xpSuccess) {
483 484 485 486
				dev_dbg(xpc_part, "unable to get reserved page "
					"from nasid %d, reason=%d\n", nasid,
					ret);

487
				if (ret == xpLocalPartid)
488
					break;
489

490 491 492
				continue;
			}

493 494
			xpc_request_partition_activation(remote_rp,
							 remote_rp_pa, nasid);
495 496 497 498 499 500 501 502 503
		}
	}

	kfree(discovered_nasids);
	kfree(remote_rp_base);
}

/*
 * Given a partid, get the nasids owned by that partition from the
504
 * remote partition's reserved page.
505
 */
506
enum xp_retval
507
xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
508 509 510 511 512
{
	struct xpc_partition *part;
	u64 part_nasid_pa;

	part = &xpc_partitions[partid];
513
	if (part->remote_rp_pa == 0)
514
		return xpPartitionDown;
515

516
	memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
517

518
	part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
519

520
	return xp_remote_memcpy(nasid_mask, (void *)part_nasid_pa,
521
				xpc_nasid_mask_nbytes);
522
}