/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
 */

/*
 * Cross Partition Communication (XPC) partition support.
 *
 *	This is the part of XPC that detects the presence/absence of
 *	other partitions. It provides a heartbeat and monitors the
 *	heartbeats of other partitions.
 *
 */

18 19
#include <linux/device.h>
#include <linux/hardirq.h>
20
#include "xpc.h"
21 22 23 24

/* XPC is exiting flag */
int xpc_exiting;

25
/* this partition's reserved page pointers */
26
struct xpc_rsvd_page *xpc_rsvd_page;
27 28
static unsigned long *xpc_part_nasids;
unsigned long *xpc_mach_nasids;
29

30 31
static int xpc_nasid_mask_nbytes;	/* #of bytes in nasid mask */
int xpc_nasid_mask_nlongs;	/* #of longs in nasid mask */
32

33
struct xpc_partition *xpc_partitions;
34

35 36 37
/*
 * Guarantee that the kmalloc'd memory is cacheline aligned.
 */
38
void *
39 40 41 42
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
	/* see if kmalloc will give us cachline aligned memory by default */
	*base = kmalloc(size, flags);
43
	if (*base == NULL)
44
		return NULL;
45 46

	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
47
		return *base;
48

49 50 51 52
	kfree(*base);

	/* nope, we'll have to do it ourselves */
	*base = kmalloc(size + L1_CACHE_BYTES, flags);
53
	if (*base == NULL)
54
		return NULL;
55

56
	return (void *)L1_CACHE_ALIGN((u64)*base);
57 58
}

59 60 61 62
/*
 * Given a nasid, get the physical address of the  partition's reserved page
 * for that nasid. This function returns 0 on any error.
 */
63
static unsigned long
64
xpc_get_rsvd_page_pa(int nasid)
65
{
66
	enum xp_retval ret;
67
	u64 cookie = 0;
68
	unsigned long rp_pa = nasid;	/* seed with nasid */
69
	size_t len = 0;
70 71
	size_t buf_len = 0;
	void *buf = buf;
72
	void *buf_base = NULL;
73 74 75

	while (1) {

76 77
		ret = xpc_get_partition_rsvd_page_pa(buf, &cookie, &rp_pa,
						     &len);
78

79 80
		dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
			"address=0x%016lx, len=0x%016lx\n", ret,
81
			(unsigned long)cookie, rp_pa, len);
82

83
		if (ret != xpNeedMoreInfo)
84 85
			break;

86
		/* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
87
		if (L1_CACHE_ALIGN(len) > buf_len) {
88
			kfree(buf_base);
89
			buf_len = L1_CACHE_ALIGN(len);
90 91
			buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
							    &buf_base);
92 93
			if (buf_base == NULL) {
				dev_err(xpc_part, "unable to kmalloc "
94
					"len=0x%016lx\n", buf_len);
95
				ret = xpNoMemory;
96 97
				break;
			}
98 99
		}

100
		ret = xp_remote_memcpy(xp_pa(buf), rp_pa, buf_len);
101 102
		if (ret != xpSuccess) {
			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
103 104 105 106
			break;
		}
	}

107
	kfree(buf_base);
108

109
	if (ret != xpSuccess)
110
		rp_pa = 0;
111

112
	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
113 114 115 116 117 118 119 120 121
	return rp_pa;
}

/*
 * Fill the partition reserved page with the information needed by
 * other partitions to discover we are alive and establish initial
 * communications.
 *
 * Returns a pointer to the local reserved page, or NULL on any failure.
 */
struct xpc_rsvd_page *
xpc_setup_rsvd_page(void)
{
	struct xpc_rsvd_page *rp;
	unsigned long rp_pa;
	unsigned long new_ts_jiffies;

	/* get the local reserved page's address */

	/* preemption disabled so smp_processor_id() stays meaningful */
	preempt_disable();
	rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
	preempt_enable();
	if (rp_pa == 0) {
		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
		return NULL;
	}
	rp = (struct xpc_rsvd_page *)__va(rp_pa);

	if (rp->SAL_version < 3) {
		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
		rp->SAL_partid &= 0xff;
	}
	BUG_ON(rp->SAL_partid != xp_partition_id);

	/* defensive range check (the BUG_ON above already pinned the value) */
	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
		dev_err(xpc_part, "the reserved page's partid of %d is outside "
			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
			xp_max_npartitions);
		return NULL;
	}

	rp->version = XPC_RP_VERSION;
	rp->max_npartitions = xp_max_npartitions;

	/* establish the actual sizes of the nasid masks */
	if (rp->SAL_version == 1) {
		/* SAL_version 1 didn't set the nasids_size field */
		rp->SAL_nasids_size = 128;
	}
	xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
	xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
					      BITS_PER_BYTE);

	/* setup the pointers to the various items in the reserved page */
	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);

	if (xpc_rsvd_page_init(rp) != xpSuccess)
		return NULL;

	/*
	 * Set timestamp of when reserved page was setup by XPC.
	 * This signifies to the remote partition that our reserved
	 * page is initialized.
	 */
	new_ts_jiffies = jiffies;
	/* 0 means "not set up", and the stamp must visibly change */
	if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
		new_ts_jiffies++;
	rp->ts_jiffies = new_ts_jiffies;

	return rp;
}

/*
 * Get a copy of a portion of the remote partition's rsvd page.
 *
 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
 * is large enough to contain a copy of their reserved page header and
 * part_nasids mask.
 *
 * If discovered_nasids is non-NULL, the remote partition's part_nasids bits
 * are OR'd into it. *remote_rp_pa receives the remote page's physical
 * address. Returns xpSuccess or a specific xp_retval error.
 */
enum xp_retval
xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
		  struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
{
	int l;
	enum xp_retval ret;

	/* get the reserved page's physical address */

	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
	if (*remote_rp_pa == 0)
		return xpNoRsvdPageAddr;

	/* pull over the reserved page header and part_nasids mask */
	ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
			       XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
	if (ret != xpSuccess)
		return ret;

	/* accumulate the remote partition's nasids for the caller */
	if (discovered_nasids != NULL) {
		unsigned long *remote_part_nasids =
		    XPC_RP_PART_NASIDS(remote_rp);

		for (l = 0; l < xpc_nasid_mask_nlongs; l++)
			discovered_nasids[l] |= remote_part_nasids[l];
	}

	/* zero timestamp indicates the reserved page has not been setup */
	if (remote_rp->ts_jiffies == 0)
		return xpRsvdPageNotSet;

	/* reject a remote side running an incompatible major XPC version */
	if (XPC_VERSION_MAJOR(remote_rp->version) !=
	    XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
		return xpBadVersion;
	}

	/* check that both remote and local partids are valid for each side */
	if (remote_rp->SAL_partid < 0 ||
	    remote_rp->SAL_partid >= xp_max_npartitions ||
	    remote_rp->max_npartitions <= xp_partition_id) {
		return xpInvalidPartid;
	}

	if (remote_rp->SAL_partid == xp_partition_id)
		return xpLocalPartid;

	return xpSuccess;
}

240
/*
 * See if the other side has responded to a partition deactivate request
 * from us. Though we requested the remote partition to deactivate with regard
 * to us, we really only need to wait for the other side to disengage from us.
 *
 * Returns non-zero once the remote partition has disengaged (or has been
 * presumed dead after the disengage timeout expired).
 */
int
xpc_partition_disengaged(struct xpc_partition *part)
{
	short partid = XPC_PARTID(part);
	int disengaged;

	disengaged = !xpc_partition_engaged(partid);
	if (part->disengage_timeout) {
		if (!disengaged) {
			if (time_is_after_jiffies(part->disengage_timeout)) {
				/* timelimit hasn't been reached yet */
				return 0;
			}

			/*
			 * Other side hasn't responded to our deactivate
			 * request in a timely fashion, so assume it's dead.
			 */

			dev_info(xpc_part, "deactivate request to remote "
				 "partition %d timed out\n", partid);
			xpc_disengage_timedout = 1;
			xpc_assume_partition_disengaged(partid);
			disengaged = 1;
		}
		/* clearing the timeout marks the disengage phase finished */
		part->disengage_timeout = 0;

		/* cancel the timer function, provided it's not us */
		if (!in_interrupt())
			del_singleshot_timer_sync(&part->disengage_timer);

		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
			part->act_state != XPC_P_INACTIVE);
		if (part->act_state != XPC_P_INACTIVE)
			xpc_wakeup_channel_mgr(part);

		xpc_cancel_partition_deactivation_request(part);
	}
	return disengaged;
}

286 287 288
/*
 * Mark specified partition as active.
 */
289
enum xp_retval
290 291 292
xpc_mark_partition_active(struct xpc_partition *part)
{
	unsigned long irq_flags;
293
	enum xp_retval ret;
294 295 296 297 298 299

	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));

	spin_lock_irqsave(&part->act_lock, irq_flags);
	if (part->act_state == XPC_P_ACTIVATING) {
		part->act_state = XPC_P_ACTIVE;
300
		ret = xpSuccess;
301
	} else {
302
		DBUG_ON(part->reason == xpSuccess);
303 304 305 306 307 308 309 310
		ret = part->reason;
	}
	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	return ret;
}

/*
 * Start the process of deactivating the specified partition.
 *
 * 'line' records the caller's source line for XPC_SET_REASON() bookkeeping;
 * 'reason' records why deactivation was requested.
 */
void
xpc_deactivate_partition(const int line, struct xpc_partition *part,
			 enum xp_retval reason)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&part->act_lock, irq_flags);

	if (part->act_state == XPC_P_INACTIVE) {
		/* already inactive; just record the reason */
		XPC_SET_REASON(part, reason, line);
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		if (reason == xpReactivating) {
			/* we interrupt ourselves to reactivate partition */
			xpc_request_partition_reactivation(part);
		}
		return;
	}
	if (part->act_state == XPC_P_DEACTIVATING) {
		/* already deactivating; only upgrade the recorded reason */
		if ((part->reason == xpUnloading && reason != xpUnloading) ||
		    reason == xpReactivating) {
			XPC_SET_REASON(part, reason, line);
		}
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		return;
	}

	part->act_state = XPC_P_DEACTIVATING;
	XPC_SET_REASON(part, reason, line);

	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	/* ask remote partition to deactivate with regard to us */
	xpc_request_partition_deactivation(part);

	/* set a timelimit on the disengage phase of the deactivation request */
	part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
	part->disengage_timer.expires = part->disengage_timeout;
	add_timer(&part->disengage_timer);

	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
		XPC_PARTID(part), reason);

	xpc_partition_going_down(part, reason);
}

/*
 * Mark specified partition as inactive.
 */
void
xpc_mark_partition_inactive(struct xpc_partition *part)
{
	unsigned long irq_flags;

	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
		XPC_PARTID(part));

	/* act_state transitions are serialized by act_lock */
	spin_lock_irqsave(&part->act_lock, irq_flags);
	part->act_state = XPC_P_INACTIVE;
	spin_unlock_irqrestore(&part->act_lock, irq_flags);
	/* a zero remote_rp_pa makes xpc_initiate_partid_to_nasids()
	 * report xpPartitionDown for this partition */
	part->remote_rp_pa = 0;
}

/*
 * SAL has provided a partition and machine mask.  The partition mask
 * contains a bit for each even nasid in our partition.  The machine
 * mask contains a bit for each even nasid in the entire machine.
 *
 * Using those two bit arrays, we can determine which nasids are
 * known in the machine.  Each should also have a reserved page
 * initialized if they are available for partitioning.
 */
void
xpc_discovery(void)
{
	void *remote_rp_base;
	struct xpc_rsvd_page *remote_rp;
	unsigned long remote_rp_pa;
	int region;
	int region_size;
	int max_regions;
	int nasid;
	struct xpc_rsvd_page *rp;
	unsigned long *discovered_nasids;
	enum xp_retval ret;

	/* bounce buffer for copies of remote reserved pages */
	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
						  xpc_nasid_mask_nbytes,
						  GFP_KERNEL, &remote_rp_base);
	if (remote_rp == NULL)
		return;

	/* bitmask of nasids belonging to partitions already discovered */
	discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
				    GFP_KERNEL);
	if (discovered_nasids == NULL) {
		kfree(remote_rp_base);
		return;
	}

	/* NOTE(review): 'rp' appears unused below -- candidate for removal */
	rp = (struct xpc_rsvd_page *)xpc_rsvd_page;

	/*
	 * The term 'region' in this context refers to the minimum number of
	 * nodes that can comprise an access protection grouping. The access
	 * protection is in regards to memory, IOI and IPI.
	 */
	max_regions = 64;
	region_size = xp_region_size;

	/* the cases below intentionally fall through, doubling max_regions */
	switch (region_size) {
	case 128:
		max_regions *= 2;
		/* fallthrough */
	case 64:
		max_regions *= 2;
		/* fallthrough */
	case 32:
		max_regions *= 2;
		region_size = 16;
		DBUG_ON(!is_shub2());
	}

	for (region = 0; region < max_regions; region++) {

		if (xpc_exiting)
			break;

		dev_dbg(xpc_part, "searching region %d\n", region);

		/* nasids are even, hence the "* 2" scaling and "+= 2" step */
		for (nasid = (region * region_size * 2);
		     nasid < ((region + 1) * region_size * 2); nasid += 2) {

			if (xpc_exiting)
				break;

			dev_dbg(xpc_part, "checking nasid %d\n", nasid);

			if (test_bit(nasid / 2, xpc_part_nasids)) {
				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
					"part of the local partition; skipping "
					"region\n", nasid);
				break;
			}

			if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
					"not on Numa-Link network at reset\n",
					nasid);
				continue;
			}

			if (test_bit(nasid / 2, discovered_nasids)) {
				dev_dbg(xpc_part, "Nasid %d is part of a "
					"partition which was previously "
					"discovered\n", nasid);
				continue;
			}

			/* pull over the rsvd page header & part_nasids mask */

			ret = xpc_get_remote_rp(nasid, discovered_nasids,
						remote_rp, &remote_rp_pa);
			if (ret != xpSuccess) {
				dev_dbg(xpc_part, "unable to get reserved page "
					"from nasid %d, reason=%d\n", nasid,
					ret);

				/* our own partition; rest of region is local */
				if (ret == xpLocalPartid)
					break;

				continue;
			}

			xpc_request_partition_activation(remote_rp,
							 remote_rp_pa, nasid);
		}
	}

	kfree(discovered_nasids);
	kfree(remote_rp_base);
}

/*
 * Given a partid, get the nasids owned by that partition from the
494
 * remote partition's reserved page.
495
 */
496
enum xp_retval
497
xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
498 499
{
	struct xpc_partition *part;
500
	unsigned long part_nasid_pa;
501 502

	part = &xpc_partitions[partid];
503
	if (part->remote_rp_pa == 0)
504
		return xpPartitionDown;
505

506
	memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
507

508
	part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
509

510
	return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
511
				xpc_nasid_mask_nbytes);
512
}