/*
 * SN2 Platform specific SMP Support
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved.
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/nodemask.h>
23 24
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
L
Linus Torvalds 已提交
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43

#include <asm/processor.h>
#include <asm/irq.h>
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/delay.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/tlb.h>
#include <asm/numa.h>
#include <asm/hw_irq.h>
#include <asm/current.h>
#include <asm/sn/sn_cpuid.h>
#include <asm/sn/sn_sal.h>
#include <asm/sn/addrs.h>
#include <asm/sn/shub_mmr.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/rw_mmr.h>

44 45
/*
 * Per-cpu TLB purge statistics.  The purge and deadlock-recovery paths in
 * this file update the running cpu's copy through __get_cpu_var(); the
 * seq_file show routine reads any cpu's copy through per_cpu().
 */
DEFINE_PER_CPU(struct ptc_stats, ptcstats);
DECLARE_PER_CPU(struct ptc_stats, ptcstats);
L
Linus Torvalds 已提交
46 47 48

/*
 * Lock selected by PTC_LOCK() when running on shub1.  It is also taken around
 * the IPI MMR write in sn_send_IPI_phys() when enable_shub_wars_1_1() is true.
 */
static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);

49 50 51 52 53 54 55 56
/*
 * Low-level deadlock recovery step, defined outside this file.  The
 * parameter names mirror the arguments passed by
 * sn2_ptc_deadlock_recovery(); its return value is accumulated into the
 * deadlocks2 statistic.
 */
extern unsigned long
sn2_ptc_deadlock_recovery_core(volatile unsigned long *ptc0, unsigned long data0,
			       volatile unsigned long *ptc1, unsigned long data1,
			       volatile unsigned long *piows, unsigned long zeroval);

/* Forward declaration; the definition appears later in this file. */
void
sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
			  volatile unsigned long *ptc0, unsigned long data0,
			  volatile unsigned long *ptc1, unsigned long data1);
57 58

/*
 * Note: some of the following is captured here to make debugging easier
 * (the macros make more sense if you see the debug patch - not posted)
 *
 * The sh1 argument is the caller's is_shub1() result.
 */
#define sn2_ptctest	0
/* non-zero: purge the local node with ptc.ga rather than an MMR write */
#define local_node_uses_ptc_ga(sh1)	((sh1) ? 1 : 0)
/* number of MMR purge writes issued before waiting for PIO writes to drain */
#define max_active_pio(sh1)		((sh1) ? 32 : 7)
/* non-zero: drop to one outstanding purge write once a deadlock was seen */
#define reset_max_active_on_deadlock()	1
/* lock serializing purges: the global lock on shub1, else the node's lock */
#define PTC_LOCK(sh1)			((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
67 68 69 70 71 72 73

/*
 * Per-cpu counters maintained by the TLB purge code in this file and
 * reported, one line per online cpu, by sn2_ptc_seq_show().
 */
struct ptc_stats {
	/* purges completed locally with ptc.l */
	unsigned long ptc_l;
	/* purges handled by calling flush_tlb_mm() instead */
	unsigned long change_rid;
	/* purges that took the PTC lock and went through the shub path */
	unsigned long shub_ptc_flushes;
	/* running total of nodes targeted by those purges */
	unsigned long nodes_flushed;
	/* calls to sn2_ptc_deadlock_recovery() */
	unsigned long deadlocks;
	/* sum of the values returned by sn2_ptc_deadlock_recovery_core() */
	unsigned long deadlocks2;
	/* ITC ticks spent acquiring the PTC lock */
	unsigned long lock_itc_clocks;
	/* ITC ticks spent between acquiring the lock and finishing the purge */
	unsigned long shub_itc_clocks;
	/* largest single contribution to shub_itc_clocks */
	unsigned long shub_itc_clocks_max;
	/* shub-path purges where mm was not the caller's own mm */
	unsigned long shub_ptc_flushes_not_my_mm;
};
L
Linus Torvalds 已提交
80

81 82
/* Repeats the identical sn2_ptctest definition made earlier in this file. */
#define sn2_ptctest	0

L
Linus Torvalds 已提交
83 84
static inline unsigned long wait_piowc(void)
{
85 86
	volatile unsigned long *piows;
	unsigned long zeroval, ws;
L
Linus Torvalds 已提交
87 88 89 90 91 92

	piows = pda->pio_write_status_addr;
	zeroval = pda->pio_write_status_val;
	do {
		cpu_relax();
	} while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
93
	return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
L
Linus Torvalds 已提交
94 95 96 97
}

void sn_tlb_migrate_finish(struct mm_struct *mm)
{
98 99
	/* flush_tlb_mm is inefficient if more than 1 users of mm */
	if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
L
Linus Torvalds 已提交
100 101 102 103 104
		flush_tlb_mm(mm);
}

/**
 * sn2_global_tlb_purge - globally purge translation cache of virtual address range
105
 * @mm: mm_struct containing virtual address range
L
Linus Torvalds 已提交
106 107 108 109 110 111 112 113 114 115 116
 * @start: start of virtual address range
 * @end: end of virtual address range
 * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
 *
 * Purges the translation caches of all processors of the given virtual address
 * range.
 *
 * Note:
 * 	- cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
 * 	- cpu_vm_mask is converted into a nodemask of the nodes containing the
 * 	  cpus in cpu_vm_mask.
117 118 119 120
 *	- if only one bit is set in cpu_vm_mask & it is the current cpu & the
 *	  process is purging its own virtual address range, then only the
 *	  local TLB needs to be flushed. This flushing can be done using
 *	  ptc.l. This is the common case & avoids the global spinlock.
L
Linus Torvalds 已提交
121 122 123 124 125
 *	- if multiple cpus have loaded the context, then flushing has to be
 *	  done with ptc.g/MMRs under protection of the global ptc_lock.
 */

void
126 127
sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
		     unsigned long end, unsigned long nbits)
L
Linus Torvalds 已提交
128
{
129 130 131
	int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
	int mymm = (mm == current->active_mm && mm == current->mm);
	int use_cpu_ptcga;
L
Linus Torvalds 已提交
132
	volatile unsigned long *ptc0, *ptc1;
133
	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
L
Linus Torvalds 已提交
134 135
	short nasids[MAX_NUMNODES], nix;
	nodemask_t nodes_flushed;
136
	int active, max_active, deadlock;
L
Linus Torvalds 已提交
137 138 139 140 141 142 143 144 145 146 147

	nodes_clear(nodes_flushed);
	i = 0;

	for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
		cnode = cpu_to_node(cpu);
		node_set(cnode, nodes_flushed);
		lcpu = cpu;
		i++;
	}

148 149 150
	if (i == 0)
		return;

L
Linus Torvalds 已提交
151 152
	preempt_disable();

153
	if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) {
L
Linus Torvalds 已提交
154 155 156 157 158
		do {
			ia64_ptcl(start, nbits << 2);
			start += (1UL << nbits);
		} while (start < end);
		ia64_srlz_i();
159
		__get_cpu_var(ptcstats).ptc_l++;
L
Linus Torvalds 已提交
160 161 162 163
		preempt_enable();
		return;
	}

164
	if (atomic_read(&mm->mm_users) == 1 && mymm) {
L
Linus Torvalds 已提交
165
		flush_tlb_mm(mm);
166
		__get_cpu_var(ptcstats).change_rid++;
L
Linus Torvalds 已提交
167 168 169 170
		preempt_enable();
		return;
	}

171
	itc = ia64_get_itc();
L
Linus Torvalds 已提交
172 173 174 175
	nix = 0;
	for_each_node_mask(cnode, nodes_flushed)
		nasids[nix++] = cnodeid_to_nasid(cnode);

176 177
	rr_value = (mm->context << 3) | REGION_NUMBER(start);

L
Linus Torvalds 已提交
178 179 180 181
	shub1 = is_shub1();
	if (shub1) {
		data0 = (1UL << SH1_PTC_0_A_SHFT) |
		    	(nbits << SH1_PTC_0_PS_SHFT) |
182
			(rr_value << SH1_PTC_0_RID_SHFT) |
L
Linus Torvalds 已提交
183 184 185 186 187 188 189 190
		    	(1UL << SH1_PTC_0_START_SHFT);
		ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
		ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
	} else {
		data0 = (1UL << SH2_PTC_A_SHFT) |
			(nbits << SH2_PTC_PS_SHFT) |
		    	(1UL << SH2_PTC_START_SHFT);
		ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + 
191
			(rr_value << SH2_PTC_RID_SHFT));
L
Linus Torvalds 已提交
192 193 194 195 196
		ptc1 = NULL;
	}
	

	mynasid = get_nasid();
197 198
	use_cpu_ptcga = local_node_uses_ptc_ga(shub1);
	max_active = max_active_pio(shub1);
L
Linus Torvalds 已提交
199

200
	itc = ia64_get_itc();
201
	spin_lock_irqsave(PTC_LOCK(shub1), flags);
202
	itc2 = ia64_get_itc();
203

204 205 206
	__get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
	__get_cpu_var(ptcstats).shub_ptc_flushes++;
	__get_cpu_var(ptcstats).nodes_flushed += nix;
207 208
	if (!mymm)
		 __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++;
L
Linus Torvalds 已提交
209

210 211 212 213 214 215 216
	if (use_cpu_ptcga && !mymm) {
		old_rr = ia64_get_rr(start);
		ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8));
		ia64_srlz_d();
	}

	wait_piowc();
L
Linus Torvalds 已提交
217 218 219 220 221
	do {
		if (shub1)
			data1 = start | (1UL << SH1_PTC_1_START_SHFT);
		else
			data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
222 223 224
		deadlock = 0;
		active = 0;
		for (ibegin = 0, i = 0; i < nix; i++) {
L
Linus Torvalds 已提交
225
			nasid = nasids[i];
226
			if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
L
Linus Torvalds 已提交
227 228 229 230 231 232
				ia64_ptcga(start, nbits << 2);
				ia64_srlz_i();
			} else {
				ptc0 = CHANGE_NASID(nasid, ptc0);
				if (ptc1)
					ptc1 = CHANGE_NASID(nasid, ptc1);
233 234 235 236 237 238 239 240 241 242 243
				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
				active++;
			}
			if (active >= max_active || i == (nix - 1)) {
				if ((deadlock = wait_piowc())) {
					sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
					if (reset_max_active_on_deadlock())
						max_active = 1;
				}
				active = 0;
				ibegin = i + 1;
L
Linus Torvalds 已提交
244 245 246 247 248
			}
		}
		start += (1UL << nbits);
	} while (start < end);

249 250 251 252 253
	itc2 = ia64_get_itc() - itc2;
	__get_cpu_var(ptcstats).shub_itc_clocks += itc2;
	if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
		__get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;

254 255 256 257 258
	if (old_rr) {
		ia64_set_rr(start, old_rr);
		ia64_srlz_d();
	}

259
	spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
L
Linus Torvalds 已提交
260 261 262 263 264 265 266 267 268 269 270

	preempt_enable();
}

/*
 * sn2_ptc_deadlock_recovery
 *
 * Recover from PTC deadlocks conditions. Recovery requires stepping thru each 
 * TLB flush transaction.  The recovery sequence is somewhat tricky & is
 * coded in assembly language.
 */
271 272 273 274 275

void
sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
			  volatile unsigned long *ptc0, unsigned long data0,
			  volatile unsigned long *ptc1, unsigned long data1)
L
Linus Torvalds 已提交
276
{
277
	short nasid, i;
278
	unsigned long *piows, zeroval, n;
L
Linus Torvalds 已提交
279

280
	__get_cpu_var(ptcstats).deadlocks++;
L
Linus Torvalds 已提交
281

282
	piows = (unsigned long *) pda->pio_write_status_addr;
L
Linus Torvalds 已提交
283 284
	zeroval = pda->pio_write_status_val;

285 286

	for (i=ib; i <= ie; i++) {
287
		nasid = nasids[i];
288
		if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
L
Linus Torvalds 已提交
289 290 291 292
			continue;
		ptc0 = CHANGE_NASID(nasid, ptc0);
		if (ptc1)
			ptc1 = CHANGE_NASID(nasid, ptc1);
293 294 295

		n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
		__get_cpu_var(ptcstats).deadlocks2 += n;
L
Linus Torvalds 已提交
296
	}
297

L
Linus Torvalds 已提交
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373
}

/**
 * sn_send_IPI_phys - send an IPI to a Nasid and slice
 * @nasid: nasid to receive the interrupt (may be outside partition)
 * @physid: physical cpuid to receive the interrupt.
 * @vector: command to send
 * @delivery_mode: delivery mechanism
 *
 * Sends an IPI (interprocessor interrupt) to the processor specified by
 * @physid by writing a command word to the SH_IPI_INT MMR of @nasid.
 *
 * @delivery_mode can be one of the following
 *
 * %IA64_IPI_DM_INT - pend an interrupt
 * %IA64_IPI_DM_PMI - pend a PMI
 * %IA64_IPI_DM_NMI - pend an NMI
 * %IA64_IPI_DM_INIT - pend an INIT interrupt
 *
 * When enable_shub_wars_1_1() is true, the write is made with
 * sn2_global_ptc_lock held and is followed by wait_piowc().
 */
void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode)
{
	volatile long *ipi_mmr = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT);
	unsigned long flags = 0;
	long ipi_cmd;

	/* assemble the command: send bit, target, type, vector, base */
	ipi_cmd = (1UL << SH_IPI_INT_SEND_SHFT) |
		  (physid << SH_IPI_INT_PID_SHFT) |
		  ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) |
		  ((long)vector << SH_IPI_INT_IDX_SHFT) |
		  (0x000feeUL << SH_IPI_INT_BASE_SHFT);

	mb();
	if (enable_shub_wars_1_1())
		spin_lock_irqsave(&sn2_global_ptc_lock, flags);

	pio_phys_write_mmr(ipi_mmr, ipi_cmd);

	if (enable_shub_wars_1_1()) {
		wait_piowc();
		spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
	}
}

EXPORT_SYMBOL(sn_send_IPI_phys);

/**
 * sn2_send_IPI - send an IPI to a processor
 * @cpuid: target of the IPI
 * @vector: command to send
 * @delivery_mode: delivery mechanism
 * @redirect: redirect the IPI? (not referenced by this implementation)
 *
 * Resolves @cpuid to its physical id and nasid, then hands the request to
 * sn_send_IPI_phys().  @vector specifies the command to send, while
 * @delivery_mode can be one of the following
 *
 * %IA64_IPI_DM_INT - pend an interrupt
 * %IA64_IPI_DM_PMI - pend a PMI
 * %IA64_IPI_DM_NMI - pend an NMI
 * %IA64_IPI_DM_INIT - pend an INIT interrupt
 */
void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
{
	long phys_cpu = cpu_physical_id(cpuid);
	int target_nasid = cpuid_to_nasid(cpuid);

	/* the following is used only when starting cpus at boot time */
	if (unlikely(target_nasid == -1))
		ia64_sn_get_sapic_info(phys_cpu, &target_nasid, NULL, NULL);

	sn_send_IPI_phys(target_nasid, phys_cpu, vector, delivery_mode);
}
374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405

#ifdef CONFIG_PROC_FS

/* proc entry name passed to create_proc_entry() and remove_proc_entry() */
#define PTC_BASENAME	"sgi_sn/ptc_statistics"

/*
 * seq_file start callback: the iterator is the position pointer itself.
 * Returns @offset while *@offset is below NR_CPUS, otherwise NULL.
 */
static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset)
{
	return (*offset < NR_CPUS) ? offset : NULL;
}

/*
 * seq_file next callback: advance the position by one and return @offset
 * while the new position is still below NR_CPUS, otherwise NULL.
 */
static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset)
{
	loff_t next_pos = *offset + 1;

	*offset = next_pos;
	if (next_pos >= NR_CPUS)
		return NULL;
	return offset;
}

/* seq_file stop callback: intentionally empty, there is nothing to undo. */
static void sn2_ptc_seq_stop(struct seq_file *file, void *data)
{
}

static int sn2_ptc_seq_show(struct seq_file *file, void *data)
{
	struct ptc_stats *stat;
	int cpu;

	cpu = *(loff_t *) data;

	if (!cpu) {
406 407
		seq_printf(file,
			   "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n");
408 409 410 411 412
		seq_printf(file, "# ptctest %d\n", sn2_ptctest);
	}

	if (cpu < NR_CPUS && cpu_online(cpu)) {
		stat = &per_cpu(ptcstats, cpu);
413
		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
414 415 416 417
				stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
				stat->deadlocks,
				1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
				1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
418 419 420
				1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec,
				stat->shub_ptc_flushes_not_my_mm,
				stat->deadlocks2);
421 422 423 424 425 426 427 428 429 430 431
	}
	return 0;
}

/* seq_file iterator callbacks; handed to seq_open() by sn2_ptc_proc_open() */
static struct seq_operations sn2_ptc_seq_ops = {
	.start	= sn2_ptc_seq_start,
	.next	= sn2_ptc_seq_next,
	.stop	= sn2_ptc_seq_stop,
	.show	= sn2_ptc_seq_show,
};

432
static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
{
	return seq_open(file, &sn2_ptc_seq_ops);
}

/* file operations of the proc entry: custom open, stock seq_file helpers */
static struct file_operations proc_sn2_ptc_operations = {
	.open		= sn2_ptc_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

/* proc entry returned by create_proc_entry() in sn2_ptc_init() */
static struct proc_dir_entry *proc_sn2_ptc;

/*
 * sn2_ptc_init - create the PTC statistics proc entry
 *
 * Returns -ENOSYS when the platform is not "sn2", -EINVAL when the proc
 * entry cannot be created, and 0 on success.
 */
static int __init sn2_ptc_init(void)
{
	if (!ia64_platform_is("sn2"))
		return -ENOSYS;

	proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL);
	if (!proc_sn2_ptc) {
		printk(KERN_ERR "unable to create %s proc entry\n", PTC_BASENAME);
		return -EINVAL;
	}
	proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations;
	/*
	 * NOTE(review): sn2_global_ptc_lock is already set up by
	 * DEFINE_SPINLOCK(); this re-initialization looks redundant - confirm
	 * before removing.
	 */
	spin_lock_init(&sn2_global_ptc_lock);
	return 0;
}

/* Remove the proc entry that sn2_ptc_init() created. */
static void __exit sn2_ptc_exit(void)
{
	remove_proc_entry(PTC_BASENAME, NULL);
}

/* Register the init/exit hooks for the statistics proc entry. */
module_init(sn2_ptc_init);
module_exit(sn2_ptc_exit);
#endif /* CONFIG_PROC_FS */