/*
 * Low-level SPU handling
 *
 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
 *
 * Author: Arnd Bergmann <arndb@de.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/mutex.h>
#include <asm/spu.h>
#include <asm/spu_priv1.h>
#include <asm/xmon.h>

const struct spu_management_ops *spu_management_ops;
EXPORT_SYMBOL_GPL(spu_management_ops);

const struct spu_priv1_ops *spu_priv1_ops;
EXPORT_SYMBOL_GPL(spu_priv1_ops);

static struct list_head spu_list[MAX_NUMNODES];
static LIST_HEAD(spu_full_list);
static DEFINE_MUTEX(spu_mutex);
static DEFINE_SPINLOCK(spu_list_lock);

void spu_invalidate_slbs(struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)
		out_be64(&priv2->slb_invalidate_all_W, 0UL);
}
EXPORT_SYMBOL_GPL(spu_invalidate_slbs);

/* This is called by the MM core when a segment size is changed, to
 * request a flush of all the SPEs using a given mm
 */
void spu_flush_all_slbs(struct mm_struct *mm)
{
	struct spu *spu;
	unsigned long flags;

	spin_lock_irqsave(&spu_list_lock, flags);
	list_for_each_entry(spu, &spu_full_list, full_list) {
		if (spu->mm == mm)
			spu_invalidate_slbs(spu);
	}
	spin_unlock_irqrestore(&spu_list_lock, flags);
}

/* The hack below stinks... try to do something better one of
 * these days... Does it even work properly with NR_CPUS == 1 ?
 */
static inline void mm_needs_global_tlbie(struct mm_struct *mm)
{
	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;

	/* Global TLBIE broadcast required with SPEs. */
	__cpus_setall(&mm->cpu_vm_mask, nr);
}

void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
{
	unsigned long flags;

	spin_lock_irqsave(&spu_list_lock, flags);
	spu->mm = mm;
	spin_unlock_irqrestore(&spu_list_lock, flags);
	if (mm)
		mm_needs_global_tlbie(mm);
}
EXPORT_SYMBOL_GPL(spu_associate_mm);

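/*
 * The three __spu_trap_* helpers below forward MFC error events (DMA
 * alignment, invalid DMA, SPU error) to the owner of the SPU context
 * through the registered dma_callback.
 */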
static int __spu_trap_invalid_dma(struct spu *spu)
{
	pr_debug("%s\n", __func__);
	spu->dma_callback(spu, SPE_EVENT_INVALID_DMA);
	return 0;
}

static int __spu_trap_dma_align(struct spu *spu)
{
	pr_debug("%s\n", __func__);
	spu->dma_callback(spu, SPE_EVENT_DMA_ALIGNMENT);
	return 0;
}

static int __spu_trap_error(struct spu *spu)
{
	pr_debug("%s\n", __func__);
	spu->dma_callback(spu, SPE_EVENT_SPE_ERROR);
	return 0;
}

static void spu_restart_dma(struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags))
		out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
}

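/*
 * SLB miss on an SPE-initiated access: build an SLB entry for the faulting
 * effective address and write it into the SPU's SLB, replacing the eight
 * entries round-robin, then let the MFC retry the DMA.
 */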
static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;
	struct mm_struct *mm = spu->mm;
	u64 esid, vsid, llp;
	int psize;

	pr_debug("%s\n", __func__);

	if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) {
		/* SLBs are pre-loaded for context switch, so
		 * we should never get here!
		 */
		printk(KERN_ERR "%s: invalid access during switch!\n", __func__);
		return 1;
	}
	esid = (ea & ESID_MASK) | SLB_ESID_V;

	switch (REGION_ID(ea)) {
	case USER_REGION_ID:
#ifdef CONFIG_PPC_MM_SLICES
		psize = get_slice_psize(mm, ea);
#else
		psize = mm->context.user_psize;
#endif
		vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
				SLB_VSID_USER;
		break;
	case VMALLOC_REGION_ID:
		if (ea < VMALLOC_END)
			psize = mmu_vmalloc_psize;
		else
			psize = mmu_io_psize;
		vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
			SLB_VSID_KERNEL;
		break;
	case KERNEL_REGION_ID:
		psize = mmu_linear_psize;
		vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
			SLB_VSID_KERNEL;
		break;
	default:
		/* Future: support kernel segments so that drivers
		 * can use SPUs.
		 */
		pr_debug("invalid region access at %016lx\n", ea);
		return 1;
	}
	llp = mmu_psize_defs[psize].sllp;

	out_be64(&priv2->slb_index_W, spu->slb_replace);
	out_be64(&priv2->slb_vsid_RW, vsid | llp);
	out_be64(&priv2->slb_esid_RW, esid);

	spu->slb_replace++;
	if (spu->slb_replace >= 8)
		spu->slb_replace = 0;

	spu_restart_dma(spu);
	spu->stats.slb_flt++;
	return 0;
}

extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX
static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
{
	pr_debug("%s, %lx, %lx\n", __func__, dsisr, ea);

	/* Handle kernel space hash faults immediately.
	   User hash faults need to be deferred to process context. */
	if ((dsisr & MFC_DSISR_PTE_NOT_FOUND)
	    && REGION_ID(ea) != USER_REGION_ID
	    && hash_page(ea, _PAGE_PRESENT, 0x300) == 0) {
		spu_restart_dma(spu);
		return 0;
	}

	if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) {
		printk(KERN_ERR "%s: invalid access during switch!\n", __func__);
		return 1;
	}

	spu->dar = ea;
	spu->dsisr = dsisr;
	mb();
	spu->stop_callback(spu);
	return 0;
}

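/*
 * Class 0 interrupts signal fatal conditions (DMA alignment, invalid MFC DMA,
 * SPU error).  The hard irq handler only flags the event and notifies the
 * context via stop_callback; the status is read, dispatched and cleared in
 * spu_irq_class_0_bottom().
 */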
static irqreturn_t
spu_irq_class_0(int irq, void *data)
{
	struct spu *spu;

	spu = data;
	spu->class_0_pending = 1;
	spu->stop_callback(spu);

	return IRQ_HANDLED;
}

int
spu_irq_class_0_bottom(struct spu *spu)
{
	unsigned long stat, mask;
	unsigned long flags;

	spu->class_0_pending = 0;

	spin_lock_irqsave(&spu->register_lock, flags);
	mask = spu_int_mask_get(spu, 0);
	stat = spu_int_stat_get(spu, 0);

	stat &= mask;

	if (stat & 1) /* invalid DMA alignment */
		__spu_trap_dma_align(spu);

	if (stat & 2) /* invalid MFC DMA */
		__spu_trap_invalid_dma(spu);

	if (stat & 4) /* error on SPU */
		__spu_trap_error(spu);

	spu_int_stat_clear(spu, 0, stat);
	spin_unlock_irqrestore(&spu->register_lock, flags);

	return (stat & 0x7) ? -EIO : 0;
}
EXPORT_SYMBOL_GPL(spu_irq_class_0_bottom);

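/*
 * Class 1 interrupts report MFC address translation faults raised by
 * SPE-side DMA: segment (SLB) faults and mapping (page table) faults.
 */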
static irqreturn_t
spu_irq_class_1(int irq, void *data)
{
	struct spu *spu;
	unsigned long stat, mask, dar, dsisr;

	spu = data;

	/* atomically read & clear class1 status. */
	spin_lock(&spu->register_lock);
	mask  = spu_int_mask_get(spu, 1);
	stat  = spu_int_stat_get(spu, 1) & mask;
	dar   = spu_mfc_dar_get(spu);
	dsisr = spu_mfc_dsisr_get(spu);
	if (stat & 2) /* mapping fault */
		spu_mfc_dsisr_set(spu, 0ul);
	spu_int_stat_clear(spu, 1, stat);
	spin_unlock(&spu->register_lock);
	pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat,
			dar, dsisr);

	if (stat & 1) /* segment fault */
		__spu_trap_data_seg(spu, dar);

	if (stat & 2) { /* mapping fault */
		__spu_trap_data_map(spu, dar, dsisr);
	}

	if (stat & 4) /* ls compare & suspend on get */
		;

	if (stat & 8) /* ls compare & suspend on put */
		;

	return stat ? IRQ_HANDLED : IRQ_NONE;
}

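/*
 * Class 2 interrupts are delivered for mailbox traffic, SPU stop-and-signal,
 * SPU halt and DMA tag group completion.
 */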
static irqreturn_t
spu_irq_class_2(int irq, void *data)
{
	struct spu *spu;
	unsigned long stat;
	unsigned long mask;

	spu = data;
	spin_lock(&spu->register_lock);
	stat = spu_int_stat_get(spu, 2);
	mask = spu_int_mask_get(spu, 2);
	/* ignore interrupts we're not waiting for */
	stat &= mask;
	/*
	 * mailbox interrupts (0x1 and 0x10) are level triggered.
	 * mask them now before acknowledging.
	 */
	if (stat & 0x11)
		spu_int_mask_and(spu, 2, ~(stat & 0x11));
	/* acknowledge all interrupts before the callbacks */
	spu_int_stat_clear(spu, 2, stat);
	spin_unlock(&spu->register_lock);

	pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask);

	if (stat & 1) /* PPC core mailbox */
		spu->ibox_callback(spu);

	if (stat & 2) /* SPU stop-and-signal */
		spu->stop_callback(spu);

	if (stat & 4) /* SPU halted */
		spu->stop_callback(spu);

	if (stat & 8) /* DMA tag group complete */
		spu->mfc_callback(spu);

	if (stat & 0x10) /* SPU mailbox threshold */
		spu->wbox_callback(spu);

	spu->stats.class2_intr++;
	return stat ? IRQ_HANDLED : IRQ_NONE;
}

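/*
 * Each SPE exposes up to three interrupt lines, one per interrupt class.
 * Register a handler for every line that is actually wired up.
 */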
static int spu_request_irqs(struct spu *spu)
{
	int ret = 0;

	if (spu->irqs[0] != NO_IRQ) {
		snprintf(spu->irq_c0, sizeof(spu->irq_c0), "spe%02d.0",
			 spu->number);
		ret = request_irq(spu->irqs[0], spu_irq_class_0,
				  IRQF_DISABLED,
				  spu->irq_c0, spu);
		if (ret)
			goto bail0;
	}
	if (spu->irqs[1] != NO_IRQ) {
		snprintf(spu->irq_c1, sizeof(spu->irq_c1), "spe%02d.1",
			 spu->number);
		ret = request_irq(spu->irqs[1], spu_irq_class_1,
				  IRQF_DISABLED,
				  spu->irq_c1, spu);
		if (ret)
			goto bail1;
	}
	if (spu->irqs[2] != NO_IRQ) {
		snprintf(spu->irq_c2, sizeof(spu->irq_c2), "spe%02d.2",
			 spu->number);
		ret = request_irq(spu->irqs[2], spu_irq_class_2,
				  IRQF_DISABLED,
				  spu->irq_c2, spu);
		if (ret)
			goto bail2;
	}
	return 0;

bail2:
	if (spu->irqs[1] != NO_IRQ)
		free_irq(spu->irqs[1], spu);
bail1:
	if (spu->irqs[0] != NO_IRQ)
		free_irq(spu->irqs[0], spu);
bail0:
	return ret;
}

static void spu_free_irqs(struct spu *spu)
{
	if (spu->irqs[0] != NO_IRQ)
		free_irq(spu->irqs[0], spu);
	if (spu->irqs[1] != NO_IRQ)
		free_irq(spu->irqs[1], spu);
	if (spu->irqs[2] != NO_IRQ)
		free_irq(spu->irqs[2], spu);
}

static void spu_init_channels(struct spu *spu)
{
	static const struct {
		 unsigned channel;
		 unsigned count;
	} zero_list[] = {
		{ 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, },
		{ 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, },
	}, count_list[] = {
		{ 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, },
		{ 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, },
		{ 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, },
	};
	struct spu_priv2 __iomem *priv2;
	int i;

	priv2 = spu->priv2;

	/* initialize all channel data to zero */
	for (i = 0; i < ARRAY_SIZE(zero_list); i++) {
		int count;

		out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel);
		for (count = 0; count < zero_list[i].count; count++)
			out_be64(&priv2->spu_chnldata_RW, 0);
	}

	/* initialize channel counts to meaningful values */
	for (i = 0; i < ARRAY_SIZE(count_list); i++) {
		out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel);
		out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
	}
}

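/*
 * Take an idle SPU from the given node's free list, if any, and reinitialize
 * its channels before handing it to the caller.
 */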
struct spu *spu_alloc_node(int node)
{
	struct spu *spu = NULL;

	mutex_lock(&spu_mutex);
	if (!list_empty(&spu_list[node])) {
		spu = list_entry(spu_list[node].next, struct spu, list);
		list_del_init(&spu->list);
		pr_debug("Got SPU %d %d\n", spu->number, spu->node);
	}
	mutex_unlock(&spu_mutex);

	if (spu)
		spu_init_channels(spu);
	return spu;
}
EXPORT_SYMBOL_GPL(spu_alloc_node);

struct spu *spu_alloc(void)
{
	struct spu *spu = NULL;
	int node;

	for (node = 0; node < MAX_NUMNODES; node++) {
		spu = spu_alloc_node(node);
		if (spu)
			break;
	}

	return spu;
}

void spu_free(struct spu *spu)
{
	mutex_lock(&spu_mutex);
	list_add_tail(&spu->list, &spu_list[spu->node]);
	mutex_unlock(&spu_mutex);
}
EXPORT_SYMBOL_GPL(spu_free);

static int spu_shutdown(struct sys_device *sysdev)
{
	struct spu *spu = container_of(sysdev, struct spu, sysdev);

	spu_free_irqs(spu);
	spu_destroy_spu(spu);
	return 0;
}

struct sysdev_class spu_sysdev_class = {
	set_kset_name("spu"),
	.shutdown = spu_shutdown,
};

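/*
 * Helpers for adding and removing extra sysfs attributes on every SPU's
 * sysdev; they walk spu_full_list under spu_mutex.
 */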
int spu_add_sysdev_attr(struct sysdev_attribute *attr)
{
	struct spu *spu;
	mutex_lock(&spu_mutex);

	list_for_each_entry(spu, &spu_full_list, full_list)
		sysdev_create_file(&spu->sysdev, attr);

	mutex_unlock(&spu_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);

int spu_add_sysdev_attr_group(struct attribute_group *attrs)
{
	struct spu *spu;
	mutex_lock(&spu_mutex);

	list_for_each_entry(spu, &spu_full_list, full_list)
		sysfs_create_group(&spu->sysdev.kobj, attrs);

	mutex_unlock(&spu_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);


void spu_remove_sysdev_attr(struct sysdev_attribute *attr)
{
	struct spu *spu;
	mutex_lock(&spu_mutex);

	list_for_each_entry(spu, &spu_full_list, full_list)
		sysdev_remove_file(&spu->sysdev, attr);

	mutex_unlock(&spu_mutex);
}
EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr);

void spu_remove_sysdev_attr_group(struct attribute_group *attrs)
{
	struct spu *spu;
	mutex_lock(&spu_mutex);

	list_for_each_entry(spu, &spu_full_list, full_list)
		sysfs_remove_group(&spu->sysdev.kobj, attrs);

	mutex_unlock(&spu_mutex);
}
EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group);

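/* Register one sysdev per SPU and link it to its NUMA node in sysfs. */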
static int spu_create_sysdev(struct spu *spu)
{
	int ret;

	spu->sysdev.id = spu->number;
	spu->sysdev.cls = &spu_sysdev_class;
	ret = sysdev_register(&spu->sysdev);
	if (ret) {
		printk(KERN_ERR "Can't register SPU %d with sysfs\n",
				spu->number);
		return ret;
	}

	sysfs_add_device_to_node(&spu->sysdev, spu->node);

	return 0;
}

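/*
 * Called via spu_enumerate_spus() for each physical SPE the platform code
 * finds: allocate and initialize the struct spu, set up the MFC, request
 * the interrupts and register the sysdev, then add the SPU to the lists.
 */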
static int __init create_spu(void *data)
{
	struct spu *spu;
	int ret;
	static int number;
	unsigned long flags;

	ret = -ENOMEM;
	spu = kzalloc(sizeof(*spu), GFP_KERNEL);
	if (!spu)
		goto out;

	spin_lock_init(&spu->register_lock);
	mutex_lock(&spu_mutex);
	spu->number = number++;
	mutex_unlock(&spu_mutex);

	ret = spu_create_spu(spu, data);
	if (ret)
		goto out_free;

	spu_mfc_sdr_setup(spu);
	spu_mfc_sr1_set(spu, 0x33);
	ret = spu_request_irqs(spu);
	if (ret)
		goto out_destroy;

	ret = spu_create_sysdev(spu);
	if (ret)
		goto out_free_irqs;

	mutex_lock(&spu_mutex);
	spin_lock_irqsave(&spu_list_lock, flags);
	list_add(&spu->list, &spu_list[spu->node]);
	list_add(&spu->full_list, &spu_full_list);
	spin_unlock_irqrestore(&spu_list_lock, flags);
	mutex_unlock(&spu_mutex);

	goto out;

out_free_irqs:
	spu_free_irqs(spu);
out_destroy:
	spu_destroy_spu(spu);
out_free:
	kfree(spu);
out:
	return ret;
}

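/*
 * Module init: register the sysdev class and ask the platform-specific
 * management ops to enumerate all SPEs, creating one struct spu per SPE.
 */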
static int __init init_spu_base(void)
{
	int i, ret = 0;

	for (i = 0; i < MAX_NUMNODES; i++)
		INIT_LIST_HEAD(&spu_list[i]);

	if (!spu_management_ops)
		goto out;

	/* create sysdev class for spus */
	ret = sysdev_class_register(&spu_sysdev_class);
	if (ret)
		goto out;

	ret = spu_enumerate_spus(create_spu);

	if (ret) {
		printk(KERN_WARNING "%s: Error initializing spus\n",
			__func__);
		goto out_unregister_sysdev_class;
	}

	xmon_register_spus(&spu_full_list);

	return 0;

 out_unregister_sysdev_class:
	sysdev_class_unregister(&spu_sysdev_class);
 out:
	return ret;
}
module_init(init_spu_base);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");