spu_base.c
/*
 * Low-level SPU handling
 *
 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
 *
 * Author: Arnd Bergmann <arndb@de.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/mutex.h>
#include <asm/spu.h>
#include <asm/spu_priv1.h>
#include <asm/xmon.h>

const struct spu_management_ops *spu_management_ops;
EXPORT_SYMBOL_GPL(spu_management_ops);

const struct spu_priv1_ops *spu_priv1_ops;
EXPORT_SYMBOL_GPL(spu_priv1_ops);

static struct list_head spu_list[MAX_NUMNODES];
static LIST_HEAD(spu_full_list);
static DEFINE_MUTEX(spu_mutex);
static DEFINE_SPINLOCK(spu_list_lock);

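/*
 * Drop every cached SLB entry on this SPU.  The invalidate-all register
 * is only written while address relocation is enabled in MFC_SR1.
 */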
void spu_invalidate_slbs(struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)
		out_be64(&priv2->slb_invalidate_all_W, 0UL);
}
EXPORT_SYMBOL_GPL(spu_invalidate_slbs);

/* This is called by the MM core when a segment size is changed, to
 * request a flush of all the SPEs using a given mm
 */
void spu_flush_all_slbs(struct mm_struct *mm)
{
	struct spu *spu;
	unsigned long flags;

	spin_lock_irqsave(&spu_list_lock, flags);
	list_for_each_entry(spu, &spu_full_list, full_list) {
		if (spu->mm == mm)
			spu_invalidate_slbs(spu);
	}
	spin_unlock_irqrestore(&spu_list_lock, flags);
}

/* The hack below stinks... try to do something better one of
 * these days... Does it even work properly with NR_CPUS == 1 ?
 *
 * Setting every possible CPU in mm->cpu_vm_mask forces the TLB flush
 * code to use a broadcast tlbie rather than the CPU-local tlbiel, so
 * that translations cached by the SPEs get invalidated as well.
 */
static inline void mm_needs_global_tlbie(struct mm_struct *mm)
{
	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;

	/* Global TLBIE broadcast required with SPEs. */
	__cpus_setall(&mm->cpu_vm_mask, nr);
}

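/*
 * Record which mm this SPU is currently running on behalf of, so that
 * spu_flush_all_slbs() can find it.  A non-NULL mm is also marked as
 * needing global tlbie.
 */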
void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
{
	unsigned long flags;

	spin_lock_irqsave(&spu_list_lock, flags);
	spu->mm = mm;
	spin_unlock_irqrestore(&spu_list_lock, flags);
	if (mm)
		mm_needs_global_tlbie(mm);
}
EXPORT_SYMBOL_GPL(spu_associate_mm);

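/*
 * Class 0 error events: forward them as SPE_EVENT_* codes to the
 * dma_callback registered by the SPU context owner (e.g. spufs).
 */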
static int __spu_trap_invalid_dma(struct spu *spu)
{
	pr_debug("%s\n", __func__);
	spu->dma_callback(spu, SPE_EVENT_INVALID_DMA);
	return 0;
}

static int __spu_trap_dma_align(struct spu *spu)
{
	pr_debug("%s\n", __func__);
	spu->dma_callback(spu, SPE_EVENT_DMA_ALIGNMENT);
	return 0;
}

static int __spu_trap_error(struct spu *spu)
{
	pr_debug("%s\n", __func__);
	spu->dma_callback(spu, SPE_EVENT_SPE_ERROR);
	return 0;
}

static void spu_restart_dma(struct spu *spu)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;

	if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags))
		out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
}

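/*
 * Handle an SLB miss reported by the MFC: build an SLB entry for the
 * faulting effective address, install it in a round-robin slot
 * (spu->slb_replace cycles through the eight SPU SLB entries), then
 * let the suspended DMA continue.
 */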
static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
{
	struct spu_priv2 __iomem *priv2 = spu->priv2;
	struct mm_struct *mm = spu->mm;
	u64 esid, vsid, llp;
	int psize;

	pr_debug("%s\n", __func__);

	if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) {
		/* SLBs are pre-loaded for context switch, so
		 * we should never get here!
		 */
		printk(KERN_ERR "%s: invalid access during switch!\n", __func__);
		return 1;
	}
	esid = (ea & ESID_MASK) | SLB_ESID_V;

	switch (REGION_ID(ea)) {
	case USER_REGION_ID:
#ifdef CONFIG_HUGETLB_PAGE
		if (in_hugepage_area(mm->context, ea))
			psize = mmu_huge_psize;
		else
#endif
			psize = mm->context.user_psize;
		vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
				SLB_VSID_USER;
		break;
	case VMALLOC_REGION_ID:
		if (ea < VMALLOC_END)
			psize = mmu_vmalloc_psize;
		else
			psize = mmu_io_psize;
		vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
			SLB_VSID_KERNEL;
		break;
	case KERNEL_REGION_ID:
		psize = mmu_linear_psize;
		vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
			SLB_VSID_KERNEL;
		break;
	default:
		/* Future: support kernel segments so that drivers
		 * can use SPUs.
		 */
		pr_debug("invalid region access at %016lx\n", ea);
		return 1;
	}
	llp = mmu_psize_defs[psize].sllp;

	out_be64(&priv2->slb_index_W, spu->slb_replace);
	out_be64(&priv2->slb_vsid_RW, vsid | llp);
	out_be64(&priv2->slb_esid_RW, esid);

	spu->slb_replace++;
	if (spu->slb_replace >= 8)
		spu->slb_replace = 0;

	spu_restart_dma(spu);

	return 0;
}

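/*
 * Class 1 mapping fault: hash in kernel-space translations directly if
 * possible; otherwise save DAR/DSISR and kick the context's
 * stop_callback so the fault is resolved from process context.
 */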
extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX
static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
{
	pr_debug("%s, %lx, %lx\n", __func__, dsisr, ea);

	/*
	 * Handle kernel space hash faults immediately; user hash faults
	 * need to be deferred to process context.
	 */
	if ((dsisr & MFC_DSISR_PTE_NOT_FOUND)
	    && REGION_ID(ea) != USER_REGION_ID
	    && hash_page(ea, _PAGE_PRESENT, 0x300) == 0) {
		spu_restart_dma(spu);
		return 0;
	}

	if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) {
		printk(KERN_ERR "%s: invalid access during switch!\n", __func__);
		return 1;
	}

	spu->dar = ea;
	spu->dsisr = dsisr;
	mb();
	spu->stop_callback(spu);
	return 0;
}

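/*
 * Class 0 interrupt: error conditions (invalid DMA alignment, invalid
 * MFC DMA command, SPU error).  Just note that something is pending
 * and let spu_irq_class_0_bottom() sort out the individual bits.
 */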
static irqreturn_t
spu_irq_class_0(int irq, void *data)
{
	struct spu *spu;

	spu = data;
	spu->class_0_pending = 1;
	spu->stop_callback(spu);

	return IRQ_HANDLED;
}

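/*
 * Second stage of class 0 handling: read the individual error bits
 * under register_lock, dispatch them to the trap helpers above and
 * acknowledge them.
 */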
int
spu_irq_class_0_bottom(struct spu *spu)
{
	unsigned long stat, mask;
	unsigned long flags;

	spu->class_0_pending = 0;

	spin_lock_irqsave(&spu->register_lock, flags);
	mask = spu_int_mask_get(spu, 0);
	stat = spu_int_stat_get(spu, 0);

	stat &= mask;

	if (stat & 1) /* invalid DMA alignment */
		__spu_trap_dma_align(spu);

	if (stat & 2) /* invalid MFC DMA */
		__spu_trap_invalid_dma(spu);

	if (stat & 4) /* error on SPU */
		__spu_trap_error(spu);

	spu_int_stat_clear(spu, 0, stat);
	spin_unlock_irqrestore(&spu->register_lock, flags);

	return (stat & 0x7) ? -EIO : 0;
}
EXPORT_SYMBOL_GPL(spu_irq_class_0_bottom);

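/*
 * Class 1 interrupt: MFC translation faults.  Snapshot DAR/DSISR and
 * the status bits under register_lock, then handle segment faults
 * (SLB miss) and mapping faults (page fault) outside the lock.
 */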
static irqreturn_t
spu_irq_class_1(int irq, void *data)
{
	struct spu *spu;
	unsigned long stat, mask, dar, dsisr;

	spu = data;

	/* atomically read & clear class1 status. */
	spin_lock(&spu->register_lock);
	mask  = spu_int_mask_get(spu, 1);
	stat  = spu_int_stat_get(spu, 1) & mask;
	dar   = spu_mfc_dar_get(spu);
	dsisr = spu_mfc_dsisr_get(spu);
	if (stat & 2) /* mapping fault */
		spu_mfc_dsisr_set(spu, 0ul);
	spu_int_stat_clear(spu, 1, stat);
	spin_unlock(&spu->register_lock);
	pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat,
			dar, dsisr);

	if (stat & 1) /* segment fault */
		__spu_trap_data_seg(spu, dar);

	if (stat & 2) { /* mapping fault */
		__spu_trap_data_map(spu, dar, dsisr);
	}

	if (stat & 4) /* ls compare & suspend on get */
		;

	if (stat & 8) /* ls compare & suspend on put */
		;

	return stat ? IRQ_HANDLED : IRQ_NONE;
}

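/*
 * Class 2 interrupt: SPU-side events (mailboxes, stop-and-signal,
 * halt, DMA tag-group completion), routed to the callbacks registered
 * by the SPU context owner.
 */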
static irqreturn_t
spu_irq_class_2(int irq, void *data)
{
	struct spu *spu;
	unsigned long stat;
	unsigned long mask;

	spu = data;
	spin_lock(&spu->register_lock);
	stat = spu_int_stat_get(spu, 2);
	mask = spu_int_mask_get(spu, 2);
	/* ignore interrupts we're not waiting for */
	stat &= mask;
	/*
	 * mailbox interrupts (0x1 and 0x10) are level triggered.
	 * mask them now before acknowledging.
	 */
	if (stat & 0x11)
		spu_int_mask_and(spu, 2, ~(stat & 0x11));
	/* acknowledge all interrupts before the callbacks */
	spu_int_stat_clear(spu, 2, stat);
	spin_unlock(&spu->register_lock);

	pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask);

	if (stat & 1)  /* PPC core mailbox */
		spu->ibox_callback(spu);

	if (stat & 2) /* SPU stop-and-signal */
		spu->stop_callback(spu);

	if (stat & 4) /* SPU halted */
		spu->stop_callback(spu);

	if (stat & 8) /* DMA tag group complete */
		spu->mfc_callback(spu);

	if (stat & 0x10) /* SPU mailbox threshold */
		spu->wbox_callback(spu);

	return stat ? IRQ_HANDLED : IRQ_NONE;
}

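/*
 * Request the three per-SPU interrupt lines (class 0/1/2), naming them
 * "speNN.C"; on failure, release whatever was already requested.
 */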
static int spu_request_irqs(struct spu *spu)
{
	int ret = 0;

	if (spu->irqs[0] != NO_IRQ) {
		snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0",
			 spu->number);
		ret = request_irq(spu->irqs[0], spu_irq_class_0,
				  IRQF_DISABLED,
				  spu->irq_c0, spu);
		if (ret)
			goto bail0;
	}
	if (spu->irqs[1] != NO_IRQ) {
		snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1",
			 spu->number);
		ret = request_irq(spu->irqs[1], spu_irq_class_1,
				  IRQF_DISABLED,
				  spu->irq_c1, spu);
		if (ret)
			goto bail1;
	}
	if (spu->irqs[2] != NO_IRQ) {
		snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2",
			 spu->number);
		ret = request_irq(spu->irqs[2], spu_irq_class_2,
				  IRQF_DISABLED,
				  spu->irq_c2, spu);
		if (ret)
			goto bail2;
	}
	return 0;

bail2:
	if (spu->irqs[1] != NO_IRQ)
		free_irq(spu->irqs[1], spu);
bail1:
	if (spu->irqs[0] != NO_IRQ)
		free_irq(spu->irqs[0], spu);
bail0:
	return ret;
}

static void spu_free_irqs(struct spu *spu)
{
	if (spu->irqs[0] != NO_IRQ)
		free_irq(spu->irqs[0], spu);
	if (spu->irqs[1] != NO_IRQ)
		free_irq(spu->irqs[1], spu);
	if (spu->irqs[2] != NO_IRQ)
		free_irq(spu->irqs[2], spu);
}

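/*
 * Bring the SPU channels into a known state: clear the data of the
 * channels in zero_list and program the channel counts in count_list
 * through the privileged channel-access registers.
 */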
static void spu_init_channels(struct spu *spu)
{
	static const struct {
		 unsigned channel;
		 unsigned count;
	} zero_list[] = {
		{ 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, },
		{ 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, },
	}, count_list[] = {
		{ 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, },
		{ 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, },
		{ 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, },
	};
	struct spu_priv2 __iomem *priv2;
	int i;

	priv2 = spu->priv2;

	/* initialize all channel data to zero */
	for (i = 0; i < ARRAY_SIZE(zero_list); i++) {
		int count;

		out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel);
		for (count = 0; count < zero_list[i].count; count++)
			out_be64(&priv2->spu_chnldata_RW, 0);
	}

	/* initialize channel counts to meaningful values */
	for (i = 0; i < ARRAY_SIZE(count_list); i++) {
		out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel);
		out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
	}
}

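/*
 * Take a free SPU from the given node's list, or return NULL if that
 * node has none left.  Channels are reinitialized before the SPU is
 * handed to the caller.
 */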
struct spu *spu_alloc_node(int node)
{
	struct spu *spu = NULL;

	mutex_lock(&spu_mutex);
	if (!list_empty(&spu_list[node])) {
		spu = list_entry(spu_list[node].next, struct spu, list);
		list_del_init(&spu->list);
		pr_debug("Got SPU %d %d\n", spu->number, spu->node);
	}
	mutex_unlock(&spu_mutex);

	if (spu)
		spu_init_channels(spu);
	return spu;
}
EXPORT_SYMBOL_GPL(spu_alloc_node);

struct spu *spu_alloc(void)
{
	struct spu *spu = NULL;
	int node;

	for (node = 0; node < MAX_NUMNODES; node++) {
		spu = spu_alloc_node(node);
		if (spu)
			break;
	}

	return spu;
}

void spu_free(struct spu *spu)
{
	mutex_lock(&spu_mutex);
	list_add_tail(&spu->list, &spu_list[spu->node]);
	mutex_unlock(&spu_mutex);
}
EXPORT_SYMBOL_GPL(spu_free);
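
/*
 * Typical caller pattern (a sketch only; in practice this API is used
 * by the spufs scheduler):
 *
 *	struct spu *spu = spu_alloc();
 *	if (spu) {
 *		spu_associate_mm(spu, mm);
 *		... load and run an SPU context ...
 *		spu_associate_mm(spu, NULL);
 *		spu_free(spu);
 *	}
 */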

struct sysdev_class spu_sysdev_class = {
	set_kset_name("spu")
};

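/*
 * Add or remove a sysfs attribute (or attribute group) on the sysdev
 * of every SPU in the system.
 */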
int spu_add_sysdev_attr(struct sysdev_attribute *attr)
{
	struct spu *spu;
	mutex_lock(&spu_mutex);

	list_for_each_entry(spu, &spu_full_list, full_list)
		sysdev_create_file(&spu->sysdev, attr);

	mutex_unlock(&spu_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);

int spu_add_sysdev_attr_group(struct attribute_group *attrs)
{
	struct spu *spu;
	mutex_lock(&spu_mutex);

	list_for_each_entry(spu, &spu_full_list, full_list)
		sysfs_create_group(&spu->sysdev.kobj, attrs);

	mutex_unlock(&spu_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);


void spu_remove_sysdev_attr(struct sysdev_attribute *attr)
{
	struct spu *spu;
	mutex_lock(&spu_mutex);

	list_for_each_entry(spu, &spu_full_list, full_list)
		sysdev_remove_file(&spu->sysdev, attr);

	mutex_unlock(&spu_mutex);
}
EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr);

void spu_remove_sysdev_attr_group(struct attribute_group *attrs)
{
	struct spu *spu;
	mutex_lock(&spu_mutex);

	list_for_each_entry(spu, &spu_full_list, full_list)
		sysfs_remove_group(&spu->sysdev.kobj, attrs);

	mutex_unlock(&spu_mutex);
}
EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group);

static int spu_create_sysdev(struct spu *spu)
{
	int ret;

	spu->sysdev.id = spu->number;
	spu->sysdev.cls = &spu_sysdev_class;
	ret = sysdev_register(&spu->sysdev);
	if (ret) {
		printk(KERN_ERR "Can't register SPU %d with sysfs\n",
				spu->number);
		return ret;
	}

	sysfs_add_device_to_node(&spu->sysdev, spu->node);

	return 0;
}

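/*
 * Instantiate one SPU: allocate the structure, let the platform code
 * (spu_create_spu) wire up its resources, program MFC_SDR and MFC_SR1,
 * request the interrupt lines and register the sysdev, then add the
 * SPU to its node's free list and to the global list.
 */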
static int __init create_spu(void *data)
{
	struct spu *spu;
	int ret;
	static int number;
	unsigned long flags;

	ret = -ENOMEM;
	spu = kzalloc(sizeof (*spu), GFP_KERNEL);
	if (!spu)
		goto out;

	spin_lock_init(&spu->register_lock);
	mutex_lock(&spu_mutex);
	spu->number = number++;
	mutex_unlock(&spu_mutex);

	ret = spu_create_spu(spu, data);
	if (ret)
		goto out_free;

	spu_mfc_sdr_setup(spu);
	spu_mfc_sr1_set(spu, 0x33);
	ret = spu_request_irqs(spu);
	if (ret)
		goto out_destroy;

	ret = spu_create_sysdev(spu);
	if (ret)
		goto out_free_irqs;

	mutex_lock(&spu_mutex);
	spin_lock_irqsave(&spu_list_lock, flags);
	list_add(&spu->list, &spu_list[spu->node]);
	list_add(&spu->full_list, &spu_full_list);
	spin_unlock_irqrestore(&spu_list_lock, flags);
	mutex_unlock(&spu_mutex);

	goto out;

out_free_irqs:
	spu_free_irqs(spu);
out_destroy:
	spu_destroy_spu(spu);
out_free:
	kfree(spu);
out:
	return ret;
}

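/*
 * Module init: bail out quietly on platforms without SPU management
 * ops, otherwise register the sysdev class, enumerate and create all
 * SPUs, and tell xmon about them.
 */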
static int __init init_spu_base(void)
{
	int i, ret = 0;

	for (i = 0; i < MAX_NUMNODES; i++)
		INIT_LIST_HEAD(&spu_list[i]);

	if (!spu_management_ops)
		goto out;

	/* create sysdev class for spus */
	ret = sysdev_class_register(&spu_sysdev_class);
	if (ret)
		goto out;

	ret = spu_enumerate_spus(create_spu);

	if (ret) {
		printk(KERN_WARNING "%s: Error initializing spus\n",
			__func__);
		goto out_unregister_sysdev_class;
	}

	xmon_register_spus(&spu_full_list);

	return 0;

 out_unregister_sysdev_class:
	sysdev_class_unregister(&spu_sysdev_class);
 out:

	return ret;
}
module_init(init_spu_base);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");