#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/of_dma.h>
#include <linux/of_irq.h>
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/of_address.h>
#include <linux/pm_runtime.h>
#include "../dmaengine.h"

#define DESC_TYPE	27
#define DESC_TYPE_HOST	0x10
#define DESC_TYPE_TEARD	0x13

#define TD_DESC_IS_RX	(1 << 16)
#define TD_DESC_DMA_NUM	10

#define DESC_LENGTH_BITS_NUM	21

#define DESC_TYPE_USB	(5 << 26)
#define DESC_PD_COMPLETE	(1 << 31)

/* DMA engine */
#define DMA_TDFDQ	4
#define DMA_TXGCR(x)	(0x800 + (x) * 0x20)
#define DMA_RXGCR(x)	(0x808 + (x) * 0x20)
#define RXHPCRA0		4

#define GCR_CHAN_ENABLE		(1 << 31)
#define GCR_TEARDOWN		(1 << 30)
#define GCR_STARV_RETRY		(1 << 24)
#define GCR_DESC_TYPE_HOST	(1 << 14)

/* DMA scheduler */
#define DMA_SCHED_CTRL		0
#define DMA_SCHED_CTRL_EN	(1 << 31)
#define DMA_SCHED_WORD(x)	((x) * 4 + 0x800)

#define SCHED_ENTRY0_CHAN(x)	((x) << 0)
#define SCHED_ENTRY0_IS_RX	(1 << 7)

#define SCHED_ENTRY1_CHAN(x)	((x) << 8)
#define SCHED_ENTRY1_IS_RX	(1 << 15)

#define SCHED_ENTRY2_CHAN(x)	((x) << 16)
#define SCHED_ENTRY2_IS_RX	(1 << 23)

#define SCHED_ENTRY3_CHAN(x)	((x) << 24)
#define SCHED_ENTRY3_IS_RX	(1 << 31)

/* Queue manager */
/* 4 KiB of memory for descriptors, 2 for each endpoint */
#define ALLOC_DECS_NUM		128
#define DESCS_AREAS		1
#define TOTAL_DESCS_NUM		(ALLOC_DECS_NUM * DESCS_AREAS)
#define QMGR_SCRATCH_SIZE	(TOTAL_DESCS_NUM * 4)

#define QMGR_LRAM0_BASE		0x80
#define QMGR_LRAM_SIZE		0x84
#define QMGR_LRAM1_BASE		0x88
#define QMGR_MEMBASE(x)		(0x1000 + (x) * 0x10)
#define QMGR_MEMCTRL(x)		(0x1004 + (x) * 0x10)
#define QMGR_MEMCTRL_IDX_SH	16
#define QMGR_MEMCTRL_DESC_SH	8

#define QMGR_PEND(x)	(0x90 + (x) * 4)

#define QMGR_PENDING_SLOT_Q(x)	(x / 32)
#define QMGR_PENDING_BIT_Q(x)	(x % 32)

#define QMGR_QUEUE_A(n)	(0x2000 + (n) * 0x10)
#define QMGR_QUEUE_B(n)	(0x2004 + (n) * 0x10)
#define QMGR_QUEUE_C(n)	(0x2008 + (n) * 0x10)
#define QMGR_QUEUE_D(n)	(0x200c + (n) * 0x10)

/* Packet Descriptor */
#define PD2_ZERO_LENGTH		(1 << 19)

struct cppi41_channel {
	struct dma_chan chan;
	struct dma_async_tx_descriptor txd;
	struct cppi41_dd *cdd;
	struct cppi41_desc *desc;
	dma_addr_t desc_phys;
	void __iomem *gcr_reg;
	int is_tx;
	u32 residue;

	unsigned int q_num;
	unsigned int q_comp_num;
	unsigned int port_num;

	unsigned td_retry;
	unsigned td_queued:1;
	unsigned td_seen:1;
	unsigned td_desc_seen:1;

	struct list_head node;		/* Node for pending list */
};

struct cppi41_desc {
	u32 pd0;
	u32 pd1;
	u32 pd2;
	u32 pd3;
	u32 pd4;
	u32 pd5;
	u32 pd6;
	u32 pd7;
} __aligned(32);

struct chan_queues {
	u16 submit;
	u16 complete;
};

struct cppi41_dd {
	struct dma_device ddev;

	void *qmgr_scratch;
	dma_addr_t scratch_phys;

	struct cppi41_desc *cd;
	dma_addr_t descs_phys;
	u32 first_td_desc;
	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];

	void __iomem *ctrl_mem;
	void __iomem *sched_mem;
	void __iomem *qmgr_mem;
	unsigned int irq;
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
	u16 first_completion_queue;
	u16 qmgr_num_pend;
	u32 n_chans;
	u8 platform;

	struct list_head pending;	/* Pending queued transfers */
	spinlock_t lock;		/* Lock for pending list */

	/* context for suspend/resume */
	unsigned int dma_tdfdq;

	bool is_suspended;
};

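/*
 * Fixed submit/completion queue numbers for the AM335x USB DMA channels:
 * one entry per endpoint, 15 endpoints each for USB0 and USB1.
 */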
static struct chan_queues am335x_usb_queues_tx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit = 32, .complete =  93},
	[ 1] = { .submit = 34, .complete =  94},
	[ 2] = { .submit = 36, .complete =  95},
	[ 3] = { .submit = 38, .complete =  96},
	[ 4] = { .submit = 40, .complete =  97},
	[ 5] = { .submit = 42, .complete =  98},
	[ 6] = { .submit = 44, .complete =  99},
	[ 7] = { .submit = 46, .complete = 100},
	[ 8] = { .submit = 48, .complete = 101},
	[ 9] = { .submit = 50, .complete = 102},
	[10] = { .submit = 52, .complete = 103},
	[11] = { .submit = 54, .complete = 104},
	[12] = { .submit = 56, .complete = 105},
	[13] = { .submit = 58, .complete = 106},
	[14] = { .submit = 60, .complete = 107},

	/* USB1 ENDP1 */
	[15] = { .submit = 62, .complete = 125},
	[16] = { .submit = 64, .complete = 126},
	[17] = { .submit = 66, .complete = 127},
	[18] = { .submit = 68, .complete = 128},
	[19] = { .submit = 70, .complete = 129},
	[20] = { .submit = 72, .complete = 130},
	[21] = { .submit = 74, .complete = 131},
	[22] = { .submit = 76, .complete = 132},
	[23] = { .submit = 78, .complete = 133},
	[24] = { .submit = 80, .complete = 134},
	[25] = { .submit = 82, .complete = 135},
	[26] = { .submit = 84, .complete = 136},
	[27] = { .submit = 86, .complete = 137},
	[28] = { .submit = 88, .complete = 138},
	[29] = { .submit = 90, .complete = 139},
};

static const struct chan_queues am335x_usb_queues_rx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit =  1, .complete = 109},
	[ 1] = { .submit =  2, .complete = 110},
	[ 2] = { .submit =  3, .complete = 111},
	[ 3] = { .submit =  4, .complete = 112},
	[ 4] = { .submit =  5, .complete = 113},
	[ 5] = { .submit =  6, .complete = 114},
	[ 6] = { .submit =  7, .complete = 115},
	[ 7] = { .submit =  8, .complete = 116},
	[ 8] = { .submit =  9, .complete = 117},
	[ 9] = { .submit = 10, .complete = 118},
	[10] = { .submit = 11, .complete = 119},
	[11] = { .submit = 12, .complete = 120},
	[12] = { .submit = 13, .complete = 121},
	[13] = { .submit = 14, .complete = 122},
	[14] = { .submit = 15, .complete = 123},

	/* USB1 ENDP 1 */
	[15] = { .submit = 16, .complete = 141},
	[16] = { .submit = 17, .complete = 142},
	[17] = { .submit = 18, .complete = 143},
	[18] = { .submit = 19, .complete = 144},
	[19] = { .submit = 20, .complete = 145},
	[20] = { .submit = 21, .complete = 146},
	[21] = { .submit = 22, .complete = 147},
	[22] = { .submit = 23, .complete = 148},
	[23] = { .submit = 24, .complete = 149},
	[24] = { .submit = 25, .complete = 150},
	[25] = { .submit = 26, .complete = 151},
	[26] = { .submit = 27, .complete = 152},
	[27] = { .submit = 28, .complete = 153},
	[28] = { .submit = 29, .complete = 154},
	[29] = { .submit = 30, .complete = 155},
};

static const struct chan_queues da8xx_usb_queues_tx[] = {
	[0] = { .submit =  16, .complete = 24},
	[1] = { .submit =  18, .complete = 24},
	[2] = { .submit =  20, .complete = 24},
	[3] = { .submit =  22, .complete = 24},
};

static const struct chan_queues da8xx_usb_queues_rx[] = {
	[0] = { .submit =  1, .complete = 26},
	[1] = { .submit =  3, .complete = 26},
	[2] = { .submit =  5, .complete = 26},
	[3] = { .submit =  7, .complete = 26},
};

struct cppi_glue_infos {
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
	u16 first_completion_queue;
	u16 qmgr_num_pend;
};

static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
{
	return container_of(c, struct cppi41_channel, chan);
}

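/*
 * Map a completed descriptor's DMA address back to the channel that queued
 * it and release the chan_busy[] slot claimed in push_desc_queue().
 */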
static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
{
	struct cppi41_channel *c;
	u32 descs_size;
	u32 desc_num;

	descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM;

	if (!((desc >= cdd->descs_phys) &&
			(desc < (cdd->descs_phys + descs_size)))) {
		return NULL;
	}

	desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc);
	BUG_ON(desc_num >= ALLOC_DECS_NUM);
	c = cdd->chan_busy[desc_num];
	cdd->chan_busy[desc_num] = NULL;

	/* Usecount for chan_busy[], paired with push_desc_queue() */
	pm_runtime_put(cdd->ddev.dev);

	return c;
}

static void cppi_writel(u32 val, void *__iomem *mem)
{
	__raw_writel(val, mem);
}

static u32 cppi_readl(void *__iomem *mem)
{
	return __raw_readl(mem);
}

static u32 pd_trans_len(u32 val)
{
	return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
}

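/*
 * Dequeue one descriptor address from a queue manager queue. The lowest
 * five bits of queue register D are not part of the (32-byte aligned)
 * descriptor address and are masked off.
 */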
static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
{
	u32 desc;

	desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
	desc &= ~0x1f;
	return desc;
}

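/*
 * Interrupt handler: walk the queue manager pending registers, pop every
 * completed descriptor, record the residue and invoke the dmaengine
 * completion callback of the owning channel.
 */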
static irqreturn_t cppi41_irq(int irq, void *data)
{
	struct cppi41_dd *cdd = data;
	u16 first_completion_queue = cdd->first_completion_queue;
	u16 qmgr_num_pend = cdd->qmgr_num_pend;
	struct cppi41_channel *c;
	int i;

	for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend;
			i++) {
		u32 val;
		u32 q_num;

		val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
		if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) {
			u32 mask;
			/* set the bit for the first completion queue */
			mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue);
			/* subtracting one sets all bits for the queues below it */
			mask--;
			/* invert: keep only the completion queues' bits in val */
			val &= ~mask;
		}

		if (val)
			__iormb();

		while (val) {
			u32 desc, len;

			/*
			 * This should never trigger, see the comments in
			 * push_desc_queue()
			 */
			WARN_ON(cdd->is_suspended);

			q_num = __fls(val);
			val &= ~(1 << q_num);
			q_num += 32 * i;
			desc = cppi41_pop_desc(cdd, q_num);
			c = desc_to_chan(cdd, desc);
			if (WARN_ON(!c)) {
				pr_err("%s() q %d desc %08x\n", __func__,
						q_num, desc);
				continue;
			}

			if (c->desc->pd2 & PD2_ZERO_LENGTH)
				len = 0;
			else
				len = pd_trans_len(c->desc->pd0);

			c->residue = pd_trans_len(c->desc->pd6) - len;
			dma_cookie_complete(&c->txd);
			dmaengine_desc_get_callback_invoke(&c->txd, NULL);
		}
	}
	return IRQ_HANDLED;
}

static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx)
{
	dma_cookie_t cookie;

	cookie = dma_cookie_assign(tx);

	return cookie;
}

static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	int error;

	error = pm_runtime_get_sync(cdd->ddev.dev);
	if (error < 0) {
		dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n",
			__func__, error);
		pm_runtime_put_noidle(cdd->ddev.dev);

		return error;
	}

	dma_cookie_init(chan);
	dma_async_tx_descriptor_init(&c->txd, chan);
	c->txd.tx_submit = cppi41_tx_submit;

	if (!c->is_tx)
		cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);

	return 0;
}

static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	int error;

	error = pm_runtime_get_sync(cdd->ddev.dev);
	if (error < 0) {
		pm_runtime_put_noidle(cdd->ddev.dev);

		return;
	}

	WARN_ON(!list_empty(&cdd->pending));

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);
}

static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
	dma_cookie_t cookie, struct dma_tx_state *txstate)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	enum dma_status ret;

	ret = dma_cookie_status(chan, cookie, txstate);

	dma_set_residue(txstate, c->residue);

	return ret;
}

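/*
 * Enable the channel via its GCR register and push its descriptor into the
 * hardware submit queue. Only called while the controller is runtime-active,
 * see cppi41_run_queue().
 */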
static void push_desc_queue(struct cppi41_channel *c)
{
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	u32 reg;

	c->residue = 0;

	reg = GCR_CHAN_ENABLE;
	if (!c->is_tx) {
		reg |= GCR_STARV_RETRY;
		reg |= GCR_DESC_TYPE_HOST;
		reg |= c->q_comp_num;
	}

	cppi_writel(reg, c->gcr_reg);

	/*
	 * We use __raw_writel() rather than writel(), so we have to make sure
	 * that the DMA descriptor in coherent memory made it to main memory
	 * before starting the DMA engine.
	 */
	__iowmb();

	/*
	 * DMA transfers can take at least 200ms to complete with USB mass
	 * storage connected. To prevent autosuspend timeouts, we must use
	 * pm_runtime_get/put() when chan_busy[] is modified. This will get
	 * cleared in desc_to_chan() or cppi41_stop_chan() depending on the
	 * outcome of the transfer.
	 */
	pm_runtime_get(cdd->ddev.dev);

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	WARN_ON(cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = c;

	reg = (sizeof(struct cppi41_desc) - 24) / 4;
	reg |= desc_phys;
	cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
}

/*
 * Caller must hold cdd->lock to prevent push_desc_queue() from being
 * called out of order; both cppi41_dma_issue_pending() and
 * cppi41_runtime_resume() call this function.
 */
static void cppi41_run_queue(struct cppi41_dd *cdd)
{
	struct cppi41_channel *c, *_c;

	list_for_each_entry_safe(c, _c, &cdd->pending, node) {
		push_desc_queue(c);
		list_del(&c->node);
	}
}

static void cppi41_dma_issue_pending(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	unsigned long flags;
	int error;

	error = pm_runtime_get(cdd->ddev.dev);
	if ((error != -EINPROGRESS) && error < 0) {
		pm_runtime_put_noidle(cdd->ddev.dev);
		dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n",
			error);

		return;
	}

	spin_lock_irqsave(&cdd->lock, flags);
	list_add_tail(&c->node, &cdd->pending);
	if (!cdd->is_suspended)
		cppi41_run_queue(cdd);
	spin_unlock_irqrestore(&cdd->lock, flags);

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);
}

static u32 get_host_pd0(u32 length)
{
	u32 reg;

	reg = DESC_TYPE_HOST << DESC_TYPE;
	reg |= length;

	return reg;
}

static u32 get_host_pd1(struct cppi41_channel *c)
{
	u32 reg;

	reg = 0;

	return reg;
}

static u32 get_host_pd2(struct cppi41_channel *c)
{
	u32 reg;

	reg = DESC_TYPE_USB;
	reg |= c->q_comp_num;

	return reg;
}

static u32 get_host_pd3(u32 length)
{
	u32 reg;

	/* PD3 = packet size */
	reg = length;

	return reg;
}

static u32 get_host_pd6(u32 length)
{
	u32 reg;

	/* PD6 buffer size */
	reg = DESC_PD_COMPLETE;
	reg |= length;

	return reg;
}

static u32 get_host_pd4_or_7(u32 addr)
{
	u32 reg;

	reg = addr;

	return reg;
}

static u32 get_host_pd5(void)
{
	u32 reg;

	reg = 0;

	return reg;
}

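/*
 * Fill one host packet descriptor per scatterlist entry with the buffer
 * address, length and completion queue. In practice only a single entry is
 * submitted per transfer (see the comment in the loop below).
 */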
static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
	struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len,
	enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_desc *d;
	struct scatterlist *sg;
	unsigned int i;

	d = c->desc;
	for_each_sg(sgl, sg, sg_len, i) {
		u32 addr;
		u32 len;

		/* We need to use more than one desc once musb supports sg */
		addr = lower_32_bits(sg_dma_address(sg));
		len = sg_dma_len(sg);

		d->pd0 = get_host_pd0(len);
		d->pd1 = get_host_pd1(c);
		d->pd2 = get_host_pd2(c);
		d->pd3 = get_host_pd3(len);
		d->pd4 = get_host_pd4_or_7(addr);
		d->pd5 = get_host_pd5();
		d->pd6 = get_host_pd6(len);
		d->pd7 = get_host_pd4_or_7(addr);

		d++;
	}

	return &c->txd;
}

static void cppi41_compute_td_desc(struct cppi41_desc *d)
{
	d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
}

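/*
 * Abort a transfer: queue a teardown descriptor for the channel and poll the
 * completion queues until both the transfer descriptor and the teardown
 * descriptor have been returned. Returns -EAGAIN while the teardown is still
 * in flight so the caller retries.
 */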
static int cppi41_tear_down_chan(struct cppi41_channel *c)
{
	struct dmaengine_result abort_result;
	struct cppi41_dd *cdd = c->cdd;
	struct cppi41_desc *td;
	u32 reg;
	u32 desc_phys;
	u32 td_desc_phys;

	td = cdd->cd;
	td += cdd->first_td_desc;

	td_desc_phys = cdd->descs_phys;
	td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc);

	if (!c->td_queued) {
		cppi41_compute_td_desc(td);
		__iowmb();

		reg = (sizeof(struct cppi41_desc) - 24) / 4;
		reg |= td_desc_phys;
		cppi_writel(reg, cdd->qmgr_mem +
				QMGR_QUEUE_D(cdd->td_queue.submit));

		reg = GCR_CHAN_ENABLE;
		if (!c->is_tx) {
			reg |= GCR_STARV_RETRY;
			reg |= GCR_DESC_TYPE_HOST;
			reg |= cdd->td_queue.complete;
		}
		reg |= GCR_TEARDOWN;
		cppi_writel(reg, c->gcr_reg);
		c->td_queued = 1;
		c->td_retry = 500;
	}

	if (!c->td_seen || !c->td_desc_seen) {

		desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
		if (!desc_phys && c->is_tx)
			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);

		if (desc_phys == c->desc_phys) {
			c->td_desc_seen = 1;

		} else if (desc_phys == td_desc_phys) {
			u32 pd0;

			__iormb();
			pd0 = td->pd0;
			WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
			WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
			WARN_ON((pd0 & 0x1f) != c->port_num);
			c->td_seen = 1;
		} else if (desc_phys) {
			WARN_ON_ONCE(1);
		}
	}
	c->td_retry--;
	/*
	 * If the TX descriptor / channel is in use, the caller needs to poke
	 * its TD bit multiple times. After that the hardware releases the
	 * transfer descriptor followed by the TD descriptor. Waiting seems
	 * to make no difference.
	 * RX seems to be thrown out right away. However, once the TearDown
	 * descriptor gets through we are done. If we have seen the transfer
	 * descriptor before the TD we fetch it from enqueue; it has to be
	 * there waiting for us.
	 */
	if (!c->td_seen && c->td_retry) {
		udelay(1);
		return -EAGAIN;
	}
	WARN_ON(!c->td_retry);

	if (!c->td_desc_seen) {
		desc_phys = cppi41_pop_desc(cdd, c->q_num);
		if (!desc_phys)
			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
		WARN_ON(!desc_phys);
	}

	c->td_queued = 0;
	c->td_seen = 0;
	c->td_desc_seen = 0;
	cppi_writel(0, c->gcr_reg);

	/* Invoke the callback to do the necessary clean-up */
	abort_result.result = DMA_TRANS_ABORTED;
	dma_cookie_complete(&c->txd);
	dmaengine_desc_get_callback_invoke(&c->txd, &abort_result);

	return 0;
}

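/*
 * device_terminate_all callback: remove the channel from the pending list if
 * it was never pushed to the hardware, otherwise tear the channel down and
 * release its chan_busy[] slot.
 */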
static int cppi41_stop_chan(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	int ret;

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	if (!cdd->chan_busy[desc_num]) {
		struct cppi41_channel *cc, *_ct;

		/*
		 * The channel might still be in the pending list if
		 * cppi41_dma_issue_pending() is called after
		 * cppi41_runtime_suspend() is called.
		 */
		list_for_each_entry_safe(cc, _ct, &cdd->pending, node) {
			if (cc != c)
				continue;
			list_del(&cc->node);
			break;
		}
		return 0;
	}

	ret = cppi41_tear_down_chan(c);
	if (ret)
		return ret;

	WARN_ON(!cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = NULL;

	/* Usecount for chan_busy[], paired with push_desc_queue() */
	pm_runtime_put(cdd->ddev.dev);

	return 0;
}

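/*
 * Register one RX (even index) and one TX (odd index) dmaengine channel per
 * hardware channel; each gets its own pre-allocated descriptor.
 */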
static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
{
	struct cppi41_channel *cchan, *chans;
	int i;
	u32 n_chans = cdd->n_chans;

	/*
	 * A hardware channel can only be used either for TX or for RX, so
	 * register twice as many DMA channels: one TX and one RX channel for
	 * each hardware channel.
	 */
	n_chans *= 2;

	chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL);
	if (!chans)
		return -ENOMEM;

	for (i = 0; i < n_chans; i++) {
		cchan = &chans[i];

		cchan->cdd = cdd;
		if (i & 1) {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1);
			cchan->is_tx = 1;
		} else {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1);
			cchan->is_tx = 0;
		}
		cchan->port_num = i >> 1;
		cchan->desc = &cdd->cd[i];
		cchan->desc_phys = cdd->descs_phys;
		cchan->desc_phys += i * sizeof(struct cppi41_desc);
		cchan->chan.device = &cdd->ddev;
		list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels);
	}
	cdd->first_td_desc = n_chans;

	return 0;
}

static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int mem_decs;
	int i;

	mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc);

	for (i = 0; i < DESCS_AREAS; i++) {

		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		dma_free_coherent(dev, mem_decs, cdd->cd,
				cdd->descs_phys);
	}
}

static void disable_sched(struct cppi41_dd *cdd)
{
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
}

static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	disable_sched(cdd);

	purge_descs(dev, cdd);

	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
	dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
			cdd->scratch_phys);
}

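/*
 * Allocate the coherent descriptor area and describe it to the queue
 * manager: the memory control register takes log2(descriptor size) - 5 and
 * log2(descriptor count) - 5.
 */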
static int init_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int desc_size;
	unsigned int mem_decs;
	int i;
	u32 reg;
	u32 idx;

	BUILD_BUG_ON(sizeof(struct cppi41_desc) &
			(sizeof(struct cppi41_desc) - 1));
	BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32);
	BUILD_BUG_ON(ALLOC_DECS_NUM < 32);

	desc_size = sizeof(struct cppi41_desc);
	mem_decs = ALLOC_DECS_NUM * desc_size;

	idx = 0;
	for (i = 0; i < DESCS_AREAS; i++) {

		reg = idx << QMGR_MEMCTRL_IDX_SH;
		reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH;
		reg |= ilog2(ALLOC_DECS_NUM) - 5;

		BUILD_BUG_ON(DESCS_AREAS != 1);
		cdd->cd = dma_alloc_coherent(dev, mem_decs,
				&cdd->descs_phys, GFP_KERNEL);
		if (!cdd->cd)
			return -ENOMEM;

		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		idx += ALLOC_DECS_NUM;
	}
	return 0;
}

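/*
 * Program the DMA scheduler: each 32-bit scheduler word holds four entries,
 * a TX and an RX entry for two consecutive channels; the scheduler is then
 * enabled with the last entry index.
 */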
static void init_sched(struct cppi41_dd *cdd)
{
	unsigned ch;
	unsigned word;
	u32 reg;

	word = 0;
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
	for (ch = 0; ch < cdd->n_chans; ch += 2) {

		reg = SCHED_ENTRY0_CHAN(ch);
		reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;

		reg |= SCHED_ENTRY2_CHAN(ch + 1);
		reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX;
		cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
		word++;
	}
	reg = cdd->n_chans * 2 - 1;
	reg |= DMA_SCHED_CTRL_EN;
	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
}

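/*
 * One-time controller setup: allocate the queue manager linking RAM scratch
 * area, set up the descriptor regions, point the teardown free descriptor
 * queue at the glue-specific submit queue and start the scheduler.
 */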
static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	int ret;

	BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
	cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
			&cdd->scratch_phys, GFP_KERNEL);
	if (!cdd->qmgr_scratch)
		return -ENOMEM;

	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	ret = init_descs(dev, cdd);
	if (ret)
		goto err_td;

	cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
	init_sched(cdd);

	return 0;
err_td:
	deinit_cppi41(dev, cdd);
	return ret;
}

static struct platform_driver cpp41_dma_driver;
/*
 * The param format is:
 * X Y
 * X: Port
 * Y: 0 = RX else TX
 */
#define INFO_PORT	0
#define INFO_IS_TX	1

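/*
 * dmaengine filter: match a channel against the { port, is_tx } pair from
 * the DT translation and bind the port's submit/completion queue numbers.
 */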
static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
{
	struct cppi41_channel *cchan;
	struct cppi41_dd *cdd;
	const struct chan_queues *queues;
	u32 *num = param;

	if (chan->device->dev->driver != &cpp41_dma_driver.driver)
		return false;

	cchan = to_cpp41_chan(chan);

	if (cchan->port_num != num[INFO_PORT])
		return false;

	if (cchan->is_tx && !num[INFO_IS_TX])
		return false;
	cdd = cchan->cdd;
	if (cchan->is_tx)
		queues = cdd->queues_tx;
	else
		queues = cdd->queues_rx;

	BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) !=
		     ARRAY_SIZE(am335x_usb_queues_tx));
	if (WARN_ON(cchan->port_num >= ARRAY_SIZE(am335x_usb_queues_rx)))
		return false;

	cchan->q_num = queues[cchan->port_num].submit;
	cchan->q_comp_num = queues[cchan->port_num].complete;
	return true;
}

static struct of_dma_filter_info cpp41_dma_info = {
	.filter_fn = cpp41_dma_filter_fn,
};

static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
		struct of_dma *ofdma)
{
	int count = dma_spec->args_count;
	struct of_dma_filter_info *info = ofdma->of_dma_data;

	if (!info || !info->filter_fn)
		return NULL;

	if (count != 2)
		return NULL;

	return dma_request_channel(info->dma_cap, info->filter_fn,
			&dma_spec->args[0]);
}

static const struct cppi_glue_infos am335x_usb_infos = {
	.queues_rx = am335x_usb_queues_rx,
	.queues_tx = am335x_usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
	.first_completion_queue = 93,
	.qmgr_num_pend = 5,
};

static const struct cppi_glue_infos da8xx_usb_infos = {
	.queues_rx = da8xx_usb_queues_rx,
	.queues_tx = da8xx_usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
	.first_completion_queue = 24,
	.qmgr_num_pend = 2,
};

static const struct of_device_id cppi41_dma_ids[] = {
	{ .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos},
	{ .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos},
	{},
};
MODULE_DEVICE_TABLE(of, cppi41_dma_ids);

static const struct cppi_glue_infos *get_glue_info(struct device *dev)
{
	const struct of_device_id *of_id;

	of_id = of_match_node(cppi41_dma_ids, dev->of_node);
	if (!of_id)
		return NULL;
	return of_id->data;
}

#define CPPI41_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
				BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
				BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
				BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))

static int cppi41_dma_probe(struct platform_device *pdev)
{
	struct cppi41_dd *cdd;
	struct device *dev = &pdev->dev;
	const struct cppi_glue_infos *glue_info;
	struct resource *mem;
	int index;
	int irq;
	int ret;

	glue_info = get_glue_info(dev);
	if (!glue_info)
		return -EINVAL;

	cdd = devm_kzalloc(&pdev->dev, sizeof(*cdd), GFP_KERNEL);
	if (!cdd)
		return -ENOMEM;

	dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask);
	cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources;
	cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources;
	cdd->ddev.device_tx_status = cppi41_dma_tx_status;
	cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
	cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
	cdd->ddev.device_terminate_all = cppi41_stop_chan;
	cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
	cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS;
	cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS;
	cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
	cdd->ddev.dev = dev;
	INIT_LIST_HEAD(&cdd->ddev.channels);
	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;

	index = of_property_match_string(dev->of_node,
					 "reg-names", "controller");
	if (index < 0)
		return index;

	mem = platform_get_resource(pdev, IORESOURCE_MEM, index);
	cdd->ctrl_mem = devm_ioremap_resource(dev, mem);
	if (IS_ERR(cdd->ctrl_mem))
		return PTR_ERR(cdd->ctrl_mem);

	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 1);
	cdd->sched_mem = devm_ioremap_resource(dev, mem);
	if (IS_ERR(cdd->sched_mem))
		return PTR_ERR(cdd->sched_mem);

	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 2);
	cdd->qmgr_mem = devm_ioremap_resource(dev, mem);
	if (IS_ERR(cdd->qmgr_mem))
		return PTR_ERR(cdd->qmgr_mem);

	spin_lock_init(&cdd->lock);
	INIT_LIST_HEAD(&cdd->pending);

	platform_set_drvdata(pdev, cdd);

	pm_runtime_enable(dev);
	pm_runtime_set_autosuspend_delay(dev, 100);
	pm_runtime_use_autosuspend(dev);
	ret = pm_runtime_get_sync(dev);
	if (ret < 0)
		goto err_get_sync;

	cdd->queues_rx = glue_info->queues_rx;
	cdd->queues_tx = glue_info->queues_tx;
	cdd->td_queue = glue_info->td_queue;
	cdd->qmgr_num_pend = glue_info->qmgr_num_pend;
	cdd->first_completion_queue = glue_info->first_completion_queue;

	ret = of_property_read_u32(dev->of_node,
				   "#dma-channels", &cdd->n_chans);
	if (ret)
		goto err_get_n_chans;

	ret = init_cppi41(dev, cdd);
	if (ret)
		goto err_init_cppi;

	ret = cppi41_add_chans(dev, cdd);
	if (ret)
		goto err_chans;

	irq = irq_of_parse_and_map(dev->of_node, 0);
	if (!irq) {
		ret = -EINVAL;
		goto err_chans;
	}

	ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED,
			dev_name(dev), cdd);
	if (ret)
		goto err_chans;
	cdd->irq = irq;

	ret = dma_async_device_register(&cdd->ddev);
	if (ret)
		goto err_chans;

	ret = of_dma_controller_register(dev->of_node,
			cppi41_dma_xlate, &cpp41_dma_info);
	if (ret)
		goto err_of;

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);

	return 0;
err_of:
	dma_async_device_unregister(&cdd->ddev);
err_chans:
	deinit_cppi41(dev, cdd);
err_init_cppi:
	pm_runtime_dont_use_autosuspend(dev);
err_get_n_chans:
err_get_sync:
	pm_runtime_put_sync(dev);
	pm_runtime_disable(dev);
	return ret;
}

static int cppi41_dma_remove(struct platform_device *pdev)
{
	struct cppi41_dd *cdd = platform_get_drvdata(pdev);
	int error;

	error = pm_runtime_get_sync(&pdev->dev);
	if (error < 0)
		dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n",
			__func__, error);
	of_dma_controller_free(pdev->dev.of_node);
	dma_async_device_unregister(&cdd->ddev);

	devm_free_irq(&pdev->dev, cdd->irq, cdd);
	deinit_cppi41(&pdev->dev, cdd);
	pm_runtime_dont_use_autosuspend(&pdev->dev);
	pm_runtime_put_sync(&pdev->dev);
	pm_runtime_disable(&pdev->dev);
	return 0;
}

static int __maybe_unused cppi41_suspend(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);

	cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
	disable_sched(cdd);

	return 0;
}

static int __maybe_unused cppi41_resume(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	struct cppi41_channel *c;
	int i;

	for (i = 0; i < DESCS_AREAS; i++)
		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));

	list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
		if (!c->is_tx)
			cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	init_sched(cdd);

	cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	return 0;
}

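/*
 * Runtime PM: while the controller is suspended no descriptor may be pushed
 * to the hardware, so cppi41_dma_issue_pending() only queues it on
 * cdd->pending and cppi41_runtime_resume() flushes the list afterwards.
 */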
static int __maybe_unused cppi41_runtime_suspend(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	unsigned long flags;

	spin_lock_irqsave(&cdd->lock, flags);
	cdd->is_suspended = true;
	WARN_ON(!list_empty(&cdd->pending));
	spin_unlock_irqrestore(&cdd->lock, flags);

	return 0;
}

static int __maybe_unused cppi41_runtime_resume(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	unsigned long flags;

	spin_lock_irqsave(&cdd->lock, flags);
	cdd->is_suspended = false;
	cppi41_run_queue(cdd);
	spin_unlock_irqrestore(&cdd->lock, flags);

	return 0;
}

static const struct dev_pm_ops cppi41_pm_ops = {
	SET_LATE_SYSTEM_SLEEP_PM_OPS(cppi41_suspend, cppi41_resume)
	SET_RUNTIME_PM_OPS(cppi41_runtime_suspend,
			   cppi41_runtime_resume,
			   NULL)
};

static struct platform_driver cpp41_dma_driver = {
	.probe  = cppi41_dma_probe,
	.remove = cppi41_dma_remove,
	.driver = {
		.name = "cppi41-dma-engine",
		.pm = &cppi41_pm_ops,
		.of_match_table = of_match_ptr(cppi41_dma_ids),
	},
};

module_platform_driver(cpp41_dma_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");