/*
 * Low-Level PCI Express Support for the SH7786
 *
 *  Copyright (C) 2009 - 2011  Paul Mundt
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#define pr_fmt(fmt) "PCI: " fmt

#include <linux/pci.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/async.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/clk.h>
#include <linux/sh_clk.h>
#include <linux/sh_intc.h>
#include <cpu/sh7786.h>
#include "pcie-sh7786.h"
#include <asm/sizes.h>

struct sh7786_pcie_port {
	struct pci_channel	*hose;
28
	struct clk		*fclk, phy_clk;
29 30 31 32 33 34 35
	unsigned int		index;
	int			endpoint;
	int			link;
};

/* Array of nr_ports port descriptors, allocated by sh7786_pcie_init(). */
static struct sh7786_pcie_port *sh7786_pcie_ports;
/* Port count as returned by the hwops core_init() hook. */
static unsigned int nr_ports;
/*
 * Page frame number of the aligned start of RAM computed in pcie_init();
 * copied into every device by pcibios_bus_add_device().
 */
static unsigned long dma_pfn_offset;

static struct sh7786_pcie_hwops {
	int (*core_init)(void);
40
	async_func_t port_init_hw;
41 42
} *sh7786_pcie_hwops;

43
static struct resource sh7786_pci0_resources[] = {
44
	{
45
		.name	= "PCIe0 MEM 0",
46 47
		.start	= 0xfd000000,
		.end	= 0xfd000000 + SZ_8M - 1,
48
		.flags	= IORESOURCE_MEM,
49
	}, {
50
		.name	= "PCIe0 MEM 1",
51 52 53
		.start	= 0xc0000000,
		.end	= 0xc0000000 + SZ_512M - 1,
		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
54
	}, {
55
		.name	= "PCIe0 MEM 2",
56 57
		.start	= 0x10000000,
		.end	= 0x10000000 + SZ_64M - 1,
58
		.flags	= IORESOURCE_MEM,
59
	}, {
60
		.name	= "PCIe0 IO",
61 62
		.start	= 0xfe100000,
		.end	= 0xfe100000 + SZ_1M - 1,
63
		.flags	= IORESOURCE_IO,
64 65 66
	},
};

67 68
static struct resource sh7786_pci1_resources[] = {
	{
69
		.name	= "PCIe1 MEM 0",
70 71
		.start	= 0xfd800000,
		.end	= 0xfd800000 + SZ_8M - 1,
72
		.flags	= IORESOURCE_MEM,
73
	}, {
74
		.name	= "PCIe1 MEM 1",
75 76 77 78
		.start	= 0xa0000000,
		.end	= 0xa0000000 + SZ_512M - 1,
		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
	}, {
79
		.name	= "PCIe1 MEM 2",
80 81 82 83
		.start	= 0x30000000,
		.end	= 0x30000000 + SZ_256M - 1,
		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
	}, {
84
		.name	= "PCIe1 IO",
85 86
		.start	= 0xfe300000,
		.end	= 0xfe300000 + SZ_1M - 1,
87
		.flags	= IORESOURCE_IO,
88
	},
89 90
};

91
static struct resource sh7786_pci2_resources[] = {
92
	{
93
		.name	= "PCIe2 MEM 0",
94 95
		.start	= 0xfc800000,
		.end	= 0xfc800000 + SZ_4M - 1,
96
		.flags	= IORESOURCE_MEM,
97
	}, {
98
		.name	= "PCIe2 MEM 1",
99 100 101
		.start	= 0x80000000,
		.end	= 0x80000000 + SZ_512M - 1,
		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
102
	}, {
103
		.name	= "PCIe2 MEM 2",
104 105 106 107
		.start	= 0x20000000,
		.end	= 0x20000000 + SZ_256M - 1,
		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
	}, {
108
		.name	= "PCIe2 IO",
109 110
		.start	= 0xfcd00000,
		.end	= 0xfcd00000 + SZ_1M - 1,
111
		.flags	= IORESOURCE_IO,
112 113 114 115 116
	},
};

/*
 * pci_ops instance installed in every channel by DEFINE_CONTROLLER().
 * Only declared here; no definition appears in this file.
 */
extern struct pci_ops sh7786_pci_ops;

/*
 * Initializer for one struct pci_channel.
 *
 * @start: value stored in .reg_base (the controller's register base)
 * @idx:   port number; selects the sh7786_pci<idx>_resources table
 */
#define DEFINE_CONTROLLER(start, idx)					\
{									\
	.pci_ops	= &sh7786_pci_ops,				\
	.resources	= sh7786_pci##idx##_resources,			\
	.nr_resources	= ARRAY_SIZE(sh7786_pci##idx##_resources),	\
	.reg_base	= (start),					\
	.mem_offset	= 0,						\
	.io_offset	= 0,						\
}

/*
 * One channel per PCIe controller. sh7786_pcie_init() indexes this array
 * by port number, so the position of each entry matters.
 */
static struct pci_channel sh7786_pci_channels[] = {
	[0] = DEFINE_CONTROLLER(0xfe000000, 0),
	[1] = DEFINE_CONTROLLER(0xfe200000, 1),
	[2] = DEFINE_CONTROLLER(0xfcc00000, 2),
};

133 134 135 136
static struct clk fixed_pciexclkp = {
	.rate = 100000000,	/* 100 MHz reference clock */
};

137
static void sh7786_pci_fixup(struct pci_dev *dev)
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
{
	/*
	 * Prevent enumeration of root complex resources.
	 */
	if (pci_is_root_bus(dev->bus) && dev->devfn == 0) {
		int i;

		for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
			dev->resource[i].start	= 0;
			dev->resource[i].end	= 0;
			dev->resource[i].flags	= 0;
		}
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_SH7786,
			 sh7786_pci_fixup);

155
static int __init phy_wait_for_ack(struct pci_channel *chan)
156 157 158 159 160 161 162 163 164 165 166 167 168
{
	unsigned int timeout = 100;

	while (timeout--) {
		if (pci_read_reg(chan, SH4A_PCIEPHYADRR) & (1 << BITS_ACK))
			return 0;

		udelay(100);
	}

	return -ETIMEDOUT;
}

169
static int __init pci_wait_for_irq(struct pci_channel *chan, unsigned int mask)
170 171 172 173 174 175 176 177 178 179 180 181 182
{
	unsigned int timeout = 100;

	while (timeout--) {
		if ((pci_read_reg(chan, SH4A_PCIEINTR) & mask) == mask)
			return 0;

		udelay(100);
	}

	return -ETIMEDOUT;
}

183 184
static void __init phy_write_reg(struct pci_channel *chan, unsigned int addr,
				 unsigned int lane, unsigned int data)
185
{
186
	unsigned long phyaddr;
187 188 189 190 191 192 193 194 195 196 197

	phyaddr = (1 << BITS_CMD) + ((lane & 0xf) << BITS_LANE) +
			((addr & 0xff) << BITS_ADR);

	/* Set write data */
	pci_write_reg(chan, data, SH4A_PCIEPHYDOUTR);
	pci_write_reg(chan, phyaddr, SH4A_PCIEPHYADRR);

	phy_wait_for_ack(chan);

	/* Clear command */
198
	pci_write_reg(chan, 0, SH4A_PCIEPHYDOUTR);
199 200 201 202 203
	pci_write_reg(chan, 0, SH4A_PCIEPHYADRR);

	phy_wait_for_ack(chan);
}

204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
static int __init pcie_clk_init(struct sh7786_pcie_port *port)
{
	struct pci_channel *chan = port->hose;
	struct clk *clk;
	char fclk_name[16];
	int ret;

	/*
	 * First register the fixed clock
	 */
	ret = clk_register(&fixed_pciexclkp);
	if (unlikely(ret != 0))
		return ret;

	/*
	 * Grab the port's function clock, which the PHY clock depends
	 * on. clock lookups don't help us much at this point, since no
	 * dev_id is available this early. Lame.
	 */
	snprintf(fclk_name, sizeof(fclk_name), "pcie%d_fck", port->index);

	port->fclk = clk_get(NULL, fclk_name);
	if (IS_ERR(port->fclk)) {
		ret = PTR_ERR(port->fclk);
		goto err_fclk;
	}

	clk_enable(port->fclk);

	/*
	 * And now, set up the PHY clock
	 */
	clk = &port->phy_clk;

	memset(clk, 0, sizeof(struct clk));

	clk->parent = &fixed_pciexclkp;
	clk->enable_reg = (void __iomem *)(chan->reg_base + SH4A_PCIEPHYCTLR);
	clk->enable_bit = BITS_CKE;

244
	ret = sh_clk_mstp_register(clk, 1);
245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
	if (unlikely(ret < 0))
		goto err_phy;

	return 0;

err_phy:
	clk_disable(port->fclk);
	clk_put(port->fclk);
err_fclk:
	clk_unregister(&fixed_pciexclkp);

	return ret;
}

/*
 * Run the PHY initialisation sequence for one port.
 *
 * Enables the PHY clock, issues the fixed list of PHY register writes
 * below, disables the PHY clock again and then polls SH4A_PCIEPHYSR until
 * it reads non-zero (at most 100 reads, 100us apart).
 *
 * Returns 0 once SH4A_PCIEPHYSR is non-zero, -ETIMEDOUT otherwise.
 */
static int __init phy_init(struct sh7786_pcie_port *port)
{
	struct pci_channel *chan = port->hose;
	unsigned int tries;

	clk_enable(&port->phy_clk);

	/* Initialize the phy */
	phy_write_reg(chan, 0x60, 0xf, 0x004b008b);
	phy_write_reg(chan, 0x61, 0xf, 0x00007b41);
	phy_write_reg(chan, 0x64, 0xf, 0x00ff4f00);
	phy_write_reg(chan, 0x65, 0xf, 0x09070907);
	phy_write_reg(chan, 0x66, 0xf, 0x00000010);
	phy_write_reg(chan, 0x74, 0xf, 0x0007001c);
	phy_write_reg(chan, 0x79, 0xf, 0x01fc000d);
	phy_write_reg(chan, 0xb0, 0xf, 0x00000610);

	/* Deassert Standby */
	phy_write_reg(chan, 0x67, 0x1, 0x00000400);

	/* Disable clock */
	clk_disable(&port->phy_clk);

	for (tries = 100; tries; tries--) {
		if (pci_read_reg(chan, SH4A_PCIEPHYSR))
			return 0;

		udelay(100);
	}

	return -ETIMEDOUT;
}

292
static void __init pcie_reset(struct sh7786_pcie_port *port)
293 294 295 296 297 298 299 300 301
{
	struct pci_channel *chan = port->hose;

	pci_write_reg(chan, 1, SH4A_PCIESRSTR);
	pci_write_reg(chan, 0, SH4A_PCIETCTLR);
	pci_write_reg(chan, 0, SH4A_PCIESRSTR);
	pci_write_reg(chan, 0, SH4A_PCIETXVC0SR);
}

302
static int __init pcie_init(struct sh7786_pcie_port *port)
303 304 305
{
	struct pci_channel *chan = port->hose;
	unsigned int data;
306
	phys_addr_t memstart, memend;
307
	size_t memsize;
308
	int ret, i, win;
309 310

	/* Begin initialization */
311
	pcie_reset(port);
312

313 314 315 316 317 318
	/*
	 * Initial header for port config space is type 1, set the device
	 * class to match. Hardware takes care of propagating the IDSETR
	 * settings, so there is no need to bother with a quirk.
	 */
	pci_write_reg(chan, PCI_CLASS_BRIDGE_PCI << 16, SH4A_PCIEIDSETR1);
319 320 321 322 323 324 325 326 327 328 329 330 331

	/* Initialize default capabilities. */
	data = pci_read_reg(chan, SH4A_PCIEEXPCAP0);
	data &= ~(PCI_EXP_FLAGS_TYPE << 16);

	if (port->endpoint)
		data |= PCI_EXP_TYPE_ENDPOINT << 20;
	else
		data |= PCI_EXP_TYPE_ROOT_PORT << 20;

	data |= PCI_CAP_ID_EXP;
	pci_write_reg(chan, data, SH4A_PCIEEXPCAP0);

332 333 334 335
	/* Enable data link layer active state reporting */
	pci_write_reg(chan, PCI_EXP_LNKCAP_DLLLARC, SH4A_PCIEEXPCAP3);

	/* Enable extended sync and ASPM L0s support */
336
	data = pci_read_reg(chan, SH4A_PCIEEXPCAP4);
337 338
	data &= ~PCI_EXP_LNKCTL_ASPMC;
	data |= PCI_EXP_LNKCTL_ES | 1;
339 340
	pci_write_reg(chan, data, SH4A_PCIEEXPCAP4);

341 342 343 344 345 346
	/* Write out the physical slot number */
	data = pci_read_reg(chan, SH4A_PCIEEXPCAP5);
	data &= ~PCI_EXP_SLTCAP_PSN;
	data |= (port->index + 1) << 19;
	pci_write_reg(chan, data, SH4A_PCIEEXPCAP5);

347 348
	/* Set the completion timer timeout to the maximum 32ms. */
	data = pci_read_reg(chan, SH4A_PCIETLCTLR);
349
	data &= ~0x3f00;
350 351 352 353 354 355 356 357 358 359 360 361
	data |= 0x32 << 8;
	pci_write_reg(chan, data, SH4A_PCIETLCTLR);

	/*
	 * Set fast training sequences to the maximum 255,
	 * and enable MAC data scrambling.
	 */
	data = pci_read_reg(chan, SH4A_PCIEMACCTLR);
	data &= ~PCIEMACCTLR_SCR_DIS;
	data |= (0xff << 16);
	pci_write_reg(chan, data, SH4A_PCIEMACCTLR);

362 363 364 365 366 367 368 369 370 371 372
	memstart = __pa(memory_start);
	memend   = __pa(memory_end);
	memsize = roundup_pow_of_two(memend - memstart);

	/*
	 * The start address must be aligned on its size. So we round
	 * it down, and then recalculate the size so that it covers
	 * the entire memory.
	 */
	memstart = ALIGN_DOWN(memstart, memsize);
	memsize = roundup_pow_of_two(memend - memstart);
373

374 375
	dma_pfn_offset = memstart >> PAGE_SHIFT;

376 377 378 379 380
	/*
	 * If there's more than 512MB of memory, we need to roll over to
	 * LAR1/LAMR1.
	 */
	if (memsize > SZ_512M) {
381
		pci_write_reg(chan, memstart + SZ_512M, SH4A_PCIELAR1);
382 383
		pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1,
			      SH4A_PCIELAMR1);
384 385 386 387 388
		memsize = SZ_512M;
	} else {
		/*
		 * Otherwise just zero it out and disable it.
		 */
389 390
		pci_write_reg(chan, 0, SH4A_PCIELAR1);
		pci_write_reg(chan, 0, SH4A_PCIELAMR1);
391 392 393 394 395 396
	}

	/*
	 * LAR0/LAMR0 covers up to the first 512MB, which is enough to
	 * cover all of lowmem on most platforms.
	 */
397
	pci_write_reg(chan, memstart, SH4A_PCIELAR0);
398
	pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0);
399

400 401 402 403 404
	/* Finish initialization */
	data = pci_read_reg(chan, SH4A_PCIETCTLR);
	data |= 0x1;
	pci_write_reg(chan, data, SH4A_PCIETCTLR);

405 406 407
	/* Let things settle down a bit.. */
	mdelay(100);

408 409 410 411 412 413 414 415 416 417
	/* Enable DL_Active Interrupt generation */
	data = pci_read_reg(chan, SH4A_PCIEDLINTENR);
	data |= PCIEDLINTENR_DLL_ACT_ENABLE;
	pci_write_reg(chan, data, SH4A_PCIEDLINTENR);

	/* Disable MAC data scrambling. */
	data = pci_read_reg(chan, SH4A_PCIEMACCTLR);
	data |= PCIEMACCTLR_SCR_DIS | (0xff << 16);
	pci_write_reg(chan, data, SH4A_PCIEMACCTLR);

418 419 420 421 422
	/*
	 * This will timeout if we don't have a link, but we permit the
	 * port to register anyways in order to support hotplug on future
	 * hardware.
	 */
423 424
	ret = pci_wait_for_irq(chan, MASK_INT_TX_CTRL);

425 426 427 428 429 430
	data = pci_read_reg(chan, SH4A_PCIEPCICONF1);
	data &= ~(PCI_STATUS_DEVSEL_MASK << 16);
	data |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
		(PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST) << 16;
	pci_write_reg(chan, data, SH4A_PCIEPCICONF1);

431 432 433 434 435
	pci_write_reg(chan, 0x80888000, SH4A_PCIETXVC0DCTLR);
	pci_write_reg(chan, 0x00222000, SH4A_PCIERXVC0DCTLR);

	wmb();

436 437 438 439 440 441 442
	if (ret == 0) {
		data = pci_read_reg(chan, SH4A_PCIEMACSR);
		printk(KERN_NOTICE "PCI: PCIe#%d x%d link detected\n",
		       port->index, (data >> 20) & 0x3f);
	} else
		printk(KERN_NOTICE "PCI: PCIe#%d link down\n",
		       port->index);
443

444
	for (i = win = 0; i < chan->nr_resources; i++) {
445 446
		struct resource *res = chan->resources + i;
		resource_size_t size;
447
		u32 mask;
448

449 450 451 452 453
		/*
		 * We can't use the 32-bit mode windows in legacy 29-bit
		 * mode, so just skip them entirely.
		 */
		if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode())
454 455 456
			res->flags |= IORESOURCE_DISABLED;

		if (res->flags & IORESOURCE_DISABLED)
457 458 459
			continue;

		pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win));
460 461 462 463 464

		/*
		 * The PAMR mask is calculated in units of 256kB, which
		 * keeps things pretty simple.
		 */
465 466 467
		size = resource_size(res);
		mask = (roundup_pow_of_two(size) / SZ_256K) - 1;
		pci_write_reg(chan, mask << 18, SH4A_PCIEPAMR(win));
468

469
		pci_write_reg(chan, upper_32_bits(res->start),
470
			      SH4A_PCIEPARH(win));
471
		pci_write_reg(chan, lower_32_bits(res->start),
472
			      SH4A_PCIEPARL(win));
473

474
		mask = MASK_PARE;
475
		if (res->flags & IORESOURCE_IO)
476
			mask |= MASK_SPC;
477

478
		pci_write_reg(chan, mask, SH4A_PCIEPTCTLR(win));
479 480

		win++;
481
	}
482 483 484 485

	return 0;
}

486
int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
487
{
P
Paul Mundt 已提交
488
        return evt2irq(0xae0);
489 490
}

491 492 493 494 495
void pcibios_bus_add_device(struct pci_dev *pdev)
{
	pdev->dev.dma_pfn_offset = dma_pfn_offset;
}

496
static int __init sh7786_pcie_core_init(void)
497 498 499 500 501
{
	/* Return the number of ports */
	return test_mode_pin(MODE_PIN12) ? 3 : 2;
}

502
static void __init sh7786_pcie_init_hw(void *data, async_cookie_t cookie)
503
{
504
	struct sh7786_pcie_port *port = data;
505 506 507 508 509 510 511 512
	int ret;

	/*
	 * Check if we are configured in endpoint or root complex mode,
	 * this is a fixed pin setting that applies to all PCIe ports.
	 */
	port->endpoint = test_mode_pin(MODE_PIN11);

513 514 515 516
	/*
	 * Setup clocks, needed both for PHY and PCIe registers.
	 */
	ret = pcie_clk_init(port);
517 518 519 520 521
	if (unlikely(ret < 0)) {
		pr_err("clock initialization failed for port#%d\n",
		       port->index);
		return;
	}
522 523

	ret = phy_init(port);
524 525 526 527 528
	if (unlikely(ret < 0)) {
		pr_err("phy initialization failed for port#%d\n",
		       port->index);
		return;
	}
529

530
	ret = pcie_init(port);
531 532 533 534 535
	if (unlikely(ret < 0)) {
		pr_err("core initialization failed for port#%d\n",
			       port->index);
		return;
	}
536

537 538 539 540
	/* In the interest of preserving device ordering, synchronize */
	async_synchronize_cookie(cookie);

	register_pci_controller(port->hose);
541 542 543 544 545 546 547 548 549
}

/*
 * Hook table installed by sh7786_pcie_init(). Marked __initdata, like the
 * __init functions it points to.
 */
static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = {
	.port_init_hw	= sh7786_pcie_init_hw,
	.core_init	= sh7786_pcie_core_init,
};

/*
 * Driver entry point, run as an arch_initcall.
 *
 * Selects the hwops table, asks it for the port count, allocates the port
 * array, enables the optional platform clock, applies the MMSELR-dependent
 * resource restriction and then schedules asynchronous hardware
 * initialisation for every port, waiting for all of it to finish.
 *
 * Returns 0 on success, -ENODEV if no ports are reported, or -ENOMEM if
 * the port array cannot be allocated. Failures of individual ports are
 * logged by the per-port hook and do not affect the return value.
 */
static int __init sh7786_pcie_init(void)
{
	struct clk *platclk;
	unsigned int i;
	u32 mm_sel;

	/* pr_fmt supplies the "PCI: " prefix. */
	pr_notice("Starting initialization.\n");

	sh7786_pcie_hwops = &sh7786_65nm_pcie_hwops;

	nr_ports = sh7786_pcie_hwops->core_init();
	BUG_ON(nr_ports > ARRAY_SIZE(sh7786_pci_channels));

	if (unlikely(nr_ports == 0))
		return -ENODEV;

	/* kcalloc() zeroes the array and checks the size multiplication. */
	sh7786_pcie_ports = kcalloc(nr_ports, sizeof(*sh7786_pcie_ports),
				    GFP_KERNEL);
	if (unlikely(!sh7786_pcie_ports))
		return -ENOMEM;

	/*
	 * Fetch any optional platform clock associated with this block.
	 *
	 * This is a rather nasty hack for boards with spec-mocking FPGAs
	 * that have a secondary set of clocks outside of the on-chip
	 * ones that need to be accounted for before there is any chance
	 * of touching the existing MSTP bits or CPG clocks.
	 */
	platclk = clk_get(NULL, "pcie_plat_clk");
	if (IS_ERR(platclk)) {
		/* Sane hardware should probably get a WARN_ON.. */
		platclk = NULL;
	}

	/* platclk is NULL here when the lookup above failed. */
	clk_enable(platclk);

	mm_sel = sh7786_mm_sel();

	/*
	 * Depending on the MMSELR register value, the area described by
	 * sh7786_pci0_resources[2] ("PCIe0 MEM 2") may not be available.
	 * See Table 13.11 of the SH7786 datasheet.
	 */
	if (mm_sel != 1 && mm_sel != 2 && mm_sel != 5 && mm_sel != 6)
		sh7786_pci0_resources[2].flags |= IORESOURCE_DISABLED;

	pr_notice("probing %u ports.\n", nr_ports);

	for (i = 0; i < nr_ports; i++) {
		struct sh7786_pcie_port *port = sh7786_pcie_ports + i;

		port->index		= i;
		port->hose		= sh7786_pci_channels + i;
		/*
		 * NOTE(review): resources[0] is a memory window in the
		 * resource tables ("PCIe<n> MEM 0"); the IORESOURCE_IO
		 * entry is the last one. Confirm this is the intended
		 * I/O map base.
		 */
		port->hose->io_map_base	= port->hose->resources[0].start;

		async_schedule(sh7786_pcie_hwops->port_init_hw, port);
	}

	async_synchronize_full();

	return 0;
}
arch_initcall(sh7786_pcie_init);