/*
 * This file implements the DMA operations for NVLink devices. The NPU
 * devices all point to the same iommu table as the parent PCI device.
 *
 * Copyright Alistair Popple, IBM Corporation 2015.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */

#include <linux/export.h>
#include <linux/pci.h>
#include <linux/memblock.h>
#include <linux/iommu.h>

#include <asm/iommu.h>
#include <asm/pnv-pci.h>
#include <asm/msi_bitmap.h>
#include <asm/opal.h>

#include "powernv.h"
#include "pci.h"

/*
 * Other types of TCE cache invalidation are not functional in the
 * hardware.
 */
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
	return PCI_DN(dn)->pcidev;
}

/*
 * Given an NPU device, get the associated PCI device.
 *
 * The associated device is looked up through the first phandle of the
 * "ibm,gpu" property in the NPU's device tree node. Returns NULL if
 * @npdev is NULL, has no device tree node, or has no such phandle.
 */
struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
{
	struct device_node *dn;
	struct pci_dev *gpdev;

	/* Callers may hand us an unchecked pointer; don't dereference NULL. */
	if (WARN_ON(!npdev))
		return NULL;

	if (!npdev->dev.of_node)
		return NULL;

	/* Get associated PCI device */
	dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
	if (!dn)
		return NULL;

	gpdev = get_pci_dev(dn);
	/* Drop the node reference once the device has been looked up. */
	of_node_put(dn);

	return gpdev;
}
EXPORT_SYMBOL(pnv_pci_get_gpu_dev);

/*
 * Given the real PCI device, get a linked NPU device.
 *
 * @index selects which phandle of the "ibm,npu" property in the PCI
 * device's device tree node is followed. Returns NULL if @gpdev is NULL,
 * has no device tree node, or has no phandle at @index.
 */
struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
{
	struct device_node *dn;
	struct pci_dev *npdev;

	if (WARN_ON(!gpdev))
		return NULL;

	/* A device without a device tree node has no NPU links to follow. */
	if (!gpdev->dev.of_node)
		return NULL;

	/* Get associated NPU device */
	dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
	if (!dn)
		return NULL;

	npdev = get_pci_dev(dn);
	/* Drop the node reference once the device has been looked up. */
	of_node_put(dn);

	return npdev;
}
EXPORT_SYMBOL(pnv_pci_get_npu_dev);

/*
 * Report that the calling DMA operation is unsupported for NVLink devices.
 *
 * Expands to a dev_err_once() call that references a variable named "dev"
 * from the enclosing function, so it can only be used where such a
 * variable is in scope. __func__ puts the caller's name in the message.
 */
#define NPU_DMA_OP_UNSUPPORTED()					\
	dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \
		__func__)

/*
 * .alloc hook of dma_npu_ops: allocation is not supported for NPU
 * devices, so report the attempt and return no memory.
 */
static void *dma_npu_alloc(struct device *dev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flag,
			   unsigned long attrs)
{
	NPU_DMA_OP_UNSUPPORTED();
	return NULL;
}

/*
 * .free hook of dma_npu_ops: nothing can have been allocated through
 * dma_npu_alloc(), so only report the unsupported call.
 */
static void dma_npu_free(struct device *dev, size_t size,
			 void *vaddr, dma_addr_t dma_handle,
			 unsigned long attrs)
{
	NPU_DMA_OP_UNSUPPORTED();
}

static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction direction,
92
				   unsigned long attrs)
93 94 95 96 97 98 99
{
	NPU_DMA_OP_UNSUPPORTED();
	return 0;
}

static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist,
			  int nelems, enum dma_data_direction direction,
100
			  unsigned long attrs)
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
{
	NPU_DMA_OP_UNSUPPORTED();
	return 0;
}

/*
 * .dma_supported hook of dma_npu_ops: reports the call as unsupported
 * and returns 0 regardless of @mask.
 */
static int dma_npu_dma_supported(struct device *dev, u64 mask)
{
	NPU_DMA_OP_UNSUPPORTED();
	return 0;
}

/*
 * .get_required_mask hook of dma_npu_ops: reports the call as
 * unsupported and returns an empty (0) mask.
 */
static u64 dma_npu_get_required_mask(struct device *dev)
{
	NPU_DMA_OP_UNSUPPORTED();
	return 0;
}

118
static struct dma_map_ops dma_npu_ops = {
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
	.map_page		= dma_npu_map_page,
	.map_sg			= dma_npu_map_sg,
	.alloc			= dma_npu_alloc,
	.free			= dma_npu_free,
	.dma_supported		= dma_npu_dma_supported,
	.get_required_mask	= dma_npu_get_required_mask,
};

/*
 * Returns the PE assoicated with the PCI device of the given
 * NPU. Returns the linked pci device if pci_dev != NULL.
 */
static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
						  struct pci_dev **gpdev)
{
	struct pnv_phb *phb;
	struct pci_controller *hose;
	struct pci_dev *pdev;
	struct pnv_ioda_pe *pe;
	struct pci_dn *pdn;

140 141 142
	pdev = pnv_pci_get_gpu_dev(npe->pdev);
	if (!pdev)
		return NULL;
143

144 145 146 147 148 149 150
	pdn = pci_get_pdn(pdev);
	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
		return NULL;

	hose = pci_bus_to_host(pdev->bus);
	phb = hose->private_data;
	pe = &phb->ioda.pe_array[pdn->pe_number];
151 152 153 154 155 156 157

	if (gpdev)
		*gpdev = pdev;

	return pe;
}

158
long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
		struct iommu_table *tbl)
{
	struct pnv_phb *phb = npe->phb;
	int64_t rc;
	const unsigned long size = tbl->it_indirect_levels ?
		tbl->it_level_size : tbl->it_size;
	const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
	const __u64 win_size = tbl->it_size << tbl->it_page_shift;

	pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
			start_addr, start_addr + win_size - 1,
			IOMMU_PAGE_SIZE(tbl));

	rc = opal_pci_map_pe_dma_window(phb->opal_id,
			npe->pe_number,
			npe->pe_number,
			tbl->it_indirect_levels + 1,
			__pa(tbl->it_base),
			size << 3,
			IOMMU_PAGE_SIZE(tbl));
	if (rc) {
		pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
		return rc;
	}
183
	pnv_pci_phb3_tce_invalidate_entire(phb, false);
184

185
	/* Add the table to the list so its TCE cache will get invalidated */
186
	pnv_pci_link_table_and_group(phb->hose->node, num,
187 188
			tbl, &npe->table_group);

189 190 191
	return 0;
}

192
long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num)
193 194 195 196 197 198 199 200 201 202 203 204 205 206
{
	struct pnv_phb *phb = npe->phb;
	int64_t rc;

	pe_info(npe, "Removing DMA window\n");

	rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
			npe->pe_number,
			0/* levels */, 0/* table address */,
			0/* table size */, 0/* page size */);
	if (rc) {
		pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
		return rc;
	}
207
	pnv_pci_phb3_tce_invalidate_entire(phb, false);
208

209
	pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
210
			&npe->table_group);
211

212
	return 0;
213 214 215
}

/*
216
 * Enables 32 bit DMA on NPU.
217
 */
218
static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
{
	struct pci_dev *gpdev;
	struct pnv_ioda_pe *gpe;
	int64_t rc;

	/*
	 * Find the assoicated PCI devices and get the dma window
	 * information from there.
	 */
	if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
		return;

	gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
	if (!gpe)
		return;

235
	rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);
236 237 238 239 240 241 242 243 244

	/*
	 * We don't initialise npu_pe->tce32_table as we always use
	 * dma_npu_ops which are nops.
	 */
	set_dma_ops(&npe->pdev->dev, &dma_npu_ops);
}

/*
245
 * Enables bypass mode on the NPU. The NPU only supports one
246
 * window per link, so bypass needs to be explicitly enabled or
247 248 249
 * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
 * active at the same time.
 */
250
static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
251 252 253
{
	struct pnv_phb *phb = npe->phb;
	int64_t rc = 0;
254
	phys_addr_t top = memblock_end_of_DRAM();
255 256 257 258

	if (phb->type != PNV_PHB_NPU || !npe->pdev)
		return -EINVAL;

259
	rc = pnv_npu_unset_window(npe, 0);
260 261 262
	if (rc != OPAL_SUCCESS)
		return rc;

263 264 265 266 267 268 269 270
	/* Enable the bypass window */

	top = roundup_pow_of_two(top);
	dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
			npe->pe_number);
	rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
			npe->pe_number, npe->pe_number,
			0 /* bypass base */, top);
271

272
	if (rc == OPAL_SUCCESS)
273
		pnv_pci_phb3_tce_invalidate_entire(phb, false);
274

275 276 277
	return rc;
}

278
void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
279
{
280 281 282 283 284
	int i;
	struct pnv_phb *phb;
	struct pci_dn *pdn;
	struct pnv_ioda_pe *npe;
	struct pci_dev *npdev;
285

286 287
	for (i = 0; ; ++i) {
		npdev = pnv_pci_get_npu_dev(gpdev, i);
288

289 290
		if (!npdev)
			break;
291

292 293 294
		pdn = pci_get_pdn(npdev);
		if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
			return;
295

296
		phb = pci_bus_to_host(npdev->bus)->private_data;
297

298 299
		/* We only do bypass if it's enabled on the linked device */
		npe = &phb->ioda.pe_array[pdn->pe_number];
300

301 302 303 304 305 306 307 308 309
		if (bypass) {
			dev_info(&npdev->dev,
					"Using 64-bit DMA iommu bypass\n");
			pnv_npu_dma_set_bypass(npe);
		} else {
			dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
			pnv_npu_dma_set_32(npe);
		}
	}
310
}
311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336

/* Switch ownership from platform code to external user (e.g. VFIO) */
void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
{
	struct pnv_phb *phb = npe->phb;
	int64_t rc;

	/*
	 * Note: NPU has just a single TVE in the hardware which means that
	 * while used by the kernel, it can have either 32bit window or
	 * DMA bypass but never both. So we deconfigure 32bit window only
	 * if it was enabled at the moment of ownership change.
	 */
	if (npe->table_group.tables[0]) {
		pnv_npu_unset_window(npe, 0);
		return;
	}

	/* Disable bypass */
	rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
			npe->pe_number, npe->pe_number,
			0 /* bypass base */, 0);
	if (rc) {
		pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
		return;
	}
337
	pnv_pci_phb3_tce_invalidate_entire(npe->phb, false);
338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
}

/*
 * Add the NPU devices linked to @npe's PCI device to that device's
 * iommu group.
 *
 * Looks up the linked PCI device and its PE, then walks the devices on
 * the NPU PHB's bus and adds each one whose linked PCI device is the
 * same to the group held in the PE's table group.
 *
 * Returns the PE of the linked PCI device, or NULL if none is found.
 */
struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
{
	struct pci_bus *npu_bus = npe->phb->hose->bus;
	struct pci_dev *linked_gpu = NULL;
	struct pnv_ioda_pe *gpu_pe;
	struct pci_dev *candidate;

	gpu_pe = get_gpu_pci_dev_and_pe(npe, &linked_gpu);
	if (!(gpu_pe && linked_gpu))
		return NULL;

	list_for_each_entry(candidate, &npu_bus->devices, bus_list) {
		/* Only devices linked to the same PCI device are attached. */
		if (pnv_pci_get_gpu_dev(candidate) == linked_gpu) {
			pe_info(gpu_pe, "Attached NPU %s\n",
				dev_name(&candidate->dev));
			iommu_group_add_device(gpu_pe->table_group.group,
					       &candidate->dev);
		}
	}

	return gpu_pe;
}