/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
#include <linux/amd-iommu.h>
#endif
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "cwsr_trap_handler_gfx8.asm"
#include "kfd_iommu.h"

#define MQD_SIZE_ALIGNED 768

#ifdef KFD_SUPPORT_IOMMU_V2
O
Oded Gabbay 已提交
38
static const struct kfd_device_info kaveri_device_info = {
39 40
	.asic_family = CHIP_KAVERI,
	.max_pasid_bits = 16,
41 42
	/* max num of queues for KV.TODO should be a dynamic value */
	.max_no_of_hqd	= 24,
43
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
44
	.event_interrupt_class = &event_interrupt_class_cik,
45
	.num_of_watch_points = 4,
F
Felix Kuehling 已提交
46 47
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
48
	.needs_iommu_device = true,
49
	.needs_pci_atomics = false,
50 51 52 53
};

static const struct kfd_device_info carrizo_device_info = {
	.asic_family = CHIP_CARRIZO,
O
Oded Gabbay 已提交
54
	.max_pasid_bits = 16,
55 56
	/* max num of queues for CZ.TODO should be a dynamic value */
	.max_no_of_hqd	= 24,
57
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
58
	.event_interrupt_class = &event_interrupt_class_cik,
59
	.num_of_watch_points = 4,
F
Felix Kuehling 已提交
60 61
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
62
	.needs_iommu_device = true,
63
	.needs_pci_atomics = false,
O
Oded Gabbay 已提交
64
};
65
#endif
O
Oded Gabbay 已提交
66

67 68 69 70 71 72 73 74 75 76
static const struct kfd_device_info hawaii_device_info = {
	.asic_family = CHIP_HAWAII,
	.max_pasid_bits = 16,
	/* max num of queues for KV.TODO should be a dynamic value */
	.max_no_of_hqd	= 24,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
77
	.needs_iommu_device = false,
78 79 80 81 82 83 84 85 86 87 88 89
	.needs_pci_atomics = false,
};

static const struct kfd_device_info tonga_device_info = {
	.asic_family = CHIP_TONGA,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
90
	.needs_iommu_device = false,
91 92 93 94 95 96 97 98 99 100 101 102
	.needs_pci_atomics = true,
};

static const struct kfd_device_info tonga_vf_device_info = {
	.asic_family = CHIP_TONGA,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
103
	.needs_iommu_device = false,
104 105 106 107 108 109 110 111 112 113 114 115
	.needs_pci_atomics = false,
};

static const struct kfd_device_info fiji_device_info = {
	.asic_family = CHIP_FIJI,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
116
	.needs_iommu_device = false,
117 118 119 120 121 122 123 124 125 126 127 128
	.needs_pci_atomics = true,
};

static const struct kfd_device_info fiji_vf_device_info = {
	.asic_family = CHIP_FIJI,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
129
	.needs_iommu_device = false,
130 131 132 133 134 135 136 137 138 139 140 141 142
	.needs_pci_atomics = false,
};


static const struct kfd_device_info polaris10_device_info = {
	.asic_family = CHIP_POLARIS10,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
143
	.needs_iommu_device = false,
144 145 146 147 148 149 150 151 152 153 154 155
	.needs_pci_atomics = true,
};

static const struct kfd_device_info polaris10_vf_device_info = {
	.asic_family = CHIP_POLARIS10,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
156
	.needs_iommu_device = false,
157 158 159 160 161 162 163 164 165 166 167 168
	.needs_pci_atomics = false,
};

static const struct kfd_device_info polaris11_device_info = {
	.asic_family = CHIP_POLARIS11,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
169
	.needs_iommu_device = false,
170 171 172 173
	.needs_pci_atomics = true,
};


O
Oded Gabbay 已提交
174 175 176 177 178 179
struct kfd_deviceid {
	unsigned short did;
	const struct kfd_device_info *device_info;
};

static const struct kfd_deviceid supported_devices[] = {
180
#ifdef KFD_SUPPORT_IOMMU_V2
O
Oded Gabbay 已提交
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
	{ 0x1304, &kaveri_device_info },	/* Kaveri */
	{ 0x1305, &kaveri_device_info },	/* Kaveri */
	{ 0x1306, &kaveri_device_info },	/* Kaveri */
	{ 0x1307, &kaveri_device_info },	/* Kaveri */
	{ 0x1309, &kaveri_device_info },	/* Kaveri */
	{ 0x130A, &kaveri_device_info },	/* Kaveri */
	{ 0x130B, &kaveri_device_info },	/* Kaveri */
	{ 0x130C, &kaveri_device_info },	/* Kaveri */
	{ 0x130D, &kaveri_device_info },	/* Kaveri */
	{ 0x130E, &kaveri_device_info },	/* Kaveri */
	{ 0x130F, &kaveri_device_info },	/* Kaveri */
	{ 0x1310, &kaveri_device_info },	/* Kaveri */
	{ 0x1311, &kaveri_device_info },	/* Kaveri */
	{ 0x1312, &kaveri_device_info },	/* Kaveri */
	{ 0x1313, &kaveri_device_info },	/* Kaveri */
	{ 0x1315, &kaveri_device_info },	/* Kaveri */
	{ 0x1316, &kaveri_device_info },	/* Kaveri */
	{ 0x1317, &kaveri_device_info },	/* Kaveri */
	{ 0x1318, &kaveri_device_info },	/* Kaveri */
	{ 0x131B, &kaveri_device_info },	/* Kaveri */
	{ 0x131C, &kaveri_device_info },	/* Kaveri */
202 203 204 205 206
	{ 0x131D, &kaveri_device_info },	/* Kaveri */
	{ 0x9870, &carrizo_device_info },	/* Carrizo */
	{ 0x9874, &carrizo_device_info },	/* Carrizo */
	{ 0x9875, &carrizo_device_info },	/* Carrizo */
	{ 0x9876, &carrizo_device_info },	/* Carrizo */
207
	{ 0x9877, &carrizo_device_info },	/* Carrizo */
208
#endif
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
	{ 0x67A0, &hawaii_device_info },	/* Hawaii */
	{ 0x67A1, &hawaii_device_info },	/* Hawaii */
	{ 0x67A2, &hawaii_device_info },	/* Hawaii */
	{ 0x67A8, &hawaii_device_info },	/* Hawaii */
	{ 0x67A9, &hawaii_device_info },	/* Hawaii */
	{ 0x67AA, &hawaii_device_info },	/* Hawaii */
	{ 0x67B0, &hawaii_device_info },	/* Hawaii */
	{ 0x67B1, &hawaii_device_info },	/* Hawaii */
	{ 0x67B8, &hawaii_device_info },	/* Hawaii */
	{ 0x67B9, &hawaii_device_info },	/* Hawaii */
	{ 0x67BA, &hawaii_device_info },	/* Hawaii */
	{ 0x67BE, &hawaii_device_info },	/* Hawaii */
	{ 0x6920, &tonga_device_info },		/* Tonga */
	{ 0x6921, &tonga_device_info },		/* Tonga */
	{ 0x6928, &tonga_device_info },		/* Tonga */
	{ 0x6929, &tonga_device_info },		/* Tonga */
	{ 0x692B, &tonga_device_info },		/* Tonga */
	{ 0x692F, &tonga_vf_device_info },	/* Tonga vf */
	{ 0x6938, &tonga_device_info },		/* Tonga */
	{ 0x6939, &tonga_device_info },		/* Tonga */
	{ 0x7300, &fiji_device_info },		/* Fiji */
	{ 0x730F, &fiji_vf_device_info },	/* Fiji vf*/
	{ 0x67C0, &polaris10_device_info },	/* Polaris10 */
	{ 0x67C1, &polaris10_device_info },	/* Polaris10 */
	{ 0x67C2, &polaris10_device_info },	/* Polaris10 */
	{ 0x67C4, &polaris10_device_info },	/* Polaris10 */
	{ 0x67C7, &polaris10_device_info },	/* Polaris10 */
	{ 0x67C8, &polaris10_device_info },	/* Polaris10 */
	{ 0x67C9, &polaris10_device_info },	/* Polaris10 */
	{ 0x67CA, &polaris10_device_info },	/* Polaris10 */
	{ 0x67CC, &polaris10_device_info },	/* Polaris10 */
	{ 0x67CF, &polaris10_device_info },	/* Polaris10 */
	{ 0x67D0, &polaris10_vf_device_info },	/* Polaris10 vf*/
	{ 0x67DF, &polaris10_device_info },	/* Polaris10 */
	{ 0x67E0, &polaris11_device_info },	/* Polaris11 */
	{ 0x67E1, &polaris11_device_info },	/* Polaris11 */
	{ 0x67E3, &polaris11_device_info },	/* Polaris11 */
	{ 0x67E7, &polaris11_device_info },	/* Polaris11 */
	{ 0x67E8, &polaris11_device_info },	/* Polaris11 */
	{ 0x67E9, &polaris11_device_info },	/* Polaris11 */
	{ 0x67EB, &polaris11_device_info },	/* Polaris11 */
	{ 0x67EF, &polaris11_device_info },	/* Polaris11 */
	{ 0x67FF, &polaris11_device_info },	/* Polaris11 */
O
Oded Gabbay 已提交
252 253
};

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume(struct kfd_dev *kfd);

static const struct kfd_device_info *lookup_device_info(unsigned short did)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
		if (supported_devices[i].did == did) {
266
			WARN_ON(!supported_devices[i].device_info);
O
Oded Gabbay 已提交
267 268 269 270
			return supported_devices[i].device_info;
		}
	}

271 272 273
	dev_warn(kfd_device, "DID %04x is missing in supported_devices\n",
		 did);

O
Oded Gabbay 已提交
274 275 276
	return NULL;
}

277 278
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
	struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
O
Oded Gabbay 已提交
279 280 281 282 283 284
{
	struct kfd_dev *kfd;

	const struct kfd_device_info *device_info =
					lookup_device_info(pdev->device);

285 286
	if (!device_info) {
		dev_err(kfd_device, "kgd2kfd_probe failed\n");
O
Oded Gabbay 已提交
287
		return NULL;
288
	}
O
Oded Gabbay 已提交
289

290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
	if (device_info->needs_pci_atomics) {
		/* Allow BIF to recode atomics to PCIe 3.0
		 * AtomicOps. 32 and 64-bit requests are possible and
		 * must be supported.
		 */
		if (pci_enable_atomic_ops_to_root(pdev,
				PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
				PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
			dev_info(kfd_device,
				"skipped device %x:%x, PCI rejects atomics",
				 pdev->vendor, pdev->device);
			return NULL;
		}
	}

O
Oded Gabbay 已提交
305 306 307 308 309 310 311
	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
	if (!kfd)
		return NULL;

	kfd->kgd = kgd;
	kfd->device_info = device_info;
	kfd->pdev = pdev;
312
	kfd->init_complete = false;
313 314 315 316 317
	kfd->kfd2kgd = f2g;

	mutex_init(&kfd->doorbell_mutex);
	memset(&kfd->doorbell_available_index, 0,
		sizeof(kfd->doorbell_available_index));
O
Oded Gabbay 已提交
318 319 320 321

	return kfd;
}

static void kfd_cwsr_init(struct kfd_dev *kfd)
{
	if (cwsr_enable && kfd->device_info->supports_cwsr) {
		BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);

		kfd->cwsr_isa = cwsr_trap_gfx8_hex;
		kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
		kfd->cwsr_enabled = true;
	}
}

O
Oded Gabbay 已提交
333 334 335
bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
336 337
	unsigned int size;

O
Oded Gabbay 已提交
338 339
	kfd->shared_resources = *gpu_resources;

340 341 342 343 344
	kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
			- kfd->vm_info.first_vmid_kfd + 1;

345 346 347 348 349 350 351 352 353 354 355
	/* Verify module parameters regarding mapped process number*/
	if ((hws_max_conc_proc < 0)
			|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
		dev_err(kfd_device,
			"hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
			hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
			kfd->vm_info.vmid_num_kfd);
		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
	} else
		kfd->max_proc_per_quantum = hws_max_conc_proc;

356
	/* calculate max size of mqds needed for queues */
357 358
	size = max_num_of_queues_per_device *
			kfd->device_info->mqd_size_aligned;
359

360 361 362 363
	/*
	 * calculate max size of runlist packet.
	 * There can be only 2 packets at once
	 */
364 365 366
	size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
		+ sizeof(struct pm4_mes_runlist)) * 2;
367 368 369 370 371

	/* Add size of HIQ & DIQ */
	size += KFD_KERNEL_QUEUE_SIZE * 2;

	/* add another 512KB for all other allocations on gart (HPD, fences) */
372 373
	size += 512 * 1024;

374 375 376
	if (kfd->kfd2kgd->init_gtt_mem_allocation(
			kfd->kgd, size, &kfd->gtt_mem,
			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){
377
		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
378 379 380
		goto out;
	}

381
	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
382

383 384
	/* Initialize GTT sa with 512 byte chunk size */
	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
385
		dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
386 387 388
		goto kfd_gtt_sa_init_error;
	}

389 390 391 392 393
	if (kfd_doorbell_init(kfd)) {
		dev_err(kfd_device,
			"Error initializing doorbell aperture\n");
		goto kfd_doorbell_error;
	}
394

395
	if (kfd_topology_add_device(kfd)) {
396
		dev_err(kfd_device, "Error adding device to topology\n");
397 398 399
		goto kfd_topology_add_device_error;
	}

400
	if (kfd_interrupt_init(kfd)) {
401
		dev_err(kfd_device, "Error initializing interrupts\n");
402 403 404
		goto kfd_interrupt_error;
	}

405 406
	kfd->dqm = device_queue_manager_init(kfd);
	if (!kfd->dqm) {
407
		dev_err(kfd_device, "Error initializing queue manager\n");
408 409 410
		goto device_queue_manager_error;
	}

411 412 413
	if (kfd_iommu_device_init(kfd)) {
		dev_err(kfd_device, "Error initializing iommuv2\n");
		goto device_iommu_error;
414 415
	}

F
Felix Kuehling 已提交
416 417
	kfd_cwsr_init(kfd);

418 419 420
	if (kfd_resume(kfd))
		goto kfd_resume_error;

421 422
	kfd->dbgmgr = NULL;

O
Oded Gabbay 已提交
423
	kfd->init_complete = true;
424
	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
O
Oded Gabbay 已提交
425 426
		 kfd->pdev->device);

427
	pr_debug("Starting kfd with the following scheduling policy %d\n",
428
		kfd->dqm->sched_policy);
429

430 431
	goto out;

432
kfd_resume_error:
433
device_iommu_error:
434 435
	device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
436 437
	kfd_interrupt_exit(kfd);
kfd_interrupt_error:
438
	kfd_topology_remove_device(kfd);
439
kfd_topology_add_device_error:
440 441
	kfd_doorbell_fini(kfd);
kfd_doorbell_error:
442 443
	kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
444
	kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
445
	dev_err(kfd_device,
446
		"device %x:%x NOT added due to errors\n",
447 448 449
		kfd->pdev->vendor, kfd->pdev->device);
out:
	return kfd->init_complete;
O
Oded Gabbay 已提交
450 451 452 453
}

void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
454
	if (kfd->init_complete) {
455
		kgd2kfd_suspend(kfd);
456
		device_queue_manager_uninit(kfd->dqm);
457
		kfd_interrupt_exit(kfd);
458
		kfd_topology_remove_device(kfd);
459
		kfd_doorbell_fini(kfd);
460
		kfd_gtt_sa_fini(kfd);
461
		kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
462
	}
463

O
Oded Gabbay 已提交
464 465 466 467 468
	kfree(kfd);
}

void kgd2kfd_suspend(struct kfd_dev *kfd)
{
469 470 471 472 473
	if (!kfd->init_complete)
		return;

	kfd->dqm->ops.stop(kfd->dqm);

474
	kfd_iommu_suspend(kfd);
O
Oded Gabbay 已提交
475 476 477 478
}

int kgd2kfd_resume(struct kfd_dev *kfd)
{
479 480
	if (!kfd->init_complete)
		return 0;
481

482
	return kfd_resume(kfd);
483

484 485 486 487 488 489
}

static int kfd_resume(struct kfd_dev *kfd)
{
	int err = 0;

490 491 492 493 494 495 496
	err = kfd_iommu_resume(kfd);
	if (err) {
		dev_err(kfd_device,
			"Failed to resume IOMMU for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
		return err;
	}
497

498 499 500 501 502 503
	err = kfd->dqm->ops.start(kfd->dqm);
	if (err) {
		dev_err(kfd_device,
			"Error starting queue manager for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
		goto dqm_start_error;
504 505
	}

506 507 508
	return err;

dqm_start_error:
509
	kfd_iommu_suspend(kfd);
510
	return err;
O
Oded Gabbay 已提交
511 512
}

513 514
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
O
Oded Gabbay 已提交
515
{
516 517 518 519 520 521 522 523
	if (!kfd->init_complete)
		return;

	spin_lock(&kfd->interrupt_lock);

	if (kfd->interrupts_active
	    && interrupt_is_wanted(kfd, ih_ring_entry)
	    && enqueue_ih_ring_entry(kfd, ih_ring_entry))
524
		queue_work(kfd->ih_wq, &kfd->interrupt_work);
525 526

	spin_unlock(&kfd->interrupt_lock);
O
Oded Gabbay 已提交
527
}
528 529 530 531

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size)
{
532
	unsigned int num_of_longs;
533

534 535 536 537 538 539
	if (WARN_ON(buf_size < chunk_size))
		return -EINVAL;
	if (WARN_ON(buf_size == 0))
		return -EINVAL;
	if (WARN_ON(chunk_size == 0))
		return -EINVAL;
540 541 542 543

	kfd->gtt_sa_chunk_size = chunk_size;
	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;

544 545
	num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
		BITS_PER_LONG;
546

547
	kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);
548 549 550 551

	if (!kfd->gtt_sa_bitmap)
		return -ENOMEM;

552
	pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595
			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);

	mutex_init(&kfd->gtt_sa_lock);

	return 0;

}

static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
	mutex_destroy(&kfd->gtt_sa_lock);
	kfree(kfd->gtt_sa_bitmap);
}

/* Translate a chunk index into the GPU address of that chunk.
 *
 * Widen the multiplication to 64 bit before adding, so that a large
 * bit_num * chunk_size product cannot wrap in 32-bit unsigned
 * arithmetic (both operands are unsigned int).
 */
static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return start_addr + (uint64_t)bit_num * chunk_size;
}

/* Translate a chunk index into the kernel CPU pointer of that chunk.
 *
 * Use byte-pointer arithmetic instead of round-tripping the pointer
 * through uint64_t: the old cast is not portable (truncation warnings
 * on 32-bit builds) and the offset product could wrap in 32 bits.
 */
static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return (uint32_t *)((uint8_t *)start_addr +
			    (uint64_t)bit_num * chunk_size);
}

int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
			struct kfd_mem_obj **mem_obj)
{
	unsigned int found, start_search, cur_size;

	if (size == 0)
		return -EINVAL;

	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
		return -ENOMEM;

	*mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
	if ((*mem_obj) == NULL)
		return -ENOMEM;

596
	pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);
597 598 599 600 601 602 603 604 605 606 607

	start_search = 0;

	mutex_lock(&kfd->gtt_sa_lock);

kfd_gtt_restart_search:
	/* Find the first chunk that is free */
	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks,
					start_search);

608
	pr_debug("Found = %d\n", found);
609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625

	/* If there wasn't any free chunk, bail out */
	if (found == kfd->gtt_sa_num_of_chunks)
		goto kfd_gtt_no_free_chunk;

	/* Update fields of mem_obj */
	(*mem_obj)->range_start = found;
	(*mem_obj)->range_end = found;
	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
					kfd->gtt_start_gpu_addr,
					found,
					kfd->gtt_sa_chunk_size);
	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
					kfd->gtt_start_cpu_ptr,
					found,
					kfd->gtt_sa_chunk_size);

626
	pr_debug("gpu_addr = %p, cpu_addr = %p\n",
627 628 629 630
			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

	/* If we need only one chunk, mark it as allocated and get out */
	if (size <= kfd->gtt_sa_chunk_size) {
631
		pr_debug("Single bit\n");
632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
		set_bit(found, kfd->gtt_sa_bitmap);
		goto kfd_gtt_out;
	}

	/* Otherwise, try to see if we have enough contiguous chunks */
	cur_size = size - kfd->gtt_sa_chunk_size;
	do {
		(*mem_obj)->range_end =
			find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks, ++found);
		/*
		 * If next free chunk is not contiguous than we need to
		 * restart our search from the last free chunk we found (which
		 * wasn't contiguous to the previous ones
		 */
		if ((*mem_obj)->range_end != found) {
			start_search = found;
			goto kfd_gtt_restart_search;
		}

		/*
		 * If we reached end of buffer, bail out with error
		 */
		if (found == kfd->gtt_sa_num_of_chunks)
			goto kfd_gtt_no_free_chunk;

		/* Check if we don't need another chunk */
		if (cur_size <= kfd->gtt_sa_chunk_size)
			cur_size = 0;
		else
			cur_size -= kfd->gtt_sa_chunk_size;

	} while (cur_size > 0);

666
	pr_debug("range_start = %d, range_end = %d\n",
667 668 669 670 671 672 673 674 675 676 677 678 679
		(*mem_obj)->range_start, (*mem_obj)->range_end);

	/* Mark the chunks as allocated */
	for (found = (*mem_obj)->range_start;
		found <= (*mem_obj)->range_end;
		found++)
		set_bit(found, kfd->gtt_sa_bitmap);

kfd_gtt_out:
	mutex_unlock(&kfd->gtt_sa_lock);
	return 0;

kfd_gtt_no_free_chunk:
680
	pr_debug("Allocation failed with mem_obj = %p\n", mem_obj);
681 682 683 684 685 686 687 688 689
	mutex_unlock(&kfd->gtt_sa_lock);
	kfree(mem_obj);
	return -ENOMEM;
}

/* Return a sub-allocation to the GTT sub-allocator: clear the bitmap
 * bits covering mem_obj's chunk range under the sub-allocator lock,
 * then free the tracking object itself.  Always returns 0.
 */
int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
	unsigned int chunk;

	/* Act like kfree when trying to free a NULL object */
	if (!mem_obj)
		return 0;

	pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
			mem_obj, mem_obj->range_start, mem_obj->range_end);

	mutex_lock(&kfd->gtt_sa_lock);
	for (chunk = mem_obj->range_start;
	     chunk <= mem_obj->range_end; chunk++)
		clear_bit(chunk, kfd->gtt_sa_bitmap);
	mutex_unlock(&kfd->gtt_sa_lock);

	kfree(mem_obj);
	return 0;
}