/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "adreno_gpu.h"
#include "msm_gem.h"
22
#include "msm_mmu.h"
R
Rob Clark 已提交
23 24 25 26 27 28 29 30 31 32 33


/*
 * Look up one GPU parameter.
 *
 * @gpu:   the msm_gpu embedded in the adreno_gpu being queried
 * @param: MSM_PARAM_* selector
 * @value: receives the parameter value
 *
 * Returns 0 on success.  MSM_PARAM_TIMESTAMP returns whatever the target's
 * get_timestamp() hook returns, or -EINVAL when the target has no such hook.
 * Unknown selectors are logged with DBG() and rejected with -EINVAL.
 */
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret = 0;

	switch (param) {
	case MSM_PARAM_GPU_ID:
		*value = adreno_gpu->info->revn;
		break;
	case MSM_PARAM_GMEM_SIZE:
		*value = adreno_gpu->gmem;
		break;
	case MSM_PARAM_GMEM_BASE:
		/* fixed GMEM base reported for every target handled here */
		*value = 0x100000;
		break;
	case MSM_PARAM_CHIP_ID:
		/* pack core.major.minor.patchid, one byte each, core on top */
		*value = (adreno_gpu->rev.core << 24) |
				(adreno_gpu->rev.major << 16) |
				(adreno_gpu->rev.minor << 8) |
				adreno_gpu->rev.patchid;
		break;
	case MSM_PARAM_MAX_FREQ:
		*value = adreno_gpu->base.fast_rate;
		break;
	case MSM_PARAM_TIMESTAMP:
		if (!adreno_gpu->funcs->get_timestamp) {
			ret = -EINVAL;
			break;
		}

		/* hold a runtime-PM reference while the hook runs */
		pm_runtime_get_sync(&gpu->pdev->dev);
		ret = adreno_gpu->funcs->get_timestamp(gpu, value);
		pm_runtime_put_autosuspend(&gpu->pdev->dev);
		break;
	case MSM_PARAM_NR_RINGS:
		*value = gpu->nr_rings;
		break;
	default:
		DBG("%s: invalid param: %u", gpu->name, param);
		ret = -EINVAL;
		break;
	}

	return ret;
}

68 69
const struct firmware *
adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname)
70 71
{
	struct drm_device *drm = adreno_gpu->base.dev;
72
	const struct firmware *fw = NULL;
73
	char newname[strlen("qcom/") + strlen(fwname) + 1];
74 75
	int ret;

76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
	sprintf(newname, "qcom/%s", fwname);

	/*
	 * Try first to load from qcom/$fwfile using a direct load (to avoid
	 * a potential timeout waiting for usermode helper)
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_NEW)) {

		ret = request_firmware_direct(&fw, newname, drm->dev);
		if (!ret) {
			dev_info(drm->dev, "loaded %s from new location\n",
				newname);
			adreno_gpu->fwloc = FW_LOCATION_NEW;
			return fw;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			dev_err(drm->dev, "failed to load %s: %d\n",
				newname, ret);
			return ERR_PTR(ret);
		}
	}

	/*
	 * Then try the legacy location without qcom/ prefix
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_LEGACY)) {

		ret = request_firmware_direct(&fw, fwname, drm->dev);
		if (!ret) {
			dev_info(drm->dev, "loaded %s from legacy location\n",
				newname);
			adreno_gpu->fwloc = FW_LOCATION_LEGACY;
			return fw;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			dev_err(drm->dev, "failed to load %s: %d\n",
				fwname, ret);
			return ERR_PTR(ret);
		}
	}

	/*
	 * Finally fall back to request_firmware() for cases where the
	 * usermode helper is needed (I think mainly android)
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_HELPER)) {

		ret = request_firmware(&fw, newname, drm->dev);
		if (!ret) {
			dev_info(drm->dev, "loaded %s with helper\n",
				newname);
			adreno_gpu->fwloc = FW_LOCATION_HELPER;
			return fw;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			dev_err(drm->dev, "failed to load %s: %d\n",
				newname, ret);
			return ERR_PTR(ret);
		}
135 136
	}

137 138
	dev_err(drm->dev, "failed to load %s\n", fwname);
	return ERR_PTR(-ENOENT);
139 140 141 142 143 144
}

/*
 * Load the PM4 and PFP firmware images named in adreno_gpu->info.
 *
 * Does nothing if a PM4 image is already attached.  If the PFP image cannot
 * be loaded, the freshly loaded PM4 image is released again so that a later
 * call starts from scratch.
 *
 * Returns 0 on success or the error reported by adreno_request_fw().
 */
static int adreno_load_fw(struct adreno_gpu *adreno_gpu)
{
	const struct firmware *pm4_fw;
	const struct firmware *pfp_fw;

	if (adreno_gpu->pm4)
		return 0;

	pm4_fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw);
	if (IS_ERR(pm4_fw))
		return PTR_ERR(pm4_fw);
	adreno_gpu->pm4 = pm4_fw;

	pfp_fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw);
	if (!IS_ERR(pfp_fw)) {
		adreno_gpu->pfp = pfp_fw;
		return 0;
	}

	/* undo the PM4 load so both images are always loaded as a pair */
	release_firmware(adreno_gpu->pm4);
	adreno_gpu->pm4 = NULL;

	return PTR_ERR(pfp_fw);
}

R
Rob Clark 已提交
164 165 166
int adreno_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
167
	int ret, i;
R
Rob Clark 已提交
168 169 170

	DBG("%s", gpu->name);

171 172 173 174
	ret = adreno_load_fw(adreno_gpu);
	if (ret)
		return ret;

175 176
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];
R
Rob Clark 已提交
177

178 179
		if (!ring)
			continue;
180

181 182 183 184 185 186 187 188 189
		ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova);
		if (ret) {
			ring->iova = 0;
			dev_err(gpu->dev->dev,
				"could not map ringbuffer %d: %d\n", i, ret);
			return ret;
		}

		ring->cur = ring->start;
190
		ring->next = ring->start;
191 192 193 194 195

		/* reset completed fence seqno: */
		ring->memptrs->fence = ring->seqno;
		ring->memptrs->rptr = 0;
	}
196

197 198 199 200 201 202
	/*
	 * Setup REG_CP_RB_CNTL.  The same value is used across targets (with
	 * the excpetion of A430 that disables the RPTR shadow) - the cacluation
	 * for the ringbuffer size and block size is moved to msm_gpu.h for the
	 * pre-processor to deal with and the A430 variant is ORed in here
	 */
203
	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
204
		MSM_GPU_RB_CNTL_DEFAULT |
205
		(adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));
R
Rob Clark 已提交
206

207
	/* Setup ringbuffer address - use ringbuffer[0] for GPU init */
J
Jordan Crouse 已提交
208
	adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE,
209
		REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova);
R
Rob Clark 已提交
210

J
Jordan Crouse 已提交
211 212
	if (!adreno_is_a430(adreno_gpu)) {
		adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
213 214
			REG_ADRENO_CP_RB_RPTR_ADDR_HI,
			rbmemptr(gpu->rb[0], rptr));
J
Jordan Crouse 已提交
215
	}
R
Rob Clark 已提交
216 217 218 219 220 221 222 223 224

	return 0;
}

/* Committed write position of @ring, as an element offset from ring->start */
static uint32_t get_wptr(struct msm_ringbuffer *ring)
{
	uint32_t offset = ring->cur - ring->start;

	return offset;
}

225
/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
226 227
static uint32_t get_rptr(struct adreno_gpu *adreno_gpu,
		struct msm_ringbuffer *ring)
228 229
{
	if (adreno_is_a430(adreno_gpu))
230
		return ring->memptrs->rptr = adreno_gpu_read(
231 232
			adreno_gpu, REG_ADRENO_CP_RB_RPTR);
	else
233 234 235 236 237 238
		return ring->memptrs->rptr;
}

/* The ring currently in use by the GPU; this common helper always picks ring 0 */
struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *first = gpu->rb[0];

	return first;
}

241 242 243 244 245
void adreno_recover(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

R
Rob Clark 已提交
246 247 248
	// XXX pm-runtime??  we *need* the device to be off after this
	// so maybe continuing to call ->pm_suspend/resume() is better?

249 250
	gpu->funcs->pm_suspend(gpu);
	gpu->funcs->pm_resume(gpu);
251

R
Rob Clark 已提交
252
	ret = msm_gpu_hw_init(gpu);
253 254 255 256 257 258
	if (ret) {
		dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
		/* hmm, oh well? */
	}
}

259
void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
R
Rob Clark 已提交
260 261 262 263
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
264
	struct msm_ringbuffer *ring = submit->ring;
265
	unsigned i;
R
Rob Clark 已提交
266 267 268 269 270 271 272 273 274 275 276

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
277 278
			OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
				CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
R
Rob Clark 已提交
279 280
			OUT_RING(ring, submit->cmd[i].iova);
			OUT_RING(ring, submit->cmd[i].size);
281
			OUT_PKT2(ring);
R
Rob Clark 已提交
282 283 284 285 286
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
287
	OUT_RING(ring, submit->seqno);
R
Rob Clark 已提交
288

289
	if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) {
R
Rob Clark 已提交
290 291 292 293 294 295 296 297 298 299 300 301 302
		/* Flush HLSQ lazy updates to make sure there is nothing
		 * pending for indirect loads after the timestamp has
		 * passed:
		 */
		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, HLSQ_FLUSH);

		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
	}

	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS);
303 304
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);
R
Rob Clark 已提交
305 306 307 308 309

	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
	OUT_PKT3(ring, CP_INTERRUPT, 1);
	OUT_RING(ring, 0x80000000);

310 311 312 313 314 315 316 317 318 319 320
	/* Workaround for missing irq issue on 8x16/a306.  Unsure if the
	 * root cause is a platform issue or some a306 quirk, but this
	 * keeps things humming along:
	 */
	if (adreno_is_a306(adreno_gpu)) {
		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
		OUT_PKT3(ring, CP_INTERRUPT, 1);
		OUT_RING(ring, 0x80000000);
	}

R
Rob Clark 已提交
321 322 323 324 325 326 327 328 329
#if 0
	if (adreno_is_a3xx(adreno_gpu)) {
		/* Dummy set-constant to trigger context rollover */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
		OUT_RING(ring, 0x00000000);
	}
#endif

330
	gpu->funcs->flush(gpu, ring);
R
Rob Clark 已提交
331 332
}

333
void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
R
Rob Clark 已提交
334
{
335
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
336 337
	uint32_t wptr;

338 339 340
	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

341 342 343 344 345
	/*
	 * Mask wptr value that we calculate to fit in the HW range. This is
	 * to account for the possibility that the last command fit exactly into
	 * the ringbuffer and rb->next hasn't wrapped to zero yet
	 */
346
	wptr = (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2);
R
Rob Clark 已提交
347 348 349 350

	/* ensure writes to ringbuffer have hit system memory: */
	mb();

351
	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr);
R
Rob Clark 已提交
352 353
}

354
bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
R
Rob Clark 已提交
355 356
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
357
	uint32_t wptr = get_wptr(ring);
R
Rob Clark 已提交
358

R
Rob Clark 已提交
359
	/* wait for CP to drain ringbuffer: */
360
	if (!spin_until(get_rptr(adreno_gpu, ring) == wptr))
361
		return true;
R
Rob Clark 已提交
362 363

	/* TODO maybe we need to reset GPU here to recover from hang? */
364 365
	DRM_ERROR("%s: timeout waiting to drain ringbuffer %d!\n", gpu->name,
		ring->id);
366
	return false;
R
Rob Clark 已提交
367 368 369 370 371 372
}

#ifdef CONFIG_DEBUG_FS
void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
373
	int i;
R
Rob Clark 已提交
374 375 376 377 378 379

	seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

380 381 382 383 384 385 386 387 388 389
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		seq_printf(m, "rb %d: fence:    %d/%d\n", i,
			ring->memptrs->fence, ring->seqno);

		seq_printf(m, "      rptr:     %d\n",
			get_rptr(adreno_gpu, ring));
		seq_printf(m, "rb wptr:  %d\n", get_wptr(ring));
	}
390 391 392 393 394 395 396 397 398 399 400 401 402

	/* dump these out in a form that can be parsed by demsm: */
	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end   = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
		}
	}
R
Rob Clark 已提交
403 404 405
}
#endif

406 407 408 409 410 411 412
/* Dump common gpu status and scratch registers on any hang, to make
 * the hangcheck logs more useful.  The scratch registers seem always
 * safe to read when GPU has hung (unlike some other regs, depending
 * on how the GPU hung), and they are useful to match up to cmdstream
 * dumps when debugging hangs:
 */
void adreno_dump_info(struct msm_gpu *gpu)
R
Rob Clark 已提交
413 414
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
415
	int i;
R
Rob Clark 已提交
416 417 418 419 420 421

	printk("revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

422 423 424 425 426 427 428 429 430 431
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		printk("rb %d: fence:    %d/%d\n", i,
			ring->memptrs->fence,
			ring->seqno);

		printk("rptr:     %d\n", get_rptr(adreno_gpu, ring));
		printk("rb wptr:  %d\n", get_wptr(ring));
	}
432 433 434 435 436 437 438 439
}

/* would be nice to not have to duplicate the _show() stuff with printk(): */
void adreno_dump(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

440 441 442 443 444 445 446 447 448 449 450 451
	/* dump these out in a form that can be parsed by demsm: */
	printk("IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end   = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			printk("IO:R %08x %08x\n", addr<<2, val);
		}
	}
R
Rob Clark 已提交
452 453
}

454
static uint32_t ring_freewords(struct msm_ringbuffer *ring)
R
Rob Clark 已提交
455
{
456 457
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu);
	uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2;
458 459
	/* Use ring->next to calculate free size */
	uint32_t wptr = ring->next - ring->start;
460
	uint32_t rptr = get_rptr(adreno_gpu, ring);
R
Rob Clark 已提交
461 462 463
	return (rptr + (size - 1) - wptr) % size;
}

464
/*
 * Spin until at least @ndwords dwords are free in @ring.  If spin_until()
 * gives up first, an error is logged and the function returns anyway; the
 * caller is not told about the timeout.
 */
void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords)
{
	if (spin_until(ring_freewords(ring) >= ndwords))
		DRM_DEV_ERROR(ring->gpu->dev->dev,
			"timeout waiting for space in ringbuffer %d\n",
			ring->id);
}

int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
473 474
		struct adreno_gpu *adreno_gpu,
		const struct adreno_gpu_funcs *funcs, int nr_rings)
R
Rob Clark 已提交
475
{
476
	struct adreno_platform_config *config = pdev->dev.platform_data;
477
	struct msm_gpu_config adreno_gpu_config  = { 0 };
478
	struct msm_gpu *gpu = &adreno_gpu->base;
R
Rob Clark 已提交
479

480 481 482 483 484 485 486 487
	adreno_gpu->funcs = funcs;
	adreno_gpu->info = adreno_info(config->rev);
	adreno_gpu->gmem = adreno_gpu->info->gmem;
	adreno_gpu->revn = adreno_gpu->info->revn;
	adreno_gpu->rev = config->rev;

	gpu->fast_rate = config->fast_rate;
	gpu->bus_freq  = config->bus_freq;
488
#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
489 490 491
	gpu->bus_scale_table = config->bus_scale_table;
#endif

492 493
	DBG("fast_rate=%u, slow_rate=27000000, bus_freq=%u",
			gpu->fast_rate, gpu->bus_freq);
R
Rob Clark 已提交
494

495 496 497 498 499 500
	adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";
	adreno_gpu_config.irqname = "kgsl_3d0_irq";

	adreno_gpu_config.va_start = SZ_16M;
	adreno_gpu_config.va_end = 0xffffffff;

501
	adreno_gpu_config.nr_rings = nr_rings;
502

503 504 505 506
	pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD);
	pm_runtime_use_autosuspend(&pdev->dev);
	pm_runtime_enable(&pdev->dev);

J
Jordan Crouse 已提交
507
	return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
508
			adreno_gpu->info->name, &adreno_gpu_config);
R
Rob Clark 已提交
509 510
}

511
void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
R
Rob Clark 已提交
512
{
513 514
	release_firmware(adreno_gpu->pm4);
	release_firmware(adreno_gpu->pfp);
515

J
Jordan Crouse 已提交
516
	msm_gpu_cleanup(&adreno_gpu->base);
R
Rob Clark 已提交
517
}