// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"
#include "msm_gpu_trace.h"
#include "adreno/adreno_gpu.h"

#include <generated/utsrelease.h>
#include <linux/string_helpers.h>
#include <linux/pm_opp.h>
#include <linux/devfreq.h>
#include <linux/devcoredump.h>
#include <linux/sched/task.h>

/*
 * Power Management:
 */

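/*
 * The devfreq framework provides dynamic GPU frequency scaling: every
 * polling interval (msm_devfreq_profile.polling_ms, 10 ms) the
 * simple_ondemand governor compares the busy_time and total_time reported
 * by get_dev_status() and calls target() to move the GPU to a recommended
 * OPP frequency.
 */
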
static int msm_devfreq_target(struct device *dev, unsigned long *freq,
		u32 flags)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	struct dev_pm_opp *opp;

	opp = devfreq_recommended_opp(dev, freq, flags);

	if (IS_ERR(opp))
		return PTR_ERR(opp);

	if (gpu->funcs->gpu_set_freq)
		gpu->funcs->gpu_set_freq(gpu, (u64)*freq);
	else
		clk_set_rate(gpu->core_clk, *freq);

	dev_pm_opp_put(opp);

	return 0;
}

static int msm_devfreq_get_dev_status(struct device *dev,
		struct devfreq_dev_status *status)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	ktime_t time;

	if (gpu->funcs->gpu_get_freq)
		status->current_frequency = gpu->funcs->gpu_get_freq(gpu);
	else
		status->current_frequency = clk_get_rate(gpu->core_clk);

	status->busy_time = gpu->funcs->gpu_busy(gpu);

	time = ktime_get();
	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
	gpu->devfreq.time = time;

	return 0;
}

static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));

	if (gpu->funcs->gpu_get_freq)
		*freq = gpu->funcs->gpu_get_freq(gpu);
	else
		*freq = clk_get_rate(gpu->core_clk);

	return 0;
}

static struct devfreq_dev_profile msm_devfreq_profile = {
	.polling_ms = 10,
	.target = msm_devfreq_target,
	.get_dev_status = msm_devfreq_get_dev_status,
	.get_cur_freq = msm_devfreq_get_cur_freq,
};

static void msm_devfreq_init(struct msm_gpu *gpu)
{
	/* We need target support to do devfreq */
	if (!gpu->funcs->gpu_busy)
		return;

	msm_devfreq_profile.initial_freq = gpu->fast_rate;

	/*
	 * Don't set the freq_table or max_state and let devfreq build the table
	 * from OPP
	 */

	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
			&msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND,
			NULL);

	if (IS_ERR(gpu->devfreq.devfreq)) {
		DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
		gpu->devfreq.devfreq = NULL;
	}

	devfreq_suspend_device(gpu->devfreq.devfreq);
}

static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}

static int enable_clk(struct msm_gpu *gpu)
{
	if (gpu->core_clk && gpu->fast_rate)
		clk_set_rate(gpu->core_clk, gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2 MHz */
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);

	return clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
}

static int disable_clk(struct msm_gpu *gpu)
{
	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);

	/*
	 * Set the clock to a deliberately low rate. On older targets the clock
	 * speed had to be non-zero to avoid problems. On newer targets this
	 * will be rounded down to zero anyway so it all works out.
	 */
	if (gpu->core_clk)
		clk_set_rate(gpu->core_clk, 27000000);

	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);

	return 0;
}

static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	return 0;
}

static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	return 0;
}

void msm_gpu_resume_devfreq(struct msm_gpu *gpu)
{
	gpu->devfreq.busy_cycles = 0;
	gpu->devfreq.time = ktime_get();

	devfreq_resume_device(gpu->devfreq.devfreq);
}

int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	msm_gpu_resume_devfreq(gpu);

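	/*
	 * The hardware comes back up in an undefined state, so flag it for
	 * re-init; msm_gpu_submit() runs msm_gpu_hw_init() before touching
	 * the hardware again.
	 */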
	gpu->needs_hw_init = true;

	return 0;
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	devfreq_suspend_device(gpu->devfreq.devfreq);

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}

int msm_gpu_hw_init(struct msm_gpu *gpu)
{
	int ret;

	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));

	if (!gpu->needs_hw_init)
		return 0;

	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);

	return ret;
}

#ifdef CONFIG_DEV_COREDUMP
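
/*
 * Crash state is exposed via the devcoredump facility: dev_coredumpm()
 * (called from msm_gpu_crashstate_capture() below) registers the read/free
 * callbacks, and userspace reads the dump from the devcoredump device in
 * sysfs.
 */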
static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset,
		size_t count, void *data, size_t datalen)
{
	struct msm_gpu *gpu = data;
	struct drm_print_iterator iter;
	struct drm_printer p;
	struct msm_gpu_state *state;

	state = msm_gpu_crashstate_get(gpu);
	if (!state)
		return 0;

	iter.data = buffer;
	iter.offset = 0;
	iter.start = offset;
	iter.remain = count;

	p = drm_coredump_printer(&iter);

	drm_printf(&p, "---\n");
	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
	drm_printf(&p, "time: %lld.%09ld\n",
		state->time.tv_sec, state->time.tv_nsec);
	if (state->comm)
		drm_printf(&p, "comm: %s\n", state->comm);
	if (state->cmd)
		drm_printf(&p, "cmdline: %s\n", state->cmd);

	gpu->funcs->show(gpu, state, &p);

	msm_gpu_crashstate_put(gpu);

	return count - iter.remain;
}

static void msm_gpu_devcoredump_free(void *data)
{
	struct msm_gpu *gpu = data;

	msm_gpu_crashstate_put(gpu);
}

static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state,
		struct msm_gem_object *obj, u64 iova, u32 flags)
{
	struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos];

	/* Size and iova are recorded for every BO; contents only for readable ones */
	state_bo->size = obj->base.size;
	state_bo->iova = iova;

	/* Only store data for non-imported buffer objects marked for read */
	if ((flags & MSM_SUBMIT_BO_READ) && !obj->base.import_attach) {
		void *ptr;

		state_bo->data = kvmalloc(obj->base.size, GFP_KERNEL);
		if (!state_bo->data)
			goto out;

		ptr = msm_gem_get_vaddr_active(&obj->base);
		if (IS_ERR(ptr)) {
			kvfree(state_bo->data);
			state_bo->data = NULL;
			goto out;
		}

		memcpy(state_bo->data, ptr, obj->base.size);
		msm_gem_put_vaddr(&obj->base);
	}
out:
	state->nr_bos++;
}
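
/*
 * Assumed helper (matches upstream msm_gpu.c, not present in this extract):
 * a submit BO is included in the crash dump when userspace tagged it with
 * MSM_SUBMIT_BO_DUMP.
 */
static bool should_dump(struct msm_gem_submit *submit, int idx)
{
	return submit->bos[idx].flags & MSM_SUBMIT_BO_DUMP;
}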

static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
	struct msm_gpu_state *state;

	/* Check if the target supports capturing crash state */
	if (!gpu->funcs->gpu_state_get)
		return;

	/* Only save one crash state at a time */
	if (gpu->crashstate)
		return;

	state = gpu->funcs->gpu_state_get(gpu);
	if (IS_ERR_OR_NULL(state))
		return;

	/* Fill in the additional crash state information */
	state->comm = kstrdup(comm, GFP_KERNEL);
	state->cmd = kstrdup(cmd, GFP_KERNEL);

	if (submit) {
		int i, nr = 0;

		/* count # of buffers to dump: */
		for (i = 0; i < submit->nr_bos; i++)
			if (should_dump(submit, i))
				nr++;
		/* always dump cmd bo's, but don't double count them: */
		for (i = 0; i < submit->nr_cmds; i++)
			if (!should_dump(submit, submit->cmd[i].idx))
				nr++;

		state->bos = kcalloc(nr,
			sizeof(struct msm_gpu_state_bo), GFP_KERNEL);

		for (i = 0; i < submit->nr_bos; i++) {
			if (should_dump(submit, i)) {
				msm_gpu_crashstate_get_bo(state, submit->bos[i].obj,
					submit->bos[i].iova, submit->bos[i].flags);
			}
		}

		for (i = 0; state->bos && i < submit->nr_cmds; i++) {
			int idx = submit->cmd[i].idx;

			if (!should_dump(submit, submit->cmd[i].idx)) {
				msm_gpu_crashstate_get_bo(state, submit->bos[idx].obj,
					submit->bos[idx].iova, submit->bos[idx].flags);
			}
		}
	}

	/* Set the active crash state to be dumped on failure */
	gpu->crashstate = state;

	/* FIXME: Release the crashstate if this errors out? */
	dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
		msm_gpu_devcoredump_read, msm_gpu_devcoredump_free);
}
#else
static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
}
#endif

/*
 * Hangcheck detection for locked gpu:
 */
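
/*
 * The watchdog works off hangcheck_timer: each time it fires,
 * hangcheck_handler() compares the ring's completed fence against the
 * snapshot taken on the previous tick.  No forward progress while
 * submits are still pending (fence < ring->seqno) is treated as a
 * lockup and recover_worker is queued.
 */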

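/* Signal completion of all submits on @ring up to and including @fence. */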
static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		uint32_t fence)
{
	struct msm_gem_submit *submit;

	list_for_each_entry(submit, &ring->submits, node) {
		if (submit->seqno > fence)
			break;

		msm_update_fence(submit->ring->fctx,
			submit->fence->seqno);
	}
}

static struct msm_gem_submit *
find_submit(struct msm_ringbuffer *ring, uint32_t fence)
{
	struct msm_gem_submit *submit;

	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));

	list_for_each_entry(submit, &ring->submits, node)
		if (submit->seqno == fence)
			return submit;

	return NULL;
}

static void retire_submits(struct msm_gpu *gpu);

static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_gem_submit *submit;
	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
	char *comm = NULL, *cmd = NULL;
	int i;

	mutex_lock(&dev->struct_mutex);

	DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
	if (submit) {
		struct task_struct *task;

		/* Increment the fault counts */
		gpu->global_faults++;
		submit->queue->faults++;

		task = get_pid_task(submit->pid, PIDTYPE_PID);
		if (task) {
			comm = kstrdup(task->comm, GFP_KERNEL);
			cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
			put_task_struct(task);
		}

		if (comm && cmd) {
			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
				gpu->name, comm, cmd);

			msm_rd_dump_submit(priv->hangrd, submit,
				"offending task: %s (%s)", comm, cmd);
		} else
			msm_rd_dump_submit(priv->hangrd, submit, NULL);
	}

	/* Record the crash state */
	pm_runtime_get_sync(&gpu->pdev->dev);
	msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
	pm_runtime_put_sync(&gpu->pdev->dev);

	kfree(cmd);
	kfree(comm);

	/*
	 * Update all the rings with the latest and greatest fence.. this
	 * needs to happen after msm_rd_dump_submit() to ensure that the
	 * bo's referenced by the offending submit are still around.
	 */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		uint32_t fence = ring->memptrs->fence;

		/*
		 * For the current (faulting?) ring/submit advance the fence by
		 * one more to clear the faulting submit
		 */
		if (ring == cur_ring)
			fence++;

		update_fences(gpu, ring, fence);
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		pm_runtime_get_sync(&gpu->pdev->dev);
		gpu->funcs->recover(gpu);
		pm_runtime_put_sync(&gpu->pdev->dev);

		/*
		 * Replay all remaining submits starting with highest priority
		 * ring
		 */
		for (i = 0; i < gpu->nr_rings; i++) {
			struct msm_ringbuffer *ring = gpu->rb[i];

			list_for_each_entry(submit, &ring->submits, node)
				gpu->funcs->submit(gpu, submit, NULL);
		}
	}

	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}

static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}

static void hangcheck_handler(struct timer_list *t)
{
	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	uint32_t fence = ring->memptrs->fence;

	if (fence != ring->hangcheck_fence) {
		/* some progress has been made.. ya! */
		ring->hangcheck_fence = fence;
	} else if (fence < ring->seqno) {
		/* no progress and not done.. hung! */
		ring->hangcheck_fence = fence;
		DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
				gpu->name, ring->id);
		DRM_DEV_ERROR(dev->dev, "%s:     completed fence: %u\n",
				gpu->name, fence);
		DRM_DEV_ERROR(dev->dev, "%s:     submitted fence: %u\n",
				gpu->name, ring->seqno);

		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (ring->seqno > ring->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}

/*
 * Performance Counters:
 */
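
/*
 * Sampling model: update_hw_cntrs() returns per-counter deltas since the
 * previous read, while update_sw_cntrs() accumulates wall-clock time split
 * into activetime/totaltime so a sampler can derive GPU utilization.
 */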

/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}

static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	pm_runtime_get_sync(&gpu->pdev->dev);

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
	pm_runtime_put_sync(&gpu->pdev->dev);
}

/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}

/*
 * Cmdstream submission/retirement:
 */
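
/*
 * Lifecycle: msm_gpu_submit() takes a runtime-PM vote, a reference on each
 * BO and pins its iova until the submit is retired; once the GPU writes
 * the completed fence, retire_worker -> retire_submits() drops the
 * references and releases the power vote.
 */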

static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		struct msm_gem_submit *submit)
{
	int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	volatile struct msm_gpu_submit_stats *stats;
	u64 elapsed, clock = 0;
	int i;

	stats = &ring->memptrs->stats[index];
	/*
	 * Convert 19.2 MHz always-on ticks to nanoseconds of elapsed time:
	 * ns = ticks * (1e9 / 19.2e6) = ticks * 10000 / 192
	 */
	elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000;
	do_div(elapsed, 192);

	/* Calculate the clock frequency from the number of CP cycles */
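	/* (cycles * 1000) / elapsed_ns gives the average core clock in MHz */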
	if (elapsed) {
		clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000;
		do_div(clock, elapsed);
	}

	trace_msm_gpu_submit_retired(submit, elapsed, clock,
		stats->alwayson_start, stats->alwayson_end);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		/* move to inactive: */
		msm_gem_move_to_inactive(&msm_obj->base);
		msm_gem_unpin_iova(&msm_obj->base, submit->aspace);
		drm_gem_object_put_locked(&msm_obj->base);
	}

	pm_runtime_mark_last_busy(&gpu->pdev->dev);
	pm_runtime_put_autosuspend(&gpu->pdev->dev);
	msm_gem_submit_free(submit);
}

static void retire_submits(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_gem_submit *submit, *tmp;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	/* Retire the commits starting with highest priority */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
			if (dma_fence_is_signaled(submit->fence))
				retire_submit(gpu, ring, submit);
		}
	}
}

static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);

	mutex_lock(&dev->struct_mutex);
	retire_submits(gpu);
	mutex_unlock(&dev->struct_mutex);
}

/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	update_sw_cntrs(gpu);
}

/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	pm_runtime_get_sync(&gpu->pdev->dev);

	msm_gpu_hw_init(gpu);

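	/*
	 * Per-ring seqno: the GPU writes the seqno of the last completed
	 * submit to memptrs->fence, which retire and hangcheck compare
	 * against ring->seqno (the last submitted seqno).
	 */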
	submit->seqno = ++ring->seqno;

	list_add_tail(&submit->node, &ring->submits);

	msm_rd_dump_submit(priv->rd, submit, NULL);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		uint64_t iova;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_get(&msm_obj->base);
		msm_gem_get_and_pin_iova(&msm_obj->base, submit->aspace, &iova);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
	}

	gpu->funcs->submit(gpu, submit, ctx);
	priv->lastctx = ctx;

	hangcheck_timer_reset(gpu);
}

/*
 * Init/Cleanup:
 */

static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}
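
/*
 * devm_clk_bulk_get_all() returns the number of clocks found (or a
 * negative errno); the "core" and "rbbmtimer" clocks are then picked out
 * of the bulk list by name so their rates can be managed individually.
 */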

static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
{
	int ret = devm_clk_bulk_get_all(&pdev->dev, &gpu->grp_clks);

	if (ret < 1) {
		gpu->nr_clocks = 0;
		return ret;
	}

	gpu->nr_clocks = ret;

	gpu->core_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "core");

	gpu->rbbmtimer_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "rbbmtimer");

	return 0;
}

static struct msm_gem_address_space *
msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
		uint64_t va_start, uint64_t va_end)
{
	struct msm_gem_address_space *aspace;
	int ret;

	/*
	 * Setup IOMMU.. eventually we will (I think) do this once per context
	 * and have separate page tables per context.  For now, to keep things
	 * simple and to get something working, just use a single address space:
	 */
	if (!adreno_is_a2xx(to_adreno_gpu(gpu))) {
		struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type);
		if (!iommu)
			return NULL;

		iommu->geometry.aperture_start = va_start;
		iommu->geometry.aperture_end = va_end;

		DRM_DEV_INFO(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);

		aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
		if (IS_ERR(aspace))
			iommu_domain_free(iommu);
	} else {
		aspace = msm_gem_address_space_create_a2xx(&pdev->dev, gpu, "gpu",
			va_start, va_end);
	}

	if (IS_ERR(aspace)) {
		DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
			PTR_ERR(aspace));
		return ERR_CAST(aspace);
	}

	ret = aspace->mmu->funcs->attach(aspace->mmu);
	if (ret) {
		msm_gem_address_space_put(aspace);
		return ERR_PTR(ret);
	}

	return aspace;
}

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config)
{
	int i, ret, nr_rings = config->nr_rings;
	void *memptrs;
	uint64_t memptrs_iova;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);

	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);

	spin_lock_init(&gpu->perf_lock);

	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq(pdev, 0);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		DRM_DEV_ERROR(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		DRM_DEV_ERROR(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;

	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	gpu->pdev = pdev;
	platform_set_drvdata(pdev, gpu);

	msm_devfreq_init(gpu);

	gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
		config->va_start, config->va_end);

	if (gpu->aspace == NULL)
		DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
		goto fail;
	}

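	/*
	 * A single kernel BO backs the memptrs (fence and submit stats) for
	 * all rings; each ring gets its own slice, advanced by
	 * sizeof(struct msm_rbmemptrs) as the rings are created below.
	 */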
	memptrs = msm_gem_kernel_new(drm,
		sizeof(struct msm_rbmemptrs) * nr_rings,
		MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
		&memptrs_iova);

	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
		DRM_DEV_ERROR(drm->dev, "could not allocate memptrs: %d\n", ret);
		goto fail;
	}

	msm_gem_object_set_name(gpu->memptrs_bo, "memptrs");

	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
			ARRAY_SIZE(gpu->rb));
		nr_rings = ARRAY_SIZE(gpu->rb);
	}

	/* Create ringbuffer(s): */
	for (i = 0; i < nr_rings; i++) {
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);

		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
			DRM_DEV_ERROR(drm->dev,
				"could not create ringbuffer %d: %d\n", i, ret);
			goto fail;
		}

		memptrs += sizeof(struct msm_rbmemptrs);
		memptrs_iova += sizeof(struct msm_rbmemptrs);
	}

	gpu->nr_rings = nr_rings;

	return 0;

fail:
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);

	platform_set_drvdata(pdev, NULL);
	return ret;
}

void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	int i;

	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);

	if (!IS_ERR_OR_NULL(gpu->aspace)) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu);
		msm_gem_address_space_put(gpu->aspace);
	}
}