msm_gpu.c 24.0 KB
Newer Older
R
Rob Clark 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"
#include "msm_gpu_trace.h"
#include "adreno/adreno_gpu.h"

#include <generated/utsrelease.h>
#include <linux/string_helpers.h>
#include <linux/pm_opp.h>
#include <linux/devfreq.h>
#include <linux/devcoredump.h>
#include <linux/sched/task.h>
R
Rob Clark 已提交
30 31 32 33 34

/*
 * Power Management:
 */

35 36 37 38 39 40 41 42 43 44 45
/*
 * devfreq target callback: program the GPU to the requested frequency.
 *
 * The OPP returned by devfreq_recommended_opp() is only used to validate the
 * request; it is released again before returning.  The frequency is applied
 * through the target's gpu_set_freq() hook when one is provided, otherwise
 * directly on the core clock.
 *
 * Returns 0 on success or the error encoded in the OPP lookup.
 */
static int msm_devfreq_target(struct device *dev, unsigned long *freq,
		u32 flags)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct msm_gpu *gpu = platform_get_drvdata(pdev);
	struct dev_pm_opp *recommended;

	recommended = devfreq_recommended_opp(dev, freq, flags);
	if (IS_ERR(recommended))
		return PTR_ERR(recommended);

	if (!gpu->funcs->gpu_set_freq)
		clk_set_rate(gpu->core_clk, *freq);
	else
		gpu->funcs->gpu_set_freq(gpu, (u64)*freq);

	dev_pm_opp_put(recommended);

	return 0;
}

/*
 * devfreq status callback: report current frequency, busy time and the
 * wall-clock time (in microseconds) elapsed since the previous sample.
 *
 * The sample timestamp stored in gpu->devfreq.time is advanced on every call.
 * Always returns 0.
 */
static int msm_devfreq_get_dev_status(struct device *dev,
		struct devfreq_dev_status *status)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct msm_gpu *gpu = platform_get_drvdata(pdev);
	ktime_t now;

	/* Prefer the target specific hook, fall back to the core clock */
	if (!gpu->funcs->gpu_get_freq)
		status->current_frequency = clk_get_rate(gpu->core_clk);
	else
		status->current_frequency = gpu->funcs->gpu_get_freq(gpu);

	status->busy_time = gpu->funcs->gpu_busy(gpu);

	now = ktime_get();
	status->total_time = ktime_us_delta(now, gpu->devfreq.time);
	gpu->devfreq.time = now;

	return 0;
}

/*
 * devfreq callback: store the current GPU frequency in *freq.
 *
 * Uses the target's gpu_get_freq() hook when present, otherwise the rate of
 * the core clock.  Always returns 0.
 */
static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct msm_gpu *gpu = platform_get_drvdata(pdev);

	if (!gpu->funcs->gpu_get_freq)
		*freq = clk_get_rate(gpu->core_clk);
	else
		*freq = gpu->funcs->gpu_get_freq(gpu);

	return 0;
}

/*
 * devfreq profile shared by the GPU device.  initial_freq is filled in by
 * msm_devfreq_init() before the profile is registered, which is why this
 * object is not const.
 */
static struct devfreq_dev_profile msm_devfreq_profile = {
	.get_cur_freq = msm_devfreq_get_cur_freq,
	.get_dev_status = msm_devfreq_get_dev_status,
	.target = msm_devfreq_target,
	.polling_ms = 10,
};

/*
 * Register the GPU with devfreq using the "simple_ondemand" governor.
 *
 * Nothing is registered when the target has no gpu_busy() hook, since the
 * status callback depends on it.  A registration failure is logged and
 * leaves gpu->devfreq.devfreq set to NULL.  The device is suspended right
 * away; it is resumed from msm_gpu_resume_devfreq().
 */
static void msm_devfreq_init(struct msm_gpu *gpu)
{
	struct device *dev = &gpu->pdev->dev;
	struct devfreq *df;

	/* We need target support to do devfreq */
	if (!gpu->funcs->gpu_busy)
		return;

	msm_devfreq_profile.initial_freq = gpu->fast_rate;

	/*
	 * freq_table and max_state are intentionally left unset so that
	 * devfreq builds the table from the OPP data.
	 */
	df = devm_devfreq_add_device(dev, &msm_devfreq_profile,
			"simple_ondemand", NULL);
	if (IS_ERR(df)) {
		DRM_DEV_ERROR(dev, "Couldn't initialize GPU devfreq\n");
		df = NULL;
	}

	gpu->devfreq.devfreq = df;

	devfreq_suspend_device(df);
}

R
Rob Clark 已提交
119 120 121 122 123 124 125 126
static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
127
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
R
Rob Clark 已提交
128 129 130 131 132 133 134
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
135
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
R
Rob Clark 已提交
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
			return ret;
		}
	}

	return 0;
}

/*
 * Disable the GPU power rails in the reverse order of enable_pwrrail():
 * 'gpu_cx' first, then 'gpu_reg'.  Missing rails are skipped.
 * Always returns 0.
 */
static int disable_pwrrail(struct msm_gpu *gpu)
{
	struct regulator *cx = gpu->gpu_cx;
	struct regulator *reg = gpu->gpu_reg;

	if (cx)
		regulator_disable(cx);

	if (reg)
		regulator_disable(reg);

	return 0;
}

static int enable_clk(struct msm_gpu *gpu)
{
154 155
	if (gpu->core_clk && gpu->fast_rate)
		clk_set_rate(gpu->core_clk, gpu->fast_rate);
R
Rob Clark 已提交
156

157
	/* Set the RBBM timer rate to 19.2Mhz */
158 159
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);
160

161
	return clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
R
Rob Clark 已提交
162 163 164 165
}

static int disable_clk(struct msm_gpu *gpu)
{
166
	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
R
Rob Clark 已提交
167

168 169 170 171 172
	/*
	 * Set the clock to a deliberately low rate. On older targets the clock
	 * speed had to be non zero to avoid problems. On newer targets this
	 * will be rounded down to zero anyway so it all works out.
	 */
173 174
	if (gpu->core_clk)
		clk_set_rate(gpu->core_clk, 27000000);
175

176 177
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);
178

R
Rob Clark 已提交
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
	return 0;
}

/*
 * Enable the bus (ebi1) clock when one was acquired.  The result of
 * clk_prepare_enable() is not propagated; this always returns 0.
 */
static int enable_axi(struct msm_gpu *gpu)
{
	struct clk *bus = gpu->ebi1_clk;

	if (!bus)
		return 0;

	clk_prepare_enable(bus);
	return 0;
}

/*
 * Disable the bus (ebi1) clock when one was acquired.  Always returns 0.
 */
static int disable_axi(struct msm_gpu *gpu)
{
	struct clk *bus = gpu->ebi1_clk;

	if (!bus)
		return 0;

	clk_disable_unprepare(bus);
	return 0;
}

S
Sharat Masetty 已提交
196 197 198 199 200 201 202 203
/*
 * Restart devfreq sampling: reset the busy-cycle count and the sample
 * timestamp, then resume the devfreq device.
 */
void msm_gpu_resume_devfreq(struct msm_gpu *gpu)
{
	gpu->devfreq.time = ktime_get();
	gpu->devfreq.busy_cycles = 0;

	devfreq_resume_device(gpu->devfreq.devfreq);
}

R
Rob Clark 已提交
204 205 206 207
int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	int ret;

R
Rob Clark 已提交
208
	DBG("%s", gpu->name);
R
Rob Clark 已提交
209 210 211 212 213 214 215 216 217 218 219 220 221

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

S
Sharat Masetty 已提交
222
	msm_gpu_resume_devfreq(gpu);
223

R
Rob Clark 已提交
224 225
	gpu->needs_hw_init = true;

R
Rob Clark 已提交
226 227 228 229 230 231 232
	return 0;
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	int ret;

R
Rob Clark 已提交
233
	DBG("%s", gpu->name);
R
Rob Clark 已提交
234

S
Sharat Masetty 已提交
235
	devfreq_suspend_device(gpu->devfreq.devfreq);
236

R
Rob Clark 已提交
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}

R
Rob Clark 已提交
252
int msm_gpu_hw_init(struct msm_gpu *gpu)
253
{
R
Rob Clark 已提交
254
	int ret;
255

256 257
	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));

R
Rob Clark 已提交
258 259
	if (!gpu->needs_hw_init)
		return 0;
260

R
Rob Clark 已提交
261 262 263 264 265
	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);
266

R
Rob Clark 已提交
267
	return ret;
268 269
}

270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
#ifdef CONFIG_DEV_COREDUMP
/*
 * devcoredump read callback: render the captured crash state as text.
 *
 * @buffer/@count describe the destination window and @offset the position in
 * the dump the window starts at; @data is the struct msm_gpu passed to
 * dev_coredumpm().  Returns the number of bytes produced, or 0 when no crash
 * state is available.
 */
static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset,
		size_t count, void *data, size_t datalen)
{
	struct msm_gpu *gpu = data;
	struct msm_gpu_state *state;
	struct drm_printer p;
	struct drm_print_iterator iter = {
		.data = buffer,
		.offset = 0,
		.start = offset,
		.remain = count,
	};

	/* Hold a reference on the crash state while it is being printed */
	state = msm_gpu_crashstate_get(gpu);
	if (!state)
		return 0;

	p = drm_coredump_printer(&iter);

	/* Generic header */
	drm_printf(&p, "---\n");
	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
	drm_printf(&p, "time: %lld.%09ld\n",
		state->time.tv_sec, state->time.tv_nsec);

	/* Offending process, when it was recorded */
	if (state->comm)
		drm_printf(&p, "comm: %s\n", state->comm);
	if (state->cmd)
		drm_printf(&p, "cmdline: %s\n", state->cmd);

	/* Target specific part of the dump */
	gpu->funcs->show(gpu, state, &p);

	msm_gpu_crashstate_put(gpu);

	return count - iter.remain;
}

/*
 * devcoredump free callback: drop the crash state reference of the GPU that
 * was registered as @data.
 */
static void msm_gpu_devcoredump_free(void *data)
{
	struct msm_gpu *crashed_gpu = data;

	msm_gpu_crashstate_put(crashed_gpu);
}

314 315 316 317 318 319 320 321 322
/*
 * Record one buffer object in the next free slot of state->bos.
 *
 * Size and iova are always recorded.  The contents are copied only for
 * buffers that are marked for read and are not imported; if the copy cannot
 * be allocated or the buffer cannot be mapped, the slot is kept without data.
 * state->nr_bos is incremented in every case.
 */
static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state,
		struct msm_gem_object *obj, u64 iova, u32 flags)
{
	struct msm_gpu_state_bo *slot = &state->bos[state->nr_bos];
	void *vaddr;

	slot->size = obj->base.size;
	slot->iova = iova;

	/*
	 * Only store data for non imported buffer objects marked for read
	 * (i.e. don't record the contents of write only objects).
	 */
	if ((flags & MSM_SUBMIT_BO_READ) && !obj->base.import_attach) {
		slot->data = kvmalloc(obj->base.size, GFP_KERNEL);

		if (slot->data) {
			vaddr = msm_gem_get_vaddr_active(&obj->base);

			if (IS_ERR(vaddr)) {
				kvfree(slot->data);
				slot->data = NULL;
			} else {
				memcpy(slot->data, vaddr, obj->base.size);
				msm_gem_put_vaddr(&obj->base);
			}
		}
	}

	state->nr_bos++;
}

static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
{
	struct msm_gpu_state *state;

	/* Only save one crash state at a time */
	if (gpu->crashstate)
		return;

	state = gpu->funcs->gpu_state_get(gpu);
	if (IS_ERR_OR_NULL(state))
		return;

	/* Fill in the additional crash state information */
	state->comm = kstrdup(comm, GFP_KERNEL);
	state->cmd = kstrdup(cmd, GFP_KERNEL);

362 363 364
	if (submit) {
		int i;

365
		state->bos = kcalloc(submit->nr_cmds,
366 367
			sizeof(struct msm_gpu_state_bo), GFP_KERNEL);

368 369 370 371 372 373
		for (i = 0; state->bos && i < submit->nr_cmds; i++) {
			int idx = submit->cmd[i].idx;

			msm_gpu_crashstate_get_bo(state, submit->bos[idx].obj,
				submit->bos[idx].iova, submit->bos[idx].flags);
		}
374 375
	}

376 377 378 379 380 381 382 383
	/* Set the active crash state to be dumped on failure */
	gpu->crashstate = state;

	/* FIXME: Release the crashstate if this errors out? */
	dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
		msm_gpu_devcoredump_read, msm_gpu_devcoredump_free);
}
#else
384 385
/* Crash state capture is compiled out without CONFIG_DEV_COREDUMP */
static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
	/* nothing to do */
}
#endif

390 391 392 393
/*
 * Hangcheck detection for locked gpu:
 */

394 395 396 397 398 399 400 401 402 403 404 405 406 407
/*
 * Update the fence context for every submit on @ring whose seqno is not
 * greater than @fence.  The walk stops at the first submit with a larger
 * seqno.
 */
static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		uint32_t fence)
{
	struct msm_gem_submit *pos;

	list_for_each_entry(pos, &ring->submits, node) {
		if (pos->seqno > fence)
			return;

		msm_update_fence(pos->ring->fctx, pos->fence->seqno);
	}
}

408 409 410 411 412 413 414 415 416 417 418 419 420 421
/*
 * Look up the submit on @ring whose seqno equals @fence.
 *
 * Expects the caller to hold struct_mutex (a warning is emitted otherwise).
 * Returns the matching submit or NULL when there is none.
 */
static struct msm_gem_submit *
find_submit(struct msm_ringbuffer *ring, uint32_t fence)
{
	struct msm_gem_submit *pos;
	struct msm_gem_submit *found = NULL;

	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));

	list_for_each_entry(pos, &ring->submits, node) {
		if (pos->seqno == fence) {
			found = pos;
			break;
		}
	}

	return found;
}

R
Rob Clark 已提交
422
static void retire_submits(struct msm_gpu *gpu);
423

424 425 426 427
static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
428
	struct msm_drm_private *priv = dev->dev_private;
429
	struct msm_gem_submit *submit;
430
	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
431
	char *comm = NULL, *cmd = NULL;
432 433
	int i;

434
	mutex_lock(&dev->struct_mutex);
435

436
	DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);
437

438
	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
439 440 441 442 443 444
	if (submit) {
		struct task_struct *task;

		rcu_read_lock();
		task = pid_task(submit->pid, PIDTYPE_PID);
		if (task) {
445
			comm = kstrdup(task->comm, GFP_ATOMIC);
446 447 448 449 450 451 452 453 454 455 456 457

			/*
			 * So slightly annoying, in other paths like
			 * mmap'ing gem buffers, mmap_sem is acquired
			 * before struct_mutex, which means we can't
			 * hold struct_mutex across the call to
			 * get_cmdline().  But submits are retired
			 * from the same in-order workqueue, so we can
			 * safely drop the lock here without worrying
			 * about the submit going away.
			 */
			mutex_unlock(&dev->struct_mutex);
458
			cmd = kstrdup_quotable_cmdline(task, GFP_ATOMIC);
459
			mutex_lock(&dev->struct_mutex);
460 461
		}
		rcu_read_unlock();
462

463
		if (comm && cmd) {
464
			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
465
				gpu->name, comm, cmd);
466 467

			msm_rd_dump_submit(priv->hangrd, submit,
468 469
				"offending task: %s (%s)", comm, cmd);
		} else
470 471 472
			msm_rd_dump_submit(priv->hangrd, submit, NULL);
	}

473 474
	/* Record the crash state */
	pm_runtime_get_sync(&gpu->pdev->dev);
475
	msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
476 477
	pm_runtime_put_sync(&gpu->pdev->dev);

478 479
	kfree(cmd);
	kfree(comm);
480 481 482 483 484 485

	/*
	 * Update all the rings with the latest and greatest fence.. this
	 * needs to happen after msm_rd_dump_submit() to ensure that the
	 * bo's referenced by the offending submit are still around.
	 */
486
	for (i = 0; i < gpu->nr_rings; i++) {
487 488 489
		struct msm_ringbuffer *ring = gpu->rb[i];

		uint32_t fence = ring->memptrs->fence;
490

491 492 493 494 495 496 497 498
		/*
		 * For the current (faulting?) ring/submit advance the fence by
		 * one more to clear the faulting submit
		 */
		if (ring == cur_ring)
			fence++;

		update_fences(gpu, ring, fence);
499 500 501
	}

	if (msm_gpu_active(gpu)) {
502
		/* retire completed submits, plus the one that hung: */
R
Rob Clark 已提交
503
		retire_submits(gpu);
504

R
Rob Clark 已提交
505
		pm_runtime_get_sync(&gpu->pdev->dev);
506
		gpu->funcs->recover(gpu);
R
Rob Clark 已提交
507
		pm_runtime_put_sync(&gpu->pdev->dev);
508

509 510 511 512
		/*
		 * Replay all remaining submits starting with highest priority
		 * ring
		 */
513
		for (i = 0; i < gpu->nr_rings; i++) {
514 515 516 517
			struct msm_ringbuffer *ring = gpu->rb[i];

			list_for_each_entry(submit, &ring->submits, node)
				gpu->funcs->submit(gpu, submit, NULL);
518
		}
519
	}
520

521 522 523 524 525 526 527 528 529 530 531 532
	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}

/*
 * (Re)arm the hangcheck timer to fire DRM_MSM_HANGCHECK_JIFFIES from now,
 * with the expiry rounded up by round_jiffies_up().
 */
static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	unsigned long expires;

	DBG("%s", gpu->name);

	expires = round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES);
	mod_timer(&gpu->hangcheck_timer, expires);
}

533
static void hangcheck_handler(struct timer_list *t)
534
{
535
	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
R
Rob Clark 已提交
536 537
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
538 539
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	uint32_t fence = ring->memptrs->fence;
540

541
	if (fence != ring->hangcheck_fence) {
542
		/* some progress has been made.. ya! */
543 544
		ring->hangcheck_fence = fence;
	} else if (fence < ring->seqno) {
545
		/* no progress and not done.. hung! */
546
		ring->hangcheck_fence = fence;
547
		DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
548
				gpu->name, ring->id);
549
		DRM_DEV_ERROR(dev->dev, "%s:     completed fence: %u\n",
R
Rob Clark 已提交
550
				gpu->name, fence);
551
		DRM_DEV_ERROR(dev->dev, "%s:     submitted fence: %u\n",
552 553
				gpu->name, ring->seqno);

554 555 556 557
		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
558
	if (ring->seqno > ring->hangcheck_fence)
559
		hangcheck_timer_reset(gpu);
R
Rob Clark 已提交
560 561 562

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
563 564
}

R
Rob Clark 已提交
565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617
/*
 * Performance Counters:
 */

/*
 * Sample the hardware performance counters.  Called under perf_lock.
 *
 * Every counter register is read and remembered in gpu->last_cntrs.  For the
 * first min(@ncntrs, gpu->num_perfcntrs) counters the delta against the
 * previously remembered value is stored in @cntrs.
 *
 * Returns the number of deltas written to @cntrs.
 */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	for (i = 0; i < gpu->num_perfcntrs; i++) {
		uint32_t sample = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

		/* only the first n counters are reported to the caller */
		if (i < n)
			cntrs[i] = sample - gpu->last_cntrs[i];

		gpu->last_cntrs[i] = sample;
	}

	return n;
}

/*
 * Account the time since the last sample to the software counters.
 *
 * Under perf_lock, and only while perf counting is active, the elapsed
 * microseconds are added to totaltime and, if the GPU was active at the
 * previous sample, to activetime as well.  The sample is then refreshed with
 * the current time and activity state.
 */
static void update_sw_cntrs(struct msm_gpu *gpu)
{
	unsigned long irqflags;

	spin_lock_irqsave(&gpu->perf_lock, irqflags);

	if (gpu->perfcntr_active) {
		ktime_t now = ktime_get();
		uint32_t delta_us =
			ktime_to_us(ktime_sub(now, gpu->last_sample.time));

		gpu->totaltime += delta_us;
		if (gpu->last_sample.active)
			gpu->activetime += delta_us;

		gpu->last_sample.active = msm_gpu_active(gpu);
		gpu->last_sample.time = now;
	}

	spin_unlock_irqrestore(&gpu->perf_lock, irqflags);
}

/*
 * Start performance counting.
 *
 * Takes a runtime PM reference (dropped by msm_gpu_perfcntr_stop()), then,
 * under perf_lock, resets the software counters, takes an initial sample and
 * primes the remembered hardware counter values.
 */
void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long irqflags;

	pm_runtime_get_sync(&gpu->pdev->dev);

	spin_lock_irqsave(&gpu->perf_lock, irqflags);

	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();

	gpu->totaltime = 0;
	gpu->activetime = 0;
	gpu->perfcntr_active = true;

	/* no deltas requested: this only records the current hw values */
	update_hw_cntrs(gpu, 0, NULL);

	spin_unlock_irqrestore(&gpu->perf_lock, irqflags);
}

/*
 * Stop performance counting and drop the runtime PM reference taken by
 * msm_gpu_perfcntr_start().
 */
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	struct device *dev = &gpu->pdev->dev;

	gpu->perfcntr_active = false;

	pm_runtime_put_sync(dev);
}

/*
 * Read out and reset the performance counters.
 *
 * On success *activetime and *totaltime receive the accumulated software
 * counters (which are then cleared) and up to @ncntrs hardware counter
 * deltas are stored in @cntrs.
 *
 * Returns the number of hardware counters sampled, or -EINVAL when perf
 * counting is not active.
 */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long irqflags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, irqflags);

	if (gpu->perfcntr_active) {
		*activetime = gpu->activetime;
		*totaltime = gpu->totaltime;

		gpu->totaltime = 0;
		gpu->activetime = 0;

		ret = update_hw_cntrs(gpu, ncntrs, cntrs);
	} else {
		ret = -EINVAL;
	}

	spin_unlock_irqrestore(&gpu->perf_lock, irqflags);

	return ret;
}

R
Rob Clark 已提交
663 664 665 666
/*
 * Cmdstream submission/retirement:
 */

667 668
static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		struct msm_gem_submit *submit)
669
{
670 671 672
	int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	volatile struct msm_gpu_submit_stats *stats;
	u64 elapsed, clock = 0;
673 674
	int i;

675 676 677 678 679 680 681 682 683 684 685 686 687 688
	stats = &ring->memptrs->stats[index];
	/* Convert 19.2Mhz alwayson ticks to nanoseconds for elapsed time */
	elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000;
	do_div(elapsed, 192);

	/* Calculate the clock frequency from the number of CP cycles */
	if (elapsed) {
		clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000;
		do_div(clock, elapsed);
	}

	trace_msm_gpu_submit_retired(submit, elapsed, clock,
		stats->alwayson_start, stats->alwayson_end);

689 690 691 692
	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		/* move to inactive: */
		msm_gem_move_to_inactive(&msm_obj->base);
693
		msm_gem_unpin_iova(&msm_obj->base, gpu->aspace);
694
		drm_gem_object_put(&msm_obj->base);
695 696
	}

R
Rob Clark 已提交
697 698
	pm_runtime_mark_last_busy(&gpu->pdev->dev);
	pm_runtime_put_autosuspend(&gpu->pdev->dev);
699
	msm_gem_submit_free(submit);
700 701
}

R
Rob Clark 已提交
702
static void retire_submits(struct msm_gpu *gpu)
703 704
{
	struct drm_device *dev = gpu->dev;
705 706
	struct msm_gem_submit *submit, *tmp;
	int i;
707 708 709

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

710
	/* Retire the commits starting with highest priority */
711
	for (i = 0; i < gpu->nr_rings; i++) {
712
		struct msm_ringbuffer *ring = gpu->rb[i];
713

714 715
		list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
			if (dma_fence_is_signaled(submit->fence))
716
				retire_submit(gpu, ring, submit);
717 718 719 720
		}
	}
}

R
Rob Clark 已提交
721 722 723 724
static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
725
	int i;
R
Rob Clark 已提交
726

727 728
	for (i = 0; i < gpu->nr_rings; i++)
		update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);
R
Rob Clark 已提交
729

R
Rob Clark 已提交
730
	mutex_lock(&dev->struct_mutex);
R
Rob Clark 已提交
731
	retire_submits(gpu);
R
Rob Clark 已提交
732 733 734 735 736 737 738 739
	mutex_unlock(&dev->struct_mutex);
}

/*
 * Schedule the retire worker and refresh the software perf counters.
 * Call from irq handler to schedule work to retire bo's.
 */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;

	queue_work(priv->wq, &gpu->retire_work);

	update_sw_cntrs(gpu);
}

/* add bo's to gpu's ring, and kick gpu: */
744
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
R
Rob Clark 已提交
745 746 747 748
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
749
	struct msm_ringbuffer *ring = submit->ring;
750
	int i;
R
Rob Clark 已提交
751

752 753
	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

R
Rob Clark 已提交
754 755 756
	pm_runtime_get_sync(&gpu->pdev->dev);

	msm_gpu_hw_init(gpu);
757

758 759 760
	submit->seqno = ++ring->seqno;

	list_add_tail(&submit->node, &ring->submits);
761

762
	msm_rd_dump_submit(priv->rd, submit, NULL);
R
Rob Clark 已提交
763

R
Rob Clark 已提交
764 765
	update_sw_cntrs(gpu);

R
Rob Clark 已提交
766 767
	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
R
Rob Clark 已提交
768
		uint64_t iova;
R
Rob Clark 已提交
769 770 771 772 773 774

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

775
		/* submit takes a reference to the bo and iova until retired: */
776
		drm_gem_object_get(&msm_obj->base);
777
		msm_gem_get_and_pin_iova(&msm_obj->base,
778
				submit->gpu->aspace, &iova);
R
Rob Clark 已提交
779

R
Rob Clark 已提交
780 781
		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
R
Rob Clark 已提交
782 783
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
R
Rob Clark 已提交
784
	}
785

786
	gpu->funcs->submit(gpu, submit, ctx);
787 788
	priv->lastctx = ctx;

789
	hangcheck_timer_reset(gpu);
R
Rob Clark 已提交
790 791 792 793 794 795 796 797 798 799 800 801
}

/*
 * Init/Cleanup:
 */

/*
 * Top level interrupt handler: forwards to the target's irq() hook.  @data
 * is the struct msm_gpu registered together with the interrupt.
 */
static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *source = data;

	return source->funcs->irq(source);
}

802 803
static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
{
804
	int ret = msm_clk_bulk_get(&pdev->dev, &gpu->grp_clks);
805

806
	if (ret < 1) {
807
		gpu->nr_clocks = 0;
808
		return ret;
809
	}
810

811
	gpu->nr_clocks = ret;
812

813 814
	gpu->core_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "core");
815

816 817
	gpu->rbbmtimer_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "rbbmtimer");
818 819 820

	return 0;
}
R
Rob Clark 已提交
821

822 823 824 825 826 827 828 829 830 831 832 833
/*
 * Create and attach the GPU address space covering [va_start, va_end].
 *
 * a2xx GPUs use the a2xx specific address space; everything else gets an
 * IOMMU domain from the platform bus.  For now a single address space is
 * used for the whole GPU rather than one per context.
 *
 * Returns the attached address space, NULL when no IOMMU domain could be
 * allocated, or an ERR_PTR() when creating or attaching the address space
 * failed.
 */
static struct msm_gem_address_space *
msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
		uint64_t va_start, uint64_t va_end)
{
	struct msm_gem_address_space *aspace;
	struct iommu_domain *domain;
	int err;

	if (adreno_is_a2xx(to_adreno_gpu(gpu))) {
		aspace = msm_gem_address_space_create_a2xx(&pdev->dev, gpu, "gpu",
			va_start, va_end);
	} else {
		domain = iommu_domain_alloc(&platform_bus_type);
		if (!domain)
			return NULL;

		domain->geometry.aperture_start = va_start;
		domain->geometry.aperture_end = va_end;

		DRM_DEV_INFO(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);

		aspace = msm_gem_address_space_create(&pdev->dev, domain, "gpu");

		/* the domain is only handed over on success */
		if (IS_ERR(aspace))
			iommu_domain_free(domain);
	}

	if (IS_ERR(aspace)) {
		DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
			PTR_ERR(aspace));
		return ERR_CAST(aspace);
	}

	err = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
	if (err) {
		msm_gem_address_space_put(aspace);
		return ERR_PTR(err);
	}

	return aspace;
}

R
Rob Clark 已提交
867 868
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
869
		const char *name, struct msm_gpu_config *config)
R
Rob Clark 已提交
870
{
871 872 873
	int i, ret, nr_rings = config->nr_rings;
	void *memptrs;
	uint64_t memptrs_iova;
R
Rob Clark 已提交
874

R
Rob Clark 已提交
875 876 877
	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

R
Rob Clark 已提交
878 879 880 881 882 883
	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
884 885
	INIT_WORK(&gpu->recover_work, recover_worker);

886

887
	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);
R
Rob Clark 已提交
888

R
Rob Clark 已提交
889 890
	spin_lock_init(&gpu->perf_lock);

R
Rob Clark 已提交
891 892

	/* Map registers: */
893
	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
R
Rob Clark 已提交
894 895 896 897 898 899
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
900
	gpu->irq = platform_get_irq_byname(pdev, config->irqname);
R
Rob Clark 已提交
901 902
	if (gpu->irq < 0) {
		ret = gpu->irq;
903
		DRM_DEV_ERROR(drm->dev, "failed to get irq: %d\n", ret);
R
Rob Clark 已提交
904 905 906 907 908 909
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
910
		DRM_DEV_ERROR(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
R
Rob Clark 已提交
911 912 913
		goto fail;
	}

914 915 916
	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;
R
Rob Clark 已提交
917

918
	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
R
Rob Clark 已提交
919 920 921 922 923 924 925 926 927 928 929 930 931 932 933
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

934 935 936
	gpu->pdev = pdev;
	platform_set_drvdata(pdev, gpu);

937 938
	msm_devfreq_init(gpu);

939 940 941 942
	gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
		config->va_start, config->va_end);

	if (gpu->aspace == NULL)
943
		DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
944 945 946
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
		goto fail;
R
Rob Clark 已提交
947
	}
948

949 950
	memptrs = msm_gem_kernel_new(drm,
		sizeof(struct msm_rbmemptrs) * nr_rings,
J
Jordan Crouse 已提交
951
		MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
952
		&memptrs_iova);
J
Jordan Crouse 已提交
953

954 955
	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
956
		DRM_DEV_ERROR(drm->dev, "could not allocate memptrs: %d\n", ret);
J
Jordan Crouse 已提交
957 958 959
		goto fail;
	}

960 961
	msm_gem_object_set_name(gpu->memptrs_bo, "memptrs");

962
	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
963
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
964 965
			ARRAY_SIZE(gpu->rb));
		nr_rings = ARRAY_SIZE(gpu->rb);
R
Rob Clark 已提交
966 967
	}

968 969 970 971 972 973
	/* Create ringbuffer(s): */
	for (i = 0; i < nr_rings; i++) {
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);

		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
974
			DRM_DEV_ERROR(drm->dev,
975 976 977 978 979 980 981 982 983 984
				"could not create ringbuffer %d: %d\n", i, ret);
			goto fail;
		}

		memptrs += sizeof(struct msm_rbmemptrs);
		memptrs_iova += sizeof(struct msm_rbmemptrs);
	}

	gpu->nr_rings = nr_rings;

R
Rob Clark 已提交
985 986 987
	return 0;

fail:
988 989 990 991 992
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++)  {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

993
	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);
J
Jordan Crouse 已提交
994

995
	platform_set_drvdata(pdev, NULL);
R
Rob Clark 已提交
996 997 998 999 1000
	return ret;
}

void msm_gpu_cleanup(struct msm_gpu *gpu)
{
1001 1002
	int i;

R
Rob Clark 已提交
1003 1004 1005 1006
	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

1007 1008 1009
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
R
Rob Clark 已提交
1010
	}
J
Jordan Crouse 已提交
1011

1012
	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);
J
Jordan Crouse 已提交
1013 1014

	if (!IS_ERR_OR_NULL(gpu->aspace)) {
1015 1016 1017 1018
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
			NULL, 0);
		msm_gem_address_space_put(gpu->aspace);
	}
R
Rob Clark 已提交
1019
}