/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"
#include "msm_gpu_trace.h"

#include <generated/utsrelease.h>
#include <linux/string_helpers.h>
#include <linux/pm_opp.h>
#include <linux/devfreq.h>
#include <linux/devcoredump.h>

/*
 * Power Management:
 */

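/*
 * devfreq target callback: snap the requested frequency to a supported OPP,
 * then apply it either through the target-specific gpu_set_freq() hook (for
 * GPUs that manage their own clock) or directly on the core clock.
 */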
static int msm_devfreq_target(struct device *dev, unsigned long *freq,
		u32 flags)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	struct dev_pm_opp *opp;

	opp = devfreq_recommended_opp(dev, freq, flags);

	if (IS_ERR(opp))
		return PTR_ERR(opp);

	if (gpu->funcs->gpu_set_freq)
		gpu->funcs->gpu_set_freq(gpu, (u64)*freq);
	else
		clk_set_rate(gpu->core_clk, *freq);

	dev_pm_opp_put(opp);

	return 0;
}

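/*
 * devfreq status callback: report the current frequency along with the busy
 * and total time since the last query, which the governor uses to estimate
 * load.
 */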
static int msm_devfreq_get_dev_status(struct device *dev,
		struct devfreq_dev_status *status)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	ktime_t time;

	if (gpu->funcs->gpu_get_freq)
		status->current_frequency = gpu->funcs->gpu_get_freq(gpu);
	else
		status->current_frequency = clk_get_rate(gpu->core_clk);

	status->busy_time = gpu->funcs->gpu_busy(gpu);

	time = ktime_get();
	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
	gpu->devfreq.time = time;

	return 0;
}

static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));

	if (gpu->funcs->gpu_get_freq)
		*freq = gpu->funcs->gpu_get_freq(gpu);
	else
		*freq = clk_get_rate(gpu->core_clk);

	return 0;
}

static struct devfreq_dev_profile msm_devfreq_profile = {
	.polling_ms = 10,
	.target = msm_devfreq_target,
	.get_dev_status = msm_devfreq_get_dev_status,
	.get_cur_freq = msm_devfreq_get_cur_freq,
};

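/*
 * Register with devfreq, letting it build the frequency table from the OPPs
 * in DT; the device is suspended immediately after registration and only
 * resumed once the GPU is powered up.
 */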
static void msm_devfreq_init(struct msm_gpu *gpu)
{
	/* We need target support to do devfreq */
	if (!gpu->funcs->gpu_busy)
		return;

	msm_devfreq_profile.initial_freq = gpu->fast_rate;

	/*
	 * Don't set the freq_table or max_state and let devfreq build the table
	 * from OPP
	 */

	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
			&msm_devfreq_profile, "simple_ondemand", NULL);

	if (IS_ERR(gpu->devfreq.devfreq)) {
		DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
		gpu->devfreq.devfreq = NULL;
	}

	devfreq_suspend_device(gpu->devfreq.devfreq);
}

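/*
 * Power rail control: the 'vdd' and 'vddcx' regulators (when present) are
 * enabled before the clocks on resume, and disabled after the clocks on
 * suspend.
 */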
static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}

static int enable_clk(struct msm_gpu *gpu)
{
	if (gpu->core_clk && gpu->fast_rate)
		clk_set_rate(gpu->core_clk, gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2MHz */
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);

	return clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
}

static int disable_clk(struct msm_gpu *gpu)
{
	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);

	/*
	 * Set the clock to a deliberately low rate. On older targets the clock
	 * speed had to be non zero to avoid problems. On newer targets this
	 * will be rounded down to zero anyway so it all works out.
	 */
	if (gpu->core_clk)
		clk_set_rate(gpu->core_clk, 27000000);

	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);

	return 0;
}

static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	return 0;
}

static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	return 0;
}

void msm_gpu_resume_devfreq(struct msm_gpu *gpu)
{
	gpu->devfreq.busy_cycles = 0;
	gpu->devfreq.time = ktime_get();

	devfreq_resume_device(gpu->devfreq.devfreq);
}

int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	msm_gpu_resume_devfreq(gpu);

	gpu->needs_hw_init = true;

	return 0;
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	devfreq_suspend_device(gpu->devfreq.devfreq);

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}

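/*
 * (Re)initialize the hardware, e.g. on the first submit after resume or
 * after recovery.  The IRQ is masked around hw_init() so a half-initialized
 * GPU cannot raise interrupts.  The caller must hold struct_mutex.
 */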
int msm_gpu_hw_init(struct msm_gpu *gpu)
{
	int ret;

	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));

	if (!gpu->needs_hw_init)
		return 0;

	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);

	return ret;
}

#ifdef CONFIG_DEV_COREDUMP
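/*
 * devcoredump read callback: format the captured crash state through a
 * drm_coredump_printer so userspace can read it out via the devcoredump
 * class device.
 */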
static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset,
		size_t count, void *data, size_t datalen)
{
	struct msm_gpu *gpu = data;
	struct drm_print_iterator iter;
	struct drm_printer p;
	struct msm_gpu_state *state;

	state = msm_gpu_crashstate_get(gpu);
	if (!state)
		return 0;

	iter.data = buffer;
	iter.offset = 0;
	iter.start = offset;
	iter.remain = count;

	p = drm_coredump_printer(&iter);

	drm_printf(&p, "---\n");
	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
	drm_printf(&p, "time: %lld.%09ld\n",
		state->time.tv_sec, state->time.tv_nsec);
	if (state->comm)
		drm_printf(&p, "comm: %s\n", state->comm);
	if (state->cmd)
		drm_printf(&p, "cmdline: %s\n", state->cmd);

	gpu->funcs->show(gpu, state, &p);

	msm_gpu_crashstate_put(gpu);

	return count - iter.remain;
}

static void msm_gpu_devcoredump_free(void *data)
{
	struct msm_gpu *gpu = data;

	msm_gpu_crashstate_put(gpu);
}

static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state,
		struct msm_gem_object *obj, u64 iova, u32 flags)
{
	struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos];

	/* Don't record write only objects */

	state_bo->size = obj->base.size;
	state_bo->iova = iova;

	/* Only store the data for buffer objects marked for read */
	if ((flags & MSM_SUBMIT_BO_READ)) {
		void *ptr;

		state_bo->data = kvmalloc(obj->base.size, GFP_KERNEL);
		if (!state_bo->data)
			return;

		ptr = msm_gem_get_vaddr_active(&obj->base);
		if (IS_ERR(ptr)) {
			kvfree(state_bo->data);
			return;
		}

		memcpy(state_bo->data, ptr, obj->base.size);
		msm_gem_put_vaddr(&obj->base);
	}

	state->nr_bos++;
}

static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
	struct msm_gpu_state *state;

	/* Only save one crash state at a time */
	if (gpu->crashstate)
		return;

	state = gpu->funcs->gpu_state_get(gpu);
	if (IS_ERR_OR_NULL(state))
		return;

	/* Fill in the additional crash state information */
	state->comm = kstrdup(comm, GFP_KERNEL);
	state->cmd = kstrdup(cmd, GFP_KERNEL);

	if (submit) {
		int i;

		state->bos = kcalloc(submit->nr_bos,
			sizeof(struct msm_gpu_state_bo), GFP_KERNEL);

		for (i = 0; state->bos && i < submit->nr_bos; i++)
			msm_gpu_crashstate_get_bo(state, submit->bos[i].obj,
				submit->bos[i].iova, submit->bos[i].flags);
	}

	/* Set the active crash state to be dumped on failure */
	gpu->crashstate = state;

	/* FIXME: Release the crashstate if this errors out? */
	dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
		msm_gpu_devcoredump_read, msm_gpu_devcoredump_free);
}
#else
static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
}
#endif

/*
 * Hangcheck detection for locked gpu:
 */

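/*
 * Walk a ring's submits in submission order and signal the fence of every
 * submit whose sequence number is at or below the given fence value.
 */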
static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		uint32_t fence)
{
	struct msm_gem_submit *submit;

	list_for_each_entry(submit, &ring->submits, node) {
		if (submit->seqno > fence)
			break;

		msm_update_fence(submit->ring->fctx,
			submit->fence->seqno);
	}
}

static struct msm_gem_submit *
find_submit(struct msm_ringbuffer *ring, uint32_t fence)
{
	struct msm_gem_submit *submit;

	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));

	list_for_each_entry(submit, &ring->submits, node)
		if (submit->seqno == fence)
			return submit;

	return NULL;
}

static void retire_submits(struct msm_gpu *gpu);

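/*
 * Recovery: runs from the workqueue once hangcheck declares a lockup.  Log
 * and dump the offending submit, capture a crash state, reset the GPU, and
 * replay the submits that were still pending on each ring.
 */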
static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_gem_submit *submit;
	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
	char *comm = NULL, *cmd = NULL;
	int i;

	mutex_lock(&dev->struct_mutex);

	DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
	if (submit) {
		struct task_struct *task;

		rcu_read_lock();
		task = pid_task(submit->pid, PIDTYPE_PID);
		if (task) {
			comm = kstrdup(task->comm, GFP_ATOMIC);

			/*
			 * So slightly annoying, in other paths like
			 * mmap'ing gem buffers, mmap_sem is acquired
			 * before struct_mutex, which means we can't
			 * hold struct_mutex across the call to
			 * get_cmdline().  But submits are retired
			 * from the same in-order workqueue, so we can
			 * safely drop the lock here without worrying
			 * about the submit going away.
			 */
			mutex_unlock(&dev->struct_mutex);
			cmd = kstrdup_quotable_cmdline(task, GFP_ATOMIC);
			mutex_lock(&dev->struct_mutex);
		}
		rcu_read_unlock();

		if (comm && cmd) {
			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
				gpu->name, comm, cmd);

			msm_rd_dump_submit(priv->hangrd, submit,
				"offending task: %s (%s)", comm, cmd);
		} else
			msm_rd_dump_submit(priv->hangrd, submit, NULL);
	}

	/* Record the crash state */
	pm_runtime_get_sync(&gpu->pdev->dev);
	msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
	pm_runtime_put_sync(&gpu->pdev->dev);

	kfree(cmd);
	kfree(comm);

	/*
	 * Update all the rings with the latest and greatest fence.. this
	 * needs to happen after msm_rd_dump_submit() to ensure that the
	 * bo's referenced by the offending submit are still around.
	 */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];
		uint32_t fence = ring->memptrs->fence;

		/*
		 * For the current (faulting?) ring/submit advance the fence by
		 * one more to clear the faulting submit
		 */
		if (ring == cur_ring)
			fence++;

		update_fences(gpu, ring, fence);
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		pm_runtime_get_sync(&gpu->pdev->dev);
		gpu->funcs->recover(gpu);
		pm_runtime_put_sync(&gpu->pdev->dev);

		/*
		 * Replay all remaining submits starting with highest priority
		 * ring
		 */
		for (i = 0; i < gpu->nr_rings; i++) {
			struct msm_ringbuffer *ring = gpu->rb[i];

			list_for_each_entry(submit, &ring->submits, node)
				gpu->funcs->submit(gpu, submit, NULL);
		}
	}

	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}

static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}

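/*
 * Hangcheck timer callback: if the active ring has made no progress since
 * the last tick but still has outstanding submits, declare a lockup and
 * schedule recover_work.
 */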
static void hangcheck_handler(struct timer_list *t)
{
	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	uint32_t fence = ring->memptrs->fence;

	if (fence != ring->hangcheck_fence) {
		/* some progress has been made.. ya! */
		ring->hangcheck_fence = fence;
	} else if (fence < ring->seqno) {
		/* no progress and not done.. hung! */
		ring->hangcheck_fence = fence;
		DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
				gpu->name, ring->id);
		DRM_DEV_ERROR(dev->dev, "%s:     completed fence: %u\n",
				gpu->name, fence);
		DRM_DEV_ERROR(dev->dev, "%s:     submitted fence: %u\n",
				gpu->name, ring->seqno);

		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (ring->seqno > ring->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}

/*
 * Performance Counters:
 */

/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}

static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	pm_runtime_get_sync(&gpu->pdev->dev);

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
	pm_runtime_put_sync(&gpu->pdev->dev);
}

/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}

/*
 * Cmdstream submission/retirement:
 */

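/*
 * Retire a single submit: derive elapsed time and effective clock rate from
 * the always-on counter samples recorded by the CP, emit a trace event, and
 * drop the references the submit held on its buffer objects.
 */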
static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		struct msm_gem_submit *submit)
{
	int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	volatile struct msm_gpu_submit_stats *stats;
	u64 elapsed, clock = 0;
	int i;

	stats = &ring->memptrs->stats[index];
	/* Convert 19.2MHz alwayson ticks to nanoseconds for elapsed time */
	elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000;
	do_div(elapsed, 192);

	/* Calculate the clock frequency from the number of CP cycles */
	if (elapsed) {
		clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000;
		do_div(clock, elapsed);
	}

	trace_msm_gpu_submit_retired(submit, elapsed, clock,
		stats->alwayson_start, stats->alwayson_end);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		/* move to inactive: */
		msm_gem_move_to_inactive(&msm_obj->base);
		msm_gem_put_iova(&msm_obj->base, gpu->aspace);
		drm_gem_object_put(&msm_obj->base);
	}

	pm_runtime_mark_last_busy(&gpu->pdev->dev);
	pm_runtime_put_autosuspend(&gpu->pdev->dev);
	msm_gem_submit_free(submit);
}

static void retire_submits(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_gem_submit *submit, *tmp;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	/* Retire the commits starting with highest priority */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
			if (dma_fence_is_signaled(submit->fence))
				retire_submit(gpu, ring, submit);
		}
	}
}

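/*
 * Runs from the workqueue (scheduled via msm_gpu_retire() from the irq
 * handler) to signal completed fences and retire their submits.
 */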
static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);

	mutex_lock(&dev->struct_mutex);
	retire_submits(gpu);
	mutex_unlock(&dev->struct_mutex);
}

/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	update_sw_cntrs(gpu);
}

/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	pm_runtime_get_sync(&gpu->pdev->dev);

	msm_gpu_hw_init(gpu);

	submit->seqno = ++ring->seqno;

	list_add_tail(&submit->node, &ring->submits);

	msm_rd_dump_submit(priv->rd, submit, NULL);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		uint64_t iova;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_get(&msm_obj->base);
		msm_gem_get_iova(&msm_obj->base,
				submit->gpu->aspace, &iova);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
	}

	gpu->funcs->submit(gpu, submit, ctx);
	priv->lastctx = ctx;

	hangcheck_timer_reset(gpu);
}

/*
 * Init/Cleanup:
 */

static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}

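/*
 * Look up the GPU clocks described in DT as a bulk array, and cache the two
 * clocks managed directly by name: "core" and "rbbmtimer".
 */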
static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
{
	int ret = msm_clk_bulk_get(&pdev->dev, &gpu->grp_clks);

	if (ret < 1) {
		gpu->nr_clocks = 0;
		return ret;
	}

	gpu->nr_clocks = ret;

	gpu->core_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "core");

	gpu->rbbmtimer_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "rbbmtimer");

	return 0;
}

static struct msm_gem_address_space *
msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
		uint64_t va_start, uint64_t va_end)
{
	struct iommu_domain *iommu;
	struct msm_gem_address_space *aspace;
	int ret;

	/*
	 * Setup IOMMU.. eventually we will (I think) do this once per context
	 * and have separate page tables per context.  For now, to keep things
	 * simple and to get something working, just use a single address space:
	 */
	iommu = iommu_domain_alloc(&platform_bus_type);
	if (!iommu)
		return NULL;

	iommu->geometry.aperture_start = va_start;
	iommu->geometry.aperture_end = va_end;

	DRM_DEV_INFO(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);

	aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
	if (IS_ERR(aspace)) {
		DRM_DEV_ERROR(gpu->dev->dev, "failed to init iommu: %ld\n",
			PTR_ERR(aspace));
		iommu_domain_free(iommu);
		return ERR_CAST(aspace);
	}

	ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
	if (ret) {
		msm_gem_address_space_put(aspace);
		return ERR_PTR(ret);
	}

	return aspace;
}

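/*
 * Common GPU init: map registers, request the IRQ, acquire clocks and
 * regulators, register with devfreq, create the address space, and allocate
 * the shared memptrs page plus the ringbuffer(s).
 */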
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config)
{
	int i, ret, nr_rings = config->nr_rings;
	void *memptrs;
	uint64_t memptrs_iova;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);

	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);

	spin_lock_init(&gpu->perf_lock);

	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq_byname(pdev, config->irqname);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		DRM_DEV_ERROR(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		DRM_DEV_ERROR(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;

	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	gpu->pdev = pdev;
	platform_set_drvdata(pdev, gpu);

	msm_devfreq_init(gpu);

	gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
		config->va_start, config->va_end);

	if (gpu->aspace == NULL)
		DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
		goto fail;
	}

	memptrs = msm_gem_kernel_new(drm,
		sizeof(struct msm_rbmemptrs) * nr_rings,
		MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
		&memptrs_iova);

	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
		DRM_DEV_ERROR(drm->dev, "could not allocate memptrs: %d\n", ret);
		goto fail;
	}

	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
			ARRAY_SIZE(gpu->rb));
		nr_rings = ARRAY_SIZE(gpu->rb);
	}

	/* Create ringbuffer(s): */
	for (i = 0; i < nr_rings; i++) {
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);

		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
			DRM_DEV_ERROR(drm->dev,
				"could not create ringbuffer %d: %d\n", i, ret);
			goto fail;
		}

		memptrs += sizeof(struct msm_rbmemptrs);
		memptrs_iova += sizeof(struct msm_rbmemptrs);
	}

	gpu->nr_rings = nr_rings;

	return 0;

fail:
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++)  {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	if (gpu->memptrs_bo) {
		msm_gem_put_vaddr(gpu->memptrs_bo);
		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
		drm_gem_object_put_unlocked(gpu->memptrs_bo);
	}

	platform_set_drvdata(pdev, NULL);
	return ret;
}

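/*
 * Tear down in roughly the reverse order of msm_gpu_init(): destroy the
 * ringbuffers, release the memptrs BO, then detach and drop the address
 * space.
 */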
void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	int i;

	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	if (gpu->memptrs_bo) {
		msm_gem_put_vaddr(gpu->memptrs_bo);
		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
		drm_gem_object_put_unlocked(gpu->memptrs_bo);
	}

	if (!IS_ERR_OR_NULL(gpu->aspace)) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
			NULL, 0);
		msm_gem_address_space_put(gpu->aspace);
	}
}