/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drm_print.h>

#include "gem/i915_gem_context.h"

#include "i915_drv.h"

#include "gt/intel_gt.h"

#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_engine_user.h"
#include "intel_context.h"
#include "intel_lrc.h"
#include "intel_reset.h"

/* Haswell does have the CXT_SIZE register however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
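/* 66944 bytes needs 17 pages: 17 * 4096 = 69632, whereas 16 pages (65536) would fall short. */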

#define DEFAULT_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN10_LR_CONTEXT_RENDER_SIZE	(18 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)

#define MAX_MMIO_BASES 3
struct engine_info {
	unsigned int hw_id;
	u8 class;
	u8 instance;
	/* mmio bases table *must* be sorted in reverse gen order */
	struct engine_mmio_base {
		u32 gen : 8;
		u32 base : 24;
	} mmio_bases[MAX_MMIO_BASES];
};

static const struct engine_info intel_engines[] = {
	[RCS0] = {
		.hw_id = RCS0_HW,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 1, .base = RENDER_RING_BASE }
		},
	},
	[BCS0] = {
		.hw_id = BCS0_HW,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 6, .base = BLT_RING_BASE }
		},
	},
	[VCS0] = {
		.hw_id = VCS0_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD_RING_BASE },
			{ .gen = 6, .base = GEN6_BSD_RING_BASE },
			{ .gen = 4, .base = BSD_RING_BASE }
		},
	},
	[VCS1] = {
		.hw_id = VCS1_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD2_RING_BASE },
			{ .gen = 8, .base = GEN8_BSD2_RING_BASE }
		},
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD3_RING_BASE }
		},
	},
	[VCS3] = {
		.hw_id = VCS3_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD4_RING_BASE }
		},
	},
	[VECS0] = {
		.hw_id = VECS0_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .gen = 7, .base = VEBOX_RING_BASE }
		},
	},
	[VECS1] = {
		.hw_id = VECS1_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
		},
	},
};

/**
 * intel_engine_context_size() - return the size of the context for an engine
 * @dev_priv: i915 device private
 * @class: engine class
 *
 * Each engine class may require a different amount of space for a context
 * image.
 *
 * Return: size (in bytes) of an engine class specific context image
 *
 * Note: this size includes the HWSP, which is part of the context image
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
 */
u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
{
	u32 cxt_size;

	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

	switch (class) {
	case RENDER_CLASS:
		switch (INTEL_GEN(dev_priv)) {
		default:
			MISSING_CASE(INTEL_GEN(dev_priv));
			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
		case 12:
		case 11:
			return GEN11_LR_CONTEXT_RENDER_SIZE;
		case 10:
			return GEN10_LR_CONTEXT_RENDER_SIZE;
		case 9:
			return GEN9_LR_CONTEXT_RENDER_SIZE;
		case 8:
			return GEN8_LR_CONTEXT_RENDER_SIZE;
		case 7:
			if (IS_HASWELL(dev_priv))
				return HSW_CXT_TOTAL_SIZE;

			cxt_size = I915_READ(GEN7_CXT_SIZE);
			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 6:
			cxt_size = I915_READ(CXT_SIZE);
			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 5:
		case 4:
			/*
			 * There is a discrepancy here between the size reported
			 * by the register and the size of the context layout
			 * in the docs. Both are described as authoritative!
			 *
			 * The discrepancy is on the order of a few cachelines,
			 * but the total is under one page (4k), which is our
			 * minimum allocation anyway so it should all come
			 * out in the wash.
			 */
			cxt_size = I915_READ(CXT_SIZE) + 1;
			DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
					 INTEL_GEN(dev_priv),
					 cxt_size * 64,
					 cxt_size - 1);
			return round_up(cxt_size * 64, PAGE_SIZE);
		case 3:
		case 2:
		/* For the special day when i810 gets merged. */
		case 1:
			return 0;
		}
		break;
	default:
		MISSING_CASE(class);
		/* fall through */
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
	case COPY_ENGINE_CLASS:
		if (INTEL_GEN(dev_priv) < 8)
			return 0;
		return GEN8_LR_CONTEXT_OTHER_SIZE;
	}
}

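/*
 * Pick the mmio base for the running device: walk the reverse-sorted table
 * and take the first entry whose .gen is not newer than the device. For
 * example, VCS0 on a gen9 part skips { 11, GEN11_BSD_RING_BASE } and
 * selects { 6, GEN6_BSD_RING_BASE }.
 */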
static u32 __engine_mmio_base(struct drm_i915_private *i915,
			      const struct engine_mmio_base *bases)
{
	int i;

	for (i = 0; i < MAX_MMIO_BASES; i++)
		if (INTEL_GEN(i915) >= bases[i].gen)
			break;

	GEM_BUG_ON(i == MAX_MMIO_BASES);
	GEM_BUG_ON(!bases[i].base);

	return bases[i].base;
}

static void __sprint_engine_name(struct intel_engine_cs *engine)
{
	/*
	 * Before we know what the uABI name for this engine will be,
	 * we still would like to keep track of this engine in the debug logs.
	 * We throw in a ' here as a reminder that this isn't its final name.
	 */
	GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
			     intel_engine_class_repr(engine->class),
			     engine->instance) >= sizeof(engine->name));
}

void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Though they added more rings on g4x/ilk, they did not add
	 * per-engine HWSTAM until gen6.
	 */
	if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS)
		return;

	if (INTEL_GEN(engine->i915) >= 3)
		ENGINE_WRITE(engine, RING_HWSTAM, mask);
	else
		ENGINE_WRITE16(engine, RING_HWSTAM, mask);
}

static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* Mask off all writes into the unknown HWSP */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}

static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	struct intel_engine_cs *engine;

	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));

	if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
		return -EINVAL;

	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);

	engine->id = id;
	engine->legacy_idx = INVALID_ENGINE;
	engine->mask = BIT(id);
	engine->i915 = gt->i915;
	engine->gt = gt;
	engine->uncore = gt->uncore;
	engine->hw_id = engine->guc_id = info->hw_id;
	engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases);

	engine->class = info->class;
	engine->instance = info->instance;
	__sprint_engine_name(engine);

	/*
	 * To be overridden by the backend on setup. However to facilitate
	 * cleanup on error during setup, we always provide the destroy vfunc.
	 */
	engine->destroy = (typeof(engine->destroy))kfree;

	engine->context_size = intel_engine_context_size(gt->i915,
							 engine->class);
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;
	if (engine->context_size)
		DRIVER_CAPS(gt->i915)->has_logical_contexts = true;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

	seqlock_init(&engine->stats.lock);

	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	/* Scrub mmio state on takeover */
	intel_engine_sanitize_mmio(engine);

	gt->engine_class[info->class][info->instance] = engine;
	gt->engine[id] = engine;

	intel_engine_add_user(engine);
	gt->i915->engine[id] = engine;

	return 0;
}

static void __setup_engine_capabilities(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (engine->class == VIDEO_DECODE_CLASS) {
		/*
		 * HEVC support is present on first engine instance
		 * before Gen11 and on all instances afterwards.
		 */
		if (INTEL_GEN(i915) >= 11 ||
		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
			engine->uabi_capabilities |=
				I915_VIDEO_CLASS_CAPABILITY_HEVC;

		/*
		 * SFC block is present only on even logical engine
		 * instances.
		 */
		if ((INTEL_GEN(i915) >= 11 &&
		     RUNTIME_INFO(i915)->vdbox_sfc_access & engine->mask) ||
		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
		if (INTEL_GEN(i915) >= 9)
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	}
}

static void intel_setup_engine_capabilities(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		__setup_engine_capabilities(engine);
}

/**
 * intel_engines_cleanup() - free the resources allocated for Command Streamers
 * @i915: the i915 device
 */
void intel_engines_cleanup(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id) {
		engine->destroy(engine);
		i915->engine[id] = NULL;
	}
}

/**
 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
 * @i915: the i915 device
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init_mmio(struct drm_i915_private *i915)
{
	struct intel_device_info *device_info = mkwrite_device_info(i915);
	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
	unsigned int mask = 0;
	unsigned int i;
	int err;

	WARN_ON(engine_mask == 0);
	WARN_ON(engine_mask &
		GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));

	if (i915_inject_probe_failure(i915))
		return -ENODEV;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		if (!HAS_ENGINE(i915, i))
			continue;

		err = intel_engine_setup(&i915->gt, i);
		if (err)
			goto cleanup;

		mask |= BIT(i);
	}

	/*
	 * Catch failures to update intel_engines table when the new engines
	 * are added to the driver by a warning and disabling the forgotten
	 * engines.
	 */
	if (WARN_ON(mask != engine_mask))
		device_info->engine_mask = mask;

	RUNTIME_INFO(i915)->num_engines = hweight32(mask);

	intel_gt_check_and_clear_faults(&i915->gt);

	intel_setup_engine_capabilities(i915);

	return 0;

cleanup:
	intel_engines_cleanup(i915);
	return err;
}

/**
 * intel_engines_init() - init the Engine Command Streamers
 * @i915: i915 device private
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init(struct drm_i915_private *i915)
{
	int (*init)(struct intel_engine_cs *engine);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	if (HAS_EXECLISTS(i915))
		init = intel_execlists_submission_init;
	else
		init = intel_ring_submission_init;

	for_each_engine(engine, i915, id) {
		err = init(engine);
		if (err)
			goto cleanup;
	}

	return 0;

cleanup:
	intel_engines_cleanup(i915);
	return err;
}

void intel_engine_init_execlists(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;

	execlists->port_mask = 1;
	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);

	memset(execlists->pending, 0, sizeof(execlists->pending));
	execlists->active =
		memset(execlists->inflight, 0, sizeof(execlists->inflight));

	execlists->queue_priority_hint = INT_MIN;
	execlists->queue = RB_ROOT_CACHED;
}

static void cleanup_status_page(struct intel_engine_cs *engine)
{
	struct i915_vma *vma;

	/* Prevent writes into HWSP after returning the page to the system */
	intel_engine_set_hwsp_writemask(engine, ~0u);

	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;

	if (!HWS_NEEDS_PHYSICAL(engine->i915))
		i915_vma_unpin(vma);

	i915_gem_object_unpin_map(vma->obj);
	i915_gem_object_put(vma->obj);
}

static int pin_ggtt_status_page(struct intel_engine_cs *engine,
				struct i915_vma *vma)
{
	unsigned int flags;

	flags = PIN_GLOBAL;
	if (!HAS_LLC(engine->i915))
		/*
		 * On g33, we cannot place HWS above 256MiB, so
		 * restrict its pinning to the low mappable arena.
		 * Though this restriction is not documented for
		 * gen4, gen5, or byt, they also behave similarly
		 * and hang if the HWS is placed at the top of the
		 * GTT. To generalise, it appears that all !llc
		 * platforms have issues with us placing the HWS
		 * above the mappable region (even though we never
		 * actually map it).
		 */
		flags |= PIN_MAPPABLE;
	else
		flags |= PIN_HIGH;

	return i915_vma_pin(vma, 0, 0, flags);
}

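/*
 * The status page (HWSP) is a single page the GPU writes back to (e.g.
 * breadcrumb seqnos and, with execlists, CSB entries). Allocate it, map it
 * write-back for the CPU and, unless the platform uses a physical HWS,
 * pin it into the GGTT.
 */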
static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	void *vaddr;
	int ret;

	/*
	 * Though the HWS register does support 36bit addresses, historically
	 * we have had hangs and corruption reported due to wild writes if
	 * the HWS is placed above 4G. We only allow objects to be allocated
	 * in GFP_DMA32 for i965, and no earlier physical address users had
	 * access to more than 4G.
	 */
	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate status page\n");
		return PTR_ERR(obj);
	}

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto err;
	}

	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
	engine->status_page.vma = vma;

	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
		ret = pin_ggtt_status_page(engine, vma);
		if (ret)
			goto err_unpin;
	}

	return 0;

err_unpin:
	i915_gem_object_unpin_map(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

static int intel_engine_setup_common(struct intel_engine_cs *engine)
{
	int err;

	init_llist_head(&engine->barrier_tasks);

	err = init_status_page(engine);
	if (err)
		return err;

	intel_engine_init_active(engine, ENGINE_PHYSICAL);
	intel_engine_init_breadcrumbs(engine);
	intel_engine_init_execlists(engine);
	intel_engine_init_hangcheck(engine);
	intel_engine_init_cmd_parser(engine);
	intel_engine_init__pm(engine);

	intel_engine_pool_init(&engine->pool);

	/* Use the whole device by default */
	engine->sseu =
		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);

	intel_engine_init_workarounds(engine);
	intel_engine_init_whitelist(engine);
	intel_engine_init_ctx_wa(engine);

	return 0;
}

/**
 * intel_engines_setup() - setup engine state not requiring hw access
 * @i915: Device to setup.
 *
 * Initializes engine structure members shared between legacy and execlists
 * submission modes which do not require hardware access.
 *
 * Typically done early in the submission mode specific engine setup stage.
 */
int intel_engines_setup(struct drm_i915_private *i915)
{
	int (*setup)(struct intel_engine_cs *engine);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	if (HAS_EXECLISTS(i915))
		setup = intel_execlists_submission_setup;
	else
		setup = intel_ring_submission_setup;

	for_each_engine(engine, i915, id) {
		err = intel_engine_setup_common(engine);
		if (err)
			goto cleanup;

		err = setup(engine);
		if (err)
			goto cleanup;

		/* We expect the backend to take control over its state */
		GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);

		GEM_BUG_ON(!engine->cops);
	}

	return 0;

cleanup:
	intel_engines_cleanup(i915);
	return err;
}

struct measure_breadcrumb {
	struct i915_request rq;
	struct intel_timeline timeline;
	struct intel_ring ring;
	u32 cs[1024];
};

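/*
 * Measure how many dwords the engine's emit_fini_breadcrumb() writes by
 * running it against a dummy request and ring built on the stack above.
 * The result is cached in engine->emit_fini_breadcrumb_dw by
 * intel_engine_init_common() so that sufficient ring space can be reserved
 * for every request's breadcrumb.
 */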
static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
{
	struct measure_breadcrumb *frame;
	int dw = -ENOMEM;

	GEM_BUG_ON(!engine->gt->scratch);

	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
	if (!frame)
		return -ENOMEM;

	if (intel_timeline_init(&frame->timeline,
				engine->gt,
				engine->status_page.vma))
		goto out_frame;

	mutex_lock(&frame->timeline.mutex);

	frame->ring.vaddr = frame->cs;
	frame->ring.size = sizeof(frame->cs);
	frame->ring.effective_size = frame->ring.size;
	intel_ring_update_space(&frame->ring);

	frame->rq.i915 = engine->i915;
	frame->rq.engine = engine;
	frame->rq.ring = &frame->ring;
	rcu_assign_pointer(frame->rq.timeline, &frame->timeline);

	dw = intel_timeline_pin(&frame->timeline);
	if (dw < 0)
		goto out_timeline;

	spin_lock_irq(&engine->active.lock);
	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
	spin_unlock_irq(&engine->active.lock);

	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */

	intel_timeline_unpin(&frame->timeline);

out_timeline:
	mutex_unlock(&frame->timeline.mutex);
	intel_timeline_fini(&frame->timeline);
out_frame:
	kfree(frame);
	return dw;
}

void
intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
{
	INIT_LIST_HEAD(&engine->active.requests);

	spin_lock_init(&engine->active.lock);
	lockdep_set_subclass(&engine->active.lock, subclass);

	/*
	 * Due to an interesting quirk in lockdep's internal debug tracking,
	 * after setting a subclass we must ensure the lock is used. Otherwise,
	 * nr_unused_locks is incremented once too often.
	 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	local_irq_disable();
	lock_map_acquire(&engine->active.lock.dep_map);
	lock_map_release(&engine->active.lock.dep_map);
	local_irq_enable();
#endif
}

static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
{
	static struct lock_class_key kernel;
	struct intel_context *ce;
	int err;

	ce = intel_context_create(engine->i915->kernel_context, engine);
	if (IS_ERR(ce))
		return ce;

	ce->ring = __intel_context_ring_size(SZ_4K);

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		return ERR_PTR(err);
	}

	/*
	 * Give our perma-pinned kernel timelines a separate lockdep class,
	 * so that we can use them from within the normal user timelines
	 * should we need to inject GPU operations during their request
	 * construction.
	 */
	lockdep_set_class(&ce->timeline->mutex, &kernel);

	return ce;
}

/**
 * intel_engine_init_common - initialize engine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typically done at later stages of submission mode specific engine setup.
 *
 * Returns zero on success or an error code on failure.
 */
int intel_engine_init_common(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	int ret;

	engine->set_default_submission(engine);

	/*
	 * We may need to do things with the shrinker which
	 * require us to immediately switch back to the default
	 * context. This can cause a problem as pinning the
	 * default context also requires GTT space which may not
	 * be available. To avoid this we always pin the default
	 * context.
	 */
	ce = create_kernel_context(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	engine->kernel_context = ce;

	ret = measure_breadcrumb_dw(engine);
	if (ret < 0)
		goto err_unpin;

	engine->emit_fini_breadcrumb_dw = ret;

	return 0;

err_unpin:
	intel_context_unpin(ce);
	intel_context_put(ce);
	return ret;
}

/**
 * intel_engine_cleanup_common - cleans up the engine state created by
 *                               the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!list_empty(&engine->active.requests));

	cleanup_status_page(engine);

	intel_engine_pool_fini(&engine->pool);
	intel_engine_fini_breadcrumbs(engine);
	intel_engine_cleanup_cmd_parser(engine);

	if (engine->default_state)
		i915_gem_object_put(engine->default_state);

	if (engine->kernel_context) {
		intel_context_unpin(engine->kernel_context);
		intel_context_put(engine->kernel_context);
	}
	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));

	intel_wa_list_free(&engine->ctx_wa_list);
	intel_wa_list_free(&engine->wa_list);
	intel_wa_list_free(&engine->whitelist);
}

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	u64 acthd;

	if (INTEL_GEN(i915) >= 8)
		acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
	else if (INTEL_GEN(i915) >= 4)
		acthd = ENGINE_READ(engine, RING_ACTHD);
	else
		acthd = ENGINE_READ(engine, ACTHD);

	return acthd;
}

u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
{
	u64 bbaddr;

	if (INTEL_GEN(engine->i915) >= 8)
		bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
	else
		bbaddr = ENGINE_READ(engine, RING_BBADDR);

	return bbaddr;
}

int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	const i915_reg_t mode = RING_MI_MODE(base);
	int err;

	if (INTEL_GEN(engine->i915) < 3)
		return -ENODEV;

	GEM_TRACE("%s\n", engine->name);

	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));

	err = 0;
	if (__intel_wait_for_register_fw(uncore,
					 mode, MODE_IDLE, MODE_IDLE,
					 1000, 0,
					 NULL)) {
		GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
		err = -ETIMEDOUT;
	}

	/* A final mmio read to let GPU writes be hopefully flushed to memory */
	intel_uncore_posting_read_fw(uncore, mode);

	return err;
}

void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
	GEM_TRACE("%s\n", engine->name);

	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}

const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
{
	switch (type) {
	case I915_CACHE_NONE: return " uncached";
	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
	case I915_CACHE_L3_LLC: return " L3+LLC";
	case I915_CACHE_WT: return " WT";
	default: return "";
	}
}

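/*
 * Per-slice/subslice registers are read by steering GEN8_MCR_SELECTOR at
 * the requested slice/subslice, reading the target register and then
 * restoring the previous steering, all with forcewake held under the
 * uncore lock.
 */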
static u32
read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
		  i915_reg_t reg)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_uncore *uncore = engine->uncore;
	u32 mcr_mask, mcr_ss, mcr, old_mcr, val;
	enum forcewake_domains fw_domains;

	if (INTEL_GEN(i915) >= 11) {
		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
		mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
	} else {
		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
		mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	}

	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
						    FW_REG_READ);
	fw_domains |= intel_uncore_forcewake_for_reg(uncore,
						     GEN8_MCR_SELECTOR,
						     FW_REG_READ | FW_REG_WRITE);

	spin_lock_irq(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw_domains);

	old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);

	mcr &= ~mcr_mask;
	mcr |= mcr_ss;
	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	val = intel_uncore_read_fw(uncore, reg);

	mcr &= ~mcr_mask;
	mcr |= old_mcr & mcr_mask;

	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	intel_uncore_forcewake_put__locked(uncore, fw_domains);
	spin_unlock_irq(&uncore->lock);

	return val;
}

/* NB: please notice the memset */
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
	struct drm_i915_private *i915 = engine->i915;
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	struct intel_uncore *uncore = engine->uncore;
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;

	memset(instdone, 0, sizeof(*instdone));

	switch (INTEL_GEN(i915)) {
	default:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		if (engine->id != RCS0)
			break;

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
			instdone->sampler[slice][subslice] =
				read_subslice_reg(engine, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
				read_subslice_reg(engine, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
	case 7:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		if (engine->id != RCS0)
			break;

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		instdone->sampler[0][0] =
			intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] =
			intel_uncore_read(uncore, GEN7_ROW_INSTDONE);

		break;
	case 6:
	case 5:
	case 4:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
		if (engine->id == RCS0)
			/* HACK: Using the wrong struct member */
			instdone->slice_common =
				intel_uncore_read(uncore, GEN4_INSTDONE1);
		break;
	case 3:
	case 2:
		instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
		break;
	}
}

static bool ring_is_idle(struct intel_engine_cs *engine)
{
	bool idle = true;

	if (I915_SELFTEST_ONLY(!engine->mmio_base))
		return true;

	if (!intel_engine_pm_get_if_awake(engine))
		return true;

	/* First check that no commands are left in the ring */
	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
		idle = false;

	/* No bit for gen2, so assume the CS parser is idle */
	if (INTEL_GEN(engine->i915) > 2 &&
	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
		idle = false;

	intel_engine_pm_put(engine);

	return idle;
}

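/*
 * Kick any pending execlists tasklet on this CPU (unless it has been
 * disabled for a reset in progress), then wait for an instance running on
 * another CPU, so that queued submissions are processed before the caller
 * samples engine state.
 */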
void intel_engine_flush_submission(struct intel_engine_cs *engine)
{
	struct tasklet_struct *t = &engine->execlists.tasklet;

	if (__tasklet_is_scheduled(t)) {
		local_bh_disable();
		if (tasklet_trylock(t)) {
			/* Must wait for any GPU reset in progress. */
			if (__tasklet_is_enabled(t))
				t->func(t->data);
			tasklet_unlock(t);
		}
		local_bh_enable();
	}

	/* Otherwise flush the tasklet if it was running on another cpu */
	tasklet_unlock_wait(t);
}

/**
 * intel_engine_is_idle() - Report if the engine has finished process all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and that the engine is idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
	/* More white lies, if wedged, hw state is inconsistent */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	if (!intel_engine_pm_is_awake(engine))
		return true;

	/* Waiting to drain ELSP? */
	if (execlists_active(&engine->execlists)) {
		synchronize_hardirq(engine->i915->drm.pdev->irq);

		intel_engine_flush_submission(engine);

		if (execlists_active(&engine->execlists))
			return false;
	}

	/* ELSP is empty, but there are ready requests? E.g. after reset */
	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
		return false;

	/* Ring stopped? */
	return ring_is_idle(engine);
}

bool intel_engines_are_idle(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * If the driver is wedged, HW state may be very inconsistent and
	 * report that it is still busy, even though we have stopped using it.
	 */
	if (intel_gt_is_wedged(gt))
		return true;

	/* Already parked (and passed an idleness test); must still be idle */
	if (!READ_ONCE(gt->awake))
		return true;

	for_each_engine(engine, gt->i915, id) {
		if (!intel_engine_is_idle(engine))
			return false;
	}

	return true;
}

void intel_engines_reset_default_submission(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt->i915, id)
		engine->set_default_submission(engine);
}

bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
{
	switch (INTEL_GEN(engine->i915)) {
	case 2:
		return false; /* uses physical not virtual addresses */
	case 3:
		/* maybe only uses physical not virtual addresses */
		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
	case 4:
		return !IS_I965G(engine->i915); /* who knows! */
	case 6:
		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
	default:
		return true;
	}
}

static int print_sched_attr(struct drm_i915_private *i915,
			    const struct i915_sched_attr *attr,
			    char *buf, int x, int len)
{
	if (attr->priority == I915_PRIORITY_INVALID)
		return x;

	x += snprintf(buf + x, len - x,
		      " prio=%d", attr->priority);

	return x;
}

static void print_request(struct drm_printer *m,
			  struct i915_request *rq,
			  const char *prefix)
{
	const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
	char buf[80] = "";
	int x = 0;

	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));

	drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
		   prefix,
		   rq->fence.context, rq->fence.seqno,
		   i915_request_completed(rq) ? "!" :
		   i915_request_started(rq) ? "*" :
		   "",
		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
			    &rq->fence.flags) ? "+" :
		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			    &rq->fence.flags) ? "-" :
		   "",
		   buf,
		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
		   name);
}

static void hexdump(struct drm_printer *m, const void *buf, size_t len)
{
	const size_t rowsize = 8 * sizeof(u32);
	const void *prev = NULL;
	bool skip = false;
	size_t pos;

	for (pos = 0; pos < len; pos += rowsize) {
		char line[128];

		if (prev && !memcmp(prev, buf + pos, rowsize)) {
			if (!skip) {
				drm_printf(m, "*\n");
				skip = true;
			}
			continue;
		}

		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
						rowsize, sizeof(u32),
						line, sizeof(line),
						false) >= sizeof(line));
		drm_printf(m, "[%04zx] %s\n", pos, line);

		prev = buf + pos;
		skip = false;
	}
}

static struct intel_timeline *get_timeline(struct i915_request *rq)
{
	struct intel_timeline *tl;

	/*
	 * Even though we are holding the engine->active.lock here, there
	 * is no control over the submission queue per-se and we are
	 * inspecting the active state at a random point in time, with an
	 * unknown queue. Play safe and make sure the timeline remains valid.
	 * (Only being used for pretty printing, one extra kref shouldn't
	 * cause a camel stampede!)
	 */
	rcu_read_lock();
	tl = rcu_dereference(rq->timeline);
	if (!kref_get_unless_zero(&tl->kref))
		tl = NULL;
	rcu_read_unlock();

	return tl;
}

static const char *repr_timer(const struct timer_list *t)
{
	if (!READ_ONCE(t->expires))
		return "inactive";

	if (timer_pending(t))
		return "active";

	return "expired";
}

static void intel_engine_print_registers(struct intel_engine_cs *engine,
					 struct drm_printer *m)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_engine_execlists * const execlists = &engine->execlists;
	u64 addr;

	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
	drm_printf(m, "\tRING_START: 0x%08x\n",
		   ENGINE_READ(engine, RING_START));
	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
		   ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
	drm_printf(m, "\tRING_TAIL:  0x%08x\n",
		   ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
	drm_printf(m, "\tRING_CTL:   0x%08x%s\n",
		   ENGINE_READ(engine, RING_CTL),
		   ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
	if (INTEL_GEN(engine->i915) > 2) {
		drm_printf(m, "\tRING_MODE:  0x%08x%s\n",
			   ENGINE_READ(engine, RING_MI_MODE),
			   ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
	}

	if (INTEL_GEN(dev_priv) >= 6) {
		drm_printf(m, "\tRING_IMR: %08x\n",
			   ENGINE_READ(engine, RING_IMR));
	}

	addr = intel_engine_get_active_head(engine);
	drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	addr = intel_engine_get_last_batch_head(engine);
	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (INTEL_GEN(dev_priv) >= 8)
		addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
	else if (INTEL_GEN(dev_priv) >= 4)
		addr = ENGINE_READ(engine, RING_DMA_FADD);
	else
		addr = ENGINE_READ(engine, DMA_FADD_I8XX);
	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (INTEL_GEN(dev_priv) >= 4) {
		drm_printf(m, "\tIPEIR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IPEHR));
	} else {
		drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
	}

	if (HAS_EXECLISTS(dev_priv)) {
		struct i915_request * const *port, *rq;
		const u32 *hws =
			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
		const u8 num_entries = execlists->csb_size;
		unsigned int idx;
		u8 read, write;

		drm_printf(m, "\tExeclist tasklet queued? %s (%s), timeslice? %s\n",
			   yesno(test_bit(TASKLET_STATE_SCHED,
					  &engine->execlists.tasklet.state)),
			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)),
			   repr_timer(&engine->execlists.timer));

		read = execlists->csb_head;
		write = READ_ONCE(*execlists->csb_write);

		drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
			   read, write, num_entries);

		if (read >= num_entries)
			read = 0;
		if (write >= num_entries)
			write = 0;
		if (read > write)
			write += num_entries;
		while (read < write) {
			idx = ++read % num_entries;
			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
				   idx, hws[idx * 2], hws[idx * 2 + 1]);
		}

		execlists_active_lock_bh(execlists);
		for (port = execlists->active; (rq = *port); port++) {
			char hdr[80];
			int len;

			len = snprintf(hdr, sizeof(hdr),
				       "\t\tActive[%d]: ",
				       (int)(port - execlists->active));
			if (!i915_request_signaled(rq)) {
				struct intel_timeline *tl = get_timeline(rq);

				len += snprintf(hdr + len, sizeof(hdr) - len,
						"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
						i915_ggtt_offset(rq->ring->vma),
						tl ? tl->hwsp_offset : 0,
						hwsp_seqno(rq));

				if (tl)
					intel_timeline_put(tl);
			}
			snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
			print_request(m, rq, hdr);
		}
		for (port = execlists->pending; (rq = *port); port++) {
			struct intel_timeline *tl = get_timeline(rq);
			char hdr[80];

			snprintf(hdr, sizeof(hdr),
				 "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
				 (int)(port - execlists->pending),
				 i915_ggtt_offset(rq->ring->vma),
				 tl ? tl->hwsp_offset : 0,
				 hwsp_seqno(rq));
			print_request(m, rq, hdr);

			if (tl)
				intel_timeline_put(tl);
		}
		execlists_active_unlock_bh(execlists);
	} else if (INTEL_GEN(dev_priv) > 6) {
		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE));
		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_DCLV));
	}
}

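/*
 * Dump the portion of the ring occupied by the request: the contents are
 * copied out in two chunks if the request wraps past the end of the ring
 * buffer, then hexdumped.
 */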
static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
{
	void *ring;
	int size;

	drm_printf(m,
		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
		   rq->head, rq->postfix, rq->tail,
		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);

	size = rq->tail - rq->head;
	if (rq->tail < rq->head)
		size += rq->ring->size;

	ring = kmalloc(size, GFP_ATOMIC);
	if (ring) {
		const void *vaddr = rq->ring->vaddr;
		unsigned int head = rq->head;
		unsigned int len = 0;

		if (rq->tail < head) {
			len = rq->ring->size - head;
			memcpy(ring, vaddr + head, len);
			head = 0;
		}
		memcpy(ring + len, vaddr + head, size - len);

		hexdump(m, ring, size);
		kfree(ring);
	}
}

void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...)
{
	struct i915_gpu_error * const error = &engine->i915->gpu_error;
	struct i915_request *rq;
	intel_wakeref_t wakeref;
	unsigned long flags;

	if (header) {
		va_list ap;

		va_start(ap, header);
		drm_vprintf(m, header, &ap);
		va_end(ap);
	}

	if (intel_gt_is_wedged(engine->gt))
		drm_printf(m, "*** WEDGED ***\n");

	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
	drm_printf(m, "\tHangcheck: %d ms ago\n",
		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
	drm_printf(m, "\tReset count: %d (global %d)\n",
		   i915_reset_engine_count(error, engine),
		   i915_reset_count(error));

	drm_printf(m, "\tRequests:\n");

	spin_lock_irqsave(&engine->active.lock, flags);
	rq = intel_engine_find_active_request(engine);
	if (rq) {
		struct intel_timeline *tl = get_timeline(rq);

		print_request(m, rq, "\t\tactive ");

		drm_printf(m, "\t\tring->start:  0x%08x\n",
			   i915_ggtt_offset(rq->ring->vma));
		drm_printf(m, "\t\tring->head:   0x%08x\n",
			   rq->ring->head);
		drm_printf(m, "\t\tring->tail:   0x%08x\n",
			   rq->ring->tail);
		drm_printf(m, "\t\tring->emit:   0x%08x\n",
			   rq->ring->emit);
		drm_printf(m, "\t\tring->space:  0x%08x\n",
			   rq->ring->space);

		if (tl) {
			drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
				   tl->hwsp_offset);
			intel_timeline_put(tl);
		}

		print_request_ring(m, rq);

		if (rq->hw_context->lrc_reg_state) {
			drm_printf(m, "Logical Ring Context:\n");
			hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE);
		}
	}
	spin_unlock_irqrestore(&engine->active.lock, flags);

	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
	if (wakeref) {
		intel_engine_print_registers(engine, m);
		intel_runtime_pm_put(engine->uncore->rpm, wakeref);
	} else {
		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
	}

	intel_execlists_show_requests(engine, m, print_request, 8);

	drm_printf(m, "HWSP:\n");
	hexdump(m, engine->status_page.addr, PAGE_SIZE);

	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));

	intel_engine_print_breadcrumbs(engine, m);
}

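/*
 * Busyness accounting: stats.active counts the contexts currently in the
 * execlists ports, stats.start marks the beginning of the current busy
 * period and stats.total accumulates completed busy time; readers sample
 * the state under the stats seqlock.
 */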
/**
 * intel_enable_engine_stats() - Enable engine busy tracking on engine
 * @engine: engine to enable stats collection
 *
 * Start collecting the engine busyness data for @engine.
 *
 * Returns 0 on success or a negative error code.
 */
int intel_enable_engine_stats(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *execlists = &engine->execlists;
	unsigned long flags;
	int err = 0;

	if (!intel_engine_supports_stats(engine))
		return -ENODEV;

	execlists_active_lock_bh(execlists);
	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (unlikely(engine->stats.enabled == ~0)) {
		err = -EBUSY;
		goto unlock;
	}

	if (engine->stats.enabled++ == 0) {
		struct i915_request * const *port;
		struct i915_request *rq;

		engine->stats.enabled_at = ktime_get();

		/* XXX submission method oblivious? */
		for (port = execlists->active; (rq = *port); port++)
			engine->stats.active++;

		for (port = execlists->pending; (rq = *port); port++) {
			/* Exclude any contexts already counted in active */
			if (!intel_context_inflight_count(rq->hw_context))
				engine->stats.active++;
		}

		if (engine->stats.active)
			engine->stats.start = engine->stats.enabled_at;
	}

unlock:
	write_sequnlock_irqrestore(&engine->stats.lock, flags);
	execlists_active_unlock_bh(execlists);

	return err;
}

static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
	ktime_t total = engine->stats.total;

	/*
	 * If the engine is executing something at the moment
	 * add it to the total.
	 */
	if (engine->stats.active)
		total = ktime_add(total,
				  ktime_sub(ktime_get(), engine->stats.start));

	return total;
}

/**
 * intel_engine_get_busy_time() - Return current accumulated engine busyness
 * @engine: engine to report on
 *
 * Returns accumulated time @engine was busy since engine stats were enabled.
 */
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
	unsigned int seq;
	ktime_t total;

	do {
		seq = read_seqbegin(&engine->stats.lock);
		total = __intel_engine_get_busy_time(engine);
	} while (read_seqretry(&engine->stats.lock, seq));

	return total;
}

/**
 * intel_disable_engine_stats() - Disable engine busy tracking on engine
 * @engine: engine to disable stats collection
 *
 * Stops collecting the engine busyness data for @engine.
 */
void intel_disable_engine_stats(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (!intel_engine_supports_stats(engine))
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);
	WARN_ON_ONCE(engine->stats.enabled == 0);
	if (--engine->stats.enabled == 0) {
		engine->stats.total = __intel_engine_get_busy_time(engine);
		engine->stats.active = 0;
	}
	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

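/*
 * A request "matches the ring" when the engine's RING_START register still
 * points at that request's ring, i.e. the CS is executing from the ring
 * the request was emitted to.
 */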
static bool match_ring(struct i915_request *rq)
{
	u32 ring = ENGINE_READ(rq->engine, RING_START);

	return ring == i915_ggtt_offset(rq->ring->vma);
}

struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine)
{
	struct i915_request *request, *active = NULL;

	/*
	 * We are called by the error capture, reset and to dump engine
	 * state at random points in time. In particular, note that neither is
	 * crucially ordered with an interrupt. After a hang, the GPU is dead
	 * and we assume that no more writes can happen (we waited long enough
	 * for all writes that were in transaction to be flushed) - adding an
	 * extra delay for a recent interrupt is pointless. Hence, we do
	 * not need an engine->irq_seqno_barrier() before the seqno reads.
	 * At all other times, we must assume the GPU is still running, but
	 * we only care about the snapshot of this moment.
	 */
	lockdep_assert_held(&engine->active.lock);
	list_for_each_entry(request, &engine->active.requests, sched.link) {
		if (i915_request_completed(request))
			continue;

		if (!i915_request_started(request))
			continue;

		/* More than one preemptible request may match! */
		if (!match_ring(request))
			continue;

		active = request;
		break;
	}

	return active;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "mock_engine.c"
#include "selftest_engine.c"
#include "selftest_engine_cs.c"
#endif