// SPDX-License-Identifier: MIT
/*
 * Copyright © 2016 Intel Corporation
 */

#include <linux/string_helpers.h>

#include <drm/drm_print.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_internal.h"
#include "gt/intel_gt_regs.h"

#include "i915_cmd_parser.h"
#include "i915_drv.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_engine_user.h"
#include "intel_execlists_submission.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "uc/intel_guc_submission.h"

/* Haswell does have the CXT_SIZE register, however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)

#define DEFAULT_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)

#define MAX_MMIO_BASES 3
struct engine_info {
	u8 class;
	u8 instance;
	/* mmio bases table *must* be sorted in reverse graphics_ver order */
	struct engine_mmio_base {
		u32 graphics_ver : 8;
		u32 base : 24;
	} mmio_bases[MAX_MMIO_BASES];
};

static const struct engine_info intel_engines[] = {
61
	[RCS0] = {
62 63
		.class = RENDER_CLASS,
		.instance = 0,
64
		.mmio_bases = {
65
			{ .graphics_ver = 1, .base = RENDER_RING_BASE }
66
		},
67
	},
68
	[BCS0] = {
69 70
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
71
		.mmio_bases = {
72
			{ .graphics_ver = 6, .base = BLT_RING_BASE }
73
		},
74
	},
	[BCS1] = {
		.class = COPY_ENGINE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE }
		},
	},
	[BCS2] = {
		.class = COPY_ENGINE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE }
		},
	},
	[BCS3] = {
		.class = COPY_ENGINE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE }
		},
	},
	[BCS4] = {
		.class = COPY_ENGINE_CLASS,
		.instance = 4,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE }
		},
	},
	[BCS5] = {
		.class = COPY_ENGINE_CLASS,
		.instance = 5,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE }
		},
	},
	[BCS6] = {
		.class = COPY_ENGINE_CLASS,
		.instance = 6,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE }
		},
	},
	[BCS7] = {
		.class = COPY_ENGINE_CLASS,
		.instance = 7,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE }
		},
	},
	[BCS8] = {
		.class = COPY_ENGINE_CLASS,
		.instance = 8,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE }
		},
	},
131
	[VCS0] = {
132 133
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
134
		.mmio_bases = {
135 136 137
			{ .graphics_ver = 11, .base = GEN11_BSD_RING_BASE },
			{ .graphics_ver = 6, .base = GEN6_BSD_RING_BASE },
			{ .graphics_ver = 4, .base = BSD_RING_BASE }
138
		},
139
	},
140
	[VCS1] = {
141 142
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
143
		.mmio_bases = {
144 145
			{ .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE },
			{ .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE }
146
		},
147
	},
148
	[VCS2] = {
149 150
		.class = VIDEO_DECODE_CLASS,
		.instance = 2,
151
		.mmio_bases = {
152
			{ .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE }
153
		},
154
	},
155
	[VCS3] = {
156 157
		.class = VIDEO_DECODE_CLASS,
		.instance = 3,
158
		.mmio_bases = {
159
			{ .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE }
160
		},
161
	},
	[VCS4] = {
		.class = VIDEO_DECODE_CLASS,
		.instance = 4,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE }
		},
	},
	[VCS5] = {
		.class = VIDEO_DECODE_CLASS,
		.instance = 5,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE }
		},
	},
	[VCS6] = {
		.class = VIDEO_DECODE_CLASS,
		.instance = 6,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE }
		},
	},
	[VCS7] = {
		.class = VIDEO_DECODE_CLASS,
		.instance = 7,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE }
		},
	},
190
	[VECS0] = {
191 192
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
193
		.mmio_bases = {
194 195
			{ .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .graphics_ver = 7, .base = VEBOX_RING_BASE }
196
		},
197
	},
198
	[VECS1] = {
199 200
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 1,
201
		.mmio_bases = {
202
			{ .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE }
203
		},
204
	},
	[VECS2] = {
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE }
		},
	},
	[VECS3] = {
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE }
		},
	},
	[CCS0] = {
		.class = COMPUTE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE }
		}
	},
	[CCS1] = {
		.class = COMPUTE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE }
		}
	},
	[CCS2] = {
		.class = COMPUTE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE }
		}
	},
	[CCS3] = {
		.class = COMPUTE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE }
		}
	},
247 248
};

249
/**
250
 * intel_engine_context_size() - return the size of the context for an engine
251
 * @gt: the gt
 * @class: engine class
 *
 * Each engine class may require a different amount of space for a context
 * image.
 *
 * Return: size (in bytes) of an engine class specific context image
 *
 * Note: this size includes the HWSP, which is part of the context image
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
 */
263
u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
264
{
265
	struct intel_uncore *uncore = gt->uncore;
266 267 268 269 270
	u32 cxt_size;

	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

	switch (class) {
271 272
	case COMPUTE_CLASS:
		fallthrough;
273
	case RENDER_CLASS:
274
		switch (GRAPHICS_VER(gt->i915)) {
275
		default:
276
			MISSING_CASE(GRAPHICS_VER(gt->i915));
277
			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
278
		case 12:
279 280
		case 11:
			return GEN11_LR_CONTEXT_RENDER_SIZE;
281 282 283
		case 9:
			return GEN9_LR_CONTEXT_RENDER_SIZE;
		case 8:
284
			return GEN8_LR_CONTEXT_RENDER_SIZE;
285
		case 7:
286
			if (IS_HASWELL(gt->i915))
287 288
				return HSW_CXT_TOTAL_SIZE;

289
			cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
290 291 292
			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 6:
293
			cxt_size = intel_uncore_read(uncore, CXT_SIZE);
294 295 296
			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 5:
297
		case 4:
			/*
			 * There is a discrepancy here between the size reported
			 * by the register and the size of the context layout
			 * in the docs. Both are described as authoritative!
			 *
			 * The discrepancy is on the order of a few cachelines,
			 * but the total is under one page (4k), which is our
			 * minimum allocation anyway so it should all come
			 * out in the wash.
			 */
308
			cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
309
			drm_dbg(&gt->i915->drm,
310 311
				"graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n",
				GRAPHICS_VER(gt->i915), cxt_size * 64,
312
				cxt_size - 1);
313
			return round_up(cxt_size * 64, PAGE_SIZE);
		case 3:
		case 2:
		/* For the special day when i810 gets merged. */
		case 1:
			return 0;
		}
		break;
	default:
		MISSING_CASE(class);
323
		fallthrough;
324 325 326
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
	case COPY_ENGINE_CLASS:
327
		if (GRAPHICS_VER(gt->i915) < 8)
328 329 330 331 332
			return 0;
		return GEN8_LR_CONTEXT_OTHER_SIZE;
	}
}

333 334 335 336 337 338
static u32 __engine_mmio_base(struct drm_i915_private *i915,
			      const struct engine_mmio_base *bases)
{
	int i;
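	/*
	 * mmio_bases[] is sorted in reverse graphics_ver order (see the
	 * struct definition above), so the first base whose graphics_ver is
	 * not newer than this device is the one to use.
	 */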

	for (i = 0; i < MAX_MMIO_BASES; i++)
		if (GRAPHICS_VER(i915) >= bases[i].graphics_ver)
			break;

	GEM_BUG_ON(i == MAX_MMIO_BASES);
	GEM_BUG_ON(!bases[i].base);

	return bases[i].base;
}

348
static void __sprint_engine_name(struct intel_engine_cs *engine)
349
{
	/*
	 * Before we know what the uABI name for this engine will be,
	 * we still would like to keep track of this engine in the debug logs.
	 * We throw in a ' here as a reminder that this isn't its final name.
	 */
	GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
			     intel_engine_class_repr(engine->class),
			     engine->instance) >= sizeof(engine->name));
358 359
}

360 361 362 363 364 365
void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Though they added more rings on g4x/ilk, they did not add
	 * per-engine HWSTAM until gen6.
	 */
366
	if (GRAPHICS_VER(engine->i915) < 6 && engine->class != RENDER_CLASS)
367 368
		return;

369
	if (GRAPHICS_VER(engine->i915) >= 3)
370
		ENGINE_WRITE(engine, RING_HWSTAM, mask);
371
	else
372
		ENGINE_WRITE16(engine, RING_HWSTAM, mask);
}

static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* Mask off all writes into the unknown HWSP */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}

381 382 383 384 385
static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	GEM_DEBUG_WARN_ON(iir);
}

static u32 get_reset_domain(u8 ver, enum intel_engine_id id)
{
	u32 reset_domain;

	if (ver >= 11) {
		static const u32 engine_reset_domains[] = {
			[RCS0]  = GEN11_GRDOM_RENDER,
			[BCS0]  = GEN11_GRDOM_BLT,
			[BCS1]  = XEHPC_GRDOM_BLT1,
			[BCS2]  = XEHPC_GRDOM_BLT2,
			[BCS3]  = XEHPC_GRDOM_BLT3,
			[BCS4]  = XEHPC_GRDOM_BLT4,
			[BCS5]  = XEHPC_GRDOM_BLT5,
			[BCS6]  = XEHPC_GRDOM_BLT6,
			[BCS7]  = XEHPC_GRDOM_BLT7,
			[BCS8]  = XEHPC_GRDOM_BLT8,
			[VCS0]  = GEN11_GRDOM_MEDIA,
			[VCS1]  = GEN11_GRDOM_MEDIA2,
			[VCS2]  = GEN11_GRDOM_MEDIA3,
			[VCS3]  = GEN11_GRDOM_MEDIA4,
			[VCS4]  = GEN11_GRDOM_MEDIA5,
			[VCS5]  = GEN11_GRDOM_MEDIA6,
			[VCS6]  = GEN11_GRDOM_MEDIA7,
			[VCS7]  = GEN11_GRDOM_MEDIA8,
			[VECS0] = GEN11_GRDOM_VECS,
			[VECS1] = GEN11_GRDOM_VECS2,
			[VECS2] = GEN11_GRDOM_VECS3,
			[VECS3] = GEN11_GRDOM_VECS4,
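			/* CCS engines are reset via the render domain */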
			[CCS0]  = GEN11_GRDOM_RENDER,
			[CCS1]  = GEN11_GRDOM_RENDER,
			[CCS2]  = GEN11_GRDOM_RENDER,
			[CCS3]  = GEN11_GRDOM_RENDER,
		};
		GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
			   !engine_reset_domains[id]);
		reset_domain = engine_reset_domains[id];
	} else {
		static const u32 engine_reset_domains[] = {
			[RCS0]  = GEN6_GRDOM_RENDER,
			[BCS0]  = GEN6_GRDOM_BLT,
			[VCS0]  = GEN6_GRDOM_MEDIA,
			[VCS1]  = GEN8_GRDOM_MEDIA2,
			[VECS0] = GEN6_GRDOM_VECS,
		};
		GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
			   !engine_reset_domains[id]);
		reset_domain = engine_reset_domains[id];
	}

	return reset_domain;
}

438 439
static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
			      u8 logical_instance)
440 441
{
	const struct engine_info *info = &intel_engines[id];
442
	struct drm_i915_private *i915 = gt->i915;
443
	struct intel_engine_cs *engine;
444
	u8 guc_class;
445

446 447
	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
448 449
	BUILD_BUG_ON(I915_MAX_VCS > (MAX_ENGINE_INSTANCE + 1));
	BUILD_BUG_ON(I915_MAX_VECS > (MAX_ENGINE_INSTANCE + 1));
450

451 452 453
	if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
		return -EINVAL;

454
	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
455 456
		return -EINVAL;

457
	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
458 459
		return -EINVAL;

460
	if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
461 462
		return -EINVAL;

463 464 465
	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;
466

467 468
	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);

469
	INIT_LIST_HEAD(&engine->pinned_contexts_list);
470
	engine->id = id;
471
	engine->legacy_idx = INVALID_ENGINE;
472
	engine->mask = BIT(id);
473 474
	engine->reset_domain = get_reset_domain(GRAPHICS_VER(gt->i915),
						id);
475
	engine->i915 = i915;
476 477
	engine->gt = gt;
	engine->uncore = gt->uncore;
478 479 480
	guc_class = engine_class_to_guc_class(info->class);
	engine->guc_id = MAKE_GUC_ID(guc_class, info->instance);
	engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
481

482 483
	engine->irq_handler = nop_irq_handler;

484 485
	engine->class = info->class;
	engine->instance = info->instance;
486
	engine->logical_mask = BIT(logical_instance);
487
	__sprint_engine_name(engine);
488

489 490
	engine->props.heartbeat_interval_ms =
		CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
491 492
	engine->props.max_busywait_duration_ns =
		CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
493 494
	engine->props.preempt_timeout_ms =
		CONFIG_DRM_I915_PREEMPT_TIMEOUT;
495 496
	engine->props.stop_timeout_ms =
		CONFIG_DRM_I915_STOP_TIMEOUT;
497 498
	engine->props.timeslice_duration_ms =
		CONFIG_DRM_I915_TIMESLICE_DURATION;
499

500
	/* Override to uninterruptible for OpenCL workloads. */
501
	if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
502 503
		engine->props.preempt_timeout_ms = 0;

504 505 506 507 508
	if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
	     __ffs(CCS_MASK(engine->gt)) == engine->instance) ||
	     engine->class == RENDER_CLASS)
		engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;

509
	/* features common between engines sharing EUs */
510
	if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
511
		engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
512 513
		engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
	}
514

515 516
	engine->defaults = engine->props; /* never to change again */

517
	engine->context_size = intel_engine_context_size(gt, engine->class);
518 519
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;
520
	if (engine->context_size)
521
		DRIVER_CAPS(i915)->has_logical_contexts = true;
522

523
	ewma__engine_latency_init(&engine->latency);
524
	seqcount_init(&engine->stats.execlists.lock);
525

526 527
	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

528 529 530
	/* Scrub mmio state on takeover */
	intel_engine_sanitize_mmio(engine);

531
	gt->engine_class[info->class][info->instance] = engine;
532
	gt->engine[id] = engine;
533

534
	return 0;
535 536
}

static void __setup_engine_capabilities(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (engine->class == VIDEO_DECODE_CLASS) {
		/*
		 * HEVC support is present on the first engine instance
		 * before Gen11 and on all instances afterwards.
		 */
546 547
		if (GRAPHICS_VER(i915) >= 11 ||
		    (GRAPHICS_VER(i915) >= 9 && engine->instance == 0))
548 549 550 551 552 553 554
			engine->uabi_capabilities |=
				I915_VIDEO_CLASS_CAPABILITY_HEVC;

		/*
		 * SFC block is present only on even logical engine
		 * instances.
		 */
555
		if ((GRAPHICS_VER(i915) >= 11 &&
556 557
		     (engine->gt->info.vdbox_sfc_access &
		      BIT(engine->instance))) ||
558
		    (GRAPHICS_VER(i915) >= 9 && engine->instance == 0))
559 560 561
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
562 563
		if (GRAPHICS_VER(i915) >= 9 &&
		    engine->gt->info.sfc_mask & BIT(engine->instance))
564 565 566 567 568
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	}
}

569
static void intel_setup_engine_capabilities(struct intel_gt *gt)
570 571 572 573
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

574
	for_each_engine(engine, gt, id)
575 576 577
		__setup_engine_capabilities(engine);
}

578
/**
579
 * intel_engines_release() - free the resources allocated for Command Streamers
580
 * @gt: pointer to struct intel_gt
581
 */
582
void intel_engines_release(struct intel_gt *gt)
583 584 585 586
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Before we release the resources held by engine, we must be certain
	 * that the HW is no longer accessing them -- having the GPU scribble
	 * to or read from a page being used for something else causes no end
	 * of fun.
	 *
	 * The GPU should be reset by this point, but assume the worst just
	 * in case we aborted before completely initialising the engines.
	 */
	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		__intel_gt_reset(gt, ALL_ENGINES);

600
	/* Decouple the backend; but keep the layout for late GPU resets */
601
	for_each_engine(engine, gt, id) {
602 603 604
		if (!engine->release)
			continue;

605 606 607
		intel_wakeref_wait_for_idle(&engine->wakeref);
		GEM_BUG_ON(intel_engine_pm_is_awake(engine));

608 609 610 611
		engine->release(engine);
		engine->release = NULL;

		memset(&engine->reset, 0, sizeof(engine->reset));
612 613 614
	}
}

void intel_engine_free_request_pool(struct intel_engine_cs *engine)
{
	if (!engine->request_pool)
		return;

	kmem_cache_free(i915_request_slab_cache(), engine->request_pool);
}

623 624 625 626 627
void intel_engines_free(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

628 629 630
	/* Free the requests! dma-resv keeps fences around for an eternity */
	rcu_barrier();

631
	for_each_engine(engine, gt, id) {
632
		intel_engine_free_request_pool(engine);
633 634 635 636 637
		kfree(engine);
		gt->engine[id] = NULL;
	}
}

638
static
639
bool gen11_vdbox_has_sfc(struct intel_gt *gt,
640 641 642
			 unsigned int physical_vdbox,
			 unsigned int logical_vdbox, u16 vdbox_mask)
{
643 644
	struct drm_i915_private *i915 = gt->i915;

645 646 647 648 649 650
	/*
	 * In Gen11, only even numbered logical VDBOXes are hooked
	 * up to an SFC (Scaler & Format Converter) unit.
	 * In Gen12, even numbered physical instances are always connected
	 * to an SFC. Odd numbered physical instances have an SFC only if
	 * the previous even instance is fused off.
	 *
	 * Starting with Xe_HP, there's also a dedicated SFC_ENABLE field
	 * in the fuse register that tells us whether a specific SFC is present.
	 */
655 656
	if ((gt->info.sfc_mask & BIT(physical_vdbox / 2)) == 0)
		return false;
657
	else if (MEDIA_VER(i915) >= 12)
658 659
		return (physical_vdbox % 2 == 0) ||
			!(BIT(physical_vdbox - 1) & vdbox_mask);
660
	else if (MEDIA_VER(i915) == 11)
661 662 663 664 665
		return logical_vdbox % 2 == 0;

	return false;
}

static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_gt_info *info = &gt->info;
	int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS;
	unsigned long ccs_mask;
	unsigned int i;

674
	if (hweight32(CCS_MASK(gt)) <= 1)
675 676
		return;

677 678
	ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask,
						     ss_per_ccs);
	/*
	 * If all DSS in a quadrant are fused off, the corresponding CCS
	 * engine is not available for use.
	 */
	for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) {
		info->engine_mask &= ~BIT(_CCS(i));
		drm_dbg(&i915->drm, "ccs%u fused off\n", i);
	}
}

static void engine_mask_apply_copy_fuses(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_gt_info *info = &gt->info;
	unsigned long meml3_mask;
	unsigned long quad;

	meml3_mask = intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3);
	meml3_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, meml3_mask);

	/*
	 * Link Copy engines may be fused off according to meml3_mask. Each
	 * bit is a quad that houses two Link Copy and two Sub Copy engines.
	 */
	for_each_clear_bit(quad, &meml3_mask, GEN12_MAX_MSLICES) {
		unsigned int instance = quad * 2 + 1;
		intel_engine_mask_t mask = GENMASK(_BCS(instance + 1),
						   _BCS(instance));

		if (mask & info->engine_mask) {
			drm_dbg(&i915->drm, "bcs%u fused off\n", instance);
			drm_dbg(&i915->drm, "bcs%u fused off\n", instance + 1);

			info->engine_mask &= ~mask;
		}
	}
}

/*
 * Determine which engines are fused off in our particular hardware.
 * Note that we have a catch-22 situation where we need to be able to access
 * the blitter forcewake domain to read the engine fuses, but at the same time
 * we need to know which engines are available on the system to know which
 * forcewake domains are present. We solve this by initializing the forcewake
 * domains based on the full engine mask in the platform capabilities before
 * calling this function and pruning the domains for fused-off engines
 * afterwards.
 */
static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
730
	struct intel_gt_info *info = &gt->info;
731 732 733
	struct intel_uncore *uncore = gt->uncore;
	unsigned int logical_vdbox = 0;
	unsigned int i;
734
	u32 media_fuse, fuse1;
735 736 737
	u16 vdbox_mask;
	u16 vebox_mask;

738
	GEM_BUG_ON(!info->engine_mask);
739

740
	if (GRAPHICS_VER(i915) < 11)
741 742
		return info->engine_mask;

743 744 745 746 747 748
	/*
	 * On newer platforms the fusing register is called 'enable' and has
	 * enable semantics, while on older platforms it is called 'disable'
	 * and bits have disable semantics.
	 */
	media_fuse = intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);
749
	if (MEDIA_VER_FULL(i915) < IP_VER(12, 50))
750
		media_fuse = ~media_fuse;
751 752 753 754 755

	vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
	vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
		      GEN11_GT_VEBOX_DISABLE_SHIFT;

756
	if (MEDIA_VER_FULL(i915) >= IP_VER(12, 50)) {
757 758 759 760 761 762
		fuse1 = intel_uncore_read(uncore, HSW_PAVP_FUSE1);
		gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1);
	} else {
		gt->info.sfc_mask = ~0;
	}

	for (i = 0; i < I915_MAX_VCS; i++) {
		if (!HAS_ENGINE(gt, _VCS(i))) {
			vdbox_mask &= ~BIT(i);
			continue;
		}

		if (!(BIT(i) & vdbox_mask)) {
			info->engine_mask &= ~BIT(_VCS(i));
			drm_dbg(&i915->drm, "vcs%u fused off\n", i);
			continue;
		}

775
		if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask))
776
			gt->info.vdbox_sfc_access |= BIT(i);
777
		logical_vdbox++;
	}
	drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n",
		vdbox_mask, VDBOX_MASK(gt));
	GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt));

	for (i = 0; i < I915_MAX_VECS; i++) {
		if (!HAS_ENGINE(gt, _VECS(i))) {
			vebox_mask &= ~BIT(i);
			continue;
		}

		if (!(BIT(i) & vebox_mask)) {
			info->engine_mask &= ~BIT(_VECS(i));
			drm_dbg(&i915->drm, "vecs%u fused off\n", i);
		}
	}
	drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n",
		vebox_mask, VEBOX_MASK(gt));
	GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));

798
	engine_mask_apply_compute_fuses(gt);
799
	engine_mask_apply_copy_fuses(gt);
800

801 802 803
	return info->engine_mask;
}

static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
				 u8 class, const u8 *map, u8 num_instances)
{
	int i, j;
	u8 current_logical_id = 0;

	for (j = 0; j < num_instances; ++j) {
		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
			if (!HAS_ENGINE(gt, i) ||
			    intel_engines[i].class != class)
				continue;

			if (intel_engines[i].instance == map[j]) {
				logical_ids[intel_engines[i].instance] =
					current_logical_id++;
				break;
			}
		}
	}
}

static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
{
827 828 829 830 831 832
	/*
	 * Logical to physical mapping is needed for proper support
	 * of the split-frame feature.
	 */
	if (MEDIA_VER(gt->i915) >= 11 && class == VIDEO_DECODE_CLASS) {
		const u8 map[] = { 0, 2, 4, 6, 1, 3, 5, 7 };
833

		populate_logical_ids(gt, logical_ids, class,
				     map, ARRAY_SIZE(map));
	} else {
		int i;
		u8 map[MAX_ENGINE_INSTANCE + 1];

		for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
			map[i] = i;
		populate_logical_ids(gt, logical_ids, class,
				     map, ARRAY_SIZE(map));
	}
845 846
}

847
/**
848
 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
849
 * @gt: pointer to struct intel_gt
850 851 852
 *
 * Return: non-zero if the initialization failed.
 */
853
int intel_engines_init_mmio(struct intel_gt *gt)
854
{
855
	struct drm_i915_private *i915 = gt->i915;
856
	const unsigned int engine_mask = init_engine_mask(gt);
857
	unsigned int mask = 0;
858 859
	unsigned int i, class;
	u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
860
	int err;
861

862 863 864
	drm_WARN_ON(&i915->drm, engine_mask == 0);
	drm_WARN_ON(&i915->drm, engine_mask &
		    GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
865

866
	if (i915_inject_probe_failure(i915))
867 868
		return -ENODEV;

869 870
	for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
		setup_logical_ids(gt, logical_ids, class);
871

872 873
		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
			u8 instance = intel_engines[i].instance;
874

			if (intel_engines[i].class != class ||
			    !HAS_ENGINE(gt, i))
				continue;

			err = intel_engine_setup(gt, i,
						 logical_ids[instance]);
			if (err)
				goto cleanup;

			mask |= BIT(i);
		}
886 887 888 889 890 891 892
	}

	/*
	 * Catch failures to update the intel_engines table when new engines
	 * are added to the driver, by warning and disabling the forgotten
	 * engines.
	 */
893
	if (drm_WARN_ON(&i915->drm, mask != engine_mask))
894
		gt->info.engine_mask = mask;
895

896
	gt->info.num_engines = hweight32(mask);
897

898
	intel_gt_check_and_clear_faults(gt);
899

900
	intel_setup_engine_capabilities(gt);
901

902 903
	intel_uncore_prune_engine_fw_domains(gt->uncore, gt);

904 905 906
	return 0;

cleanup:
907
	intel_engines_free(gt);
908 909 910
	return err;
}

911
void intel_engine_init_execlists(struct intel_engine_cs *engine)
912 913 914
{
	struct intel_engine_execlists * const execlists = &engine->execlists;

915
	execlists->port_mask = 1;
916
	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
917 918
	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);

919 920 921
	memset(execlists->pending, 0, sizeof(execlists->pending));
	execlists->active =
		memset(execlists->inflight, 0, sizeof(execlists->inflight));
922 923
}

924
static void cleanup_status_page(struct intel_engine_cs *engine)
925
{
926 927
	struct i915_vma *vma;

928 929 930
	/* Prevent writes into HWSP after returning the page to the system */
	intel_engine_set_hwsp_writemask(engine, ~0u);

931 932 933
	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;
934

935 936 937 938
	if (!HWS_NEEDS_PHYSICAL(engine->i915))
		i915_vma_unpin(vma);

	i915_gem_object_unpin_map(vma->obj);
939
	i915_gem_object_put(vma->obj);
940 941 942
}

static int pin_ggtt_status_page(struct intel_engine_cs *engine,
943
				struct i915_gem_ww_ctx *ww,
944 945 946 947
				struct i915_vma *vma)
{
	unsigned int flags;

948
	if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
		/*
		 * On g33, we cannot place HWS above 256MiB, so
		 * restrict its pinning to the low mappable arena.
		 * Though this restriction is not documented for
		 * gen4, gen5, or byt, they also behave similarly
		 * and hang if the HWS is placed at the top of the
		 * GTT. To generalise, it appears that all !llc
		 * platforms have issues with us placing the HWS
		 * above the mappable region (even though we never
		 * actually map it).
		 */
960
		flags = PIN_MAPPABLE;
961
	else
962
		flags = PIN_HIGH;
963

964
	return i915_ggtt_pin(vma, ww, 0, flags);
965 966 967 968 969
}

static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
970
	struct i915_gem_ww_ctx ww;
971 972 973 974
	struct i915_vma *vma;
	void *vaddr;
	int ret;

975 976
	INIT_LIST_HEAD(&engine->status_page.timelines);

977 978 979 980 981 982 983
	/*
	 * Though the HWS register does support 36bit addresses, historically
	 * we have had hangs and corruption reported due to wild writes if
	 * the HWS is placed above 4G. We only allow objects to be allocated
	 * in GFP_DMA32 for i965, and no earlier physical address users had
	 * access to more than 4G.
	 */
984 985
	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
986 987
		drm_err(&engine->i915->drm,
			"Failed to allocate status page\n");
988 989 990
		return PTR_ERR(obj);
	}

991
	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
992

993
	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
994 995
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
996
		goto err_put;
997 998
	}

	i915_gem_ww_ctx_init(&ww, true);
retry:
	ret = i915_gem_object_lock(obj, &ww);
	if (!ret && !HWS_NEEDS_PHYSICAL(engine->i915))
		ret = pin_ggtt_status_page(engine, &ww, vma);
	if (ret)
		goto err;

1007 1008 1009
	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
1010
		goto err_unpin;
1011 1012
	}

1013
	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
1014
	engine->status_page.vma = vma;
1015

1016
err_unpin:
1017 1018
	if (ret)
		i915_vma_unpin(vma);
1019
err:
	if (ret == -EDEADLK) {
		ret = i915_gem_ww_ctx_backoff(&ww);
		if (!ret)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
err_put:
	if (ret)
		i915_gem_object_put(obj);
1029 1030 1031
	return ret;
}

1032
static int engine_setup_common(struct intel_engine_cs *engine)
1033 1034 1035
{
	int err;

1036 1037
	init_llist_head(&engine->barrier_tasks);

1038 1039 1040 1041
	err = init_status_page(engine);
	if (err)
		return err;

1042 1043 1044 1045 1046 1047
	engine->breadcrumbs = intel_breadcrumbs_create(engine);
	if (!engine->breadcrumbs) {
		err = -ENOMEM;
		goto err_status;
	}

1048 1049 1050 1051 1052
	engine->sched_engine = i915_sched_engine_create(ENGINE_PHYSICAL);
	if (!engine->sched_engine) {
		err = -ENOMEM;
		goto err_sched_engine;
	}
1053
	engine->sched_engine->private_data = engine;
1054

1055 1056 1057 1058
	err = intel_engine_init_cmd_parser(engine);
	if (err)
		goto err_cmd_parser;

1059 1060
	intel_engine_init_execlists(engine);
	intel_engine_init__pm(engine);
1061
	intel_engine_init_retire(engine);
1062

1063 1064
	/* Use the whole device by default */
	engine->sseu =
1065
		intel_sseu_from_device_info(&engine->gt->info.sseu);
1066

1067 1068 1069 1070
	intel_engine_init_workarounds(engine);
	intel_engine_init_whitelist(engine);
	intel_engine_init_ctx_wa(engine);

1071
	if (GRAPHICS_VER(engine->i915) >= 12)
1072 1073
		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;

1074
	return 0;
1075

1076
err_cmd_parser:
1077 1078
	i915_sched_engine_put(engine->sched_engine);
err_sched_engine:
1079
	intel_breadcrumbs_put(engine->breadcrumbs);
1080 1081 1082
err_status:
	cleanup_status_page(engine);
	return err;
1083 1084
}

1085 1086 1087
struct measure_breadcrumb {
	struct i915_request rq;
	struct intel_ring ring;
1088
	u32 cs[2048];
1089 1090
};

1091
static int measure_breadcrumb_dw(struct intel_context *ce)
1092
{
1093
	struct intel_engine_cs *engine = ce->engine;
1094
	struct measure_breadcrumb *frame;
1095
	int dw;
1096

1097
	GEM_BUG_ON(!engine->gt->scratch);
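
	/*
	 * Emit the engine's fini breadcrumb into a throwaway frame (a fake
	 * request plus a ring backed by an embedded buffer) purely to
	 * measure how many dwords it will occupy in a real ring.
	 */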

	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
	if (!frame)
		return -ENOMEM;

1103 1104 1105
	frame->rq.engine = engine;
	frame->rq.context = ce;
	rcu_assign_pointer(frame->rq.timeline, ce->timeline);
1106
	frame->rq.hwsp_seqno = ce->timeline->hwsp_seqno;
1107

1108 1109
	frame->ring.vaddr = frame->cs;
	frame->ring.size = sizeof(frame->cs);
1110 1111
	frame->ring.wrap =
		BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size);
1112 1113 1114
	frame->ring.effective_size = frame->ring.size;
	intel_ring_update_space(&frame->ring);
	frame->rq.ring = &frame->ring;
1115

1116
	mutex_lock(&ce->timeline->mutex);
1117
	spin_lock_irq(&engine->sched_engine->lock);
1118

1119
	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
1120

1121
	spin_unlock_irq(&engine->sched_engine->lock);
1122
	mutex_unlock(&ce->timeline->mutex);
1123

1124
	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
1125

1126
	kfree(frame);
1127 1128 1129
	return dw;
}

1130 1131 1132 1133 1134 1135 1136
struct intel_context *
intel_engine_create_pinned_context(struct intel_engine_cs *engine,
				   struct i915_address_space *vm,
				   unsigned int ring_size,
				   unsigned int hwsp,
				   struct lock_class_key *key,
				   const char *name)
1137 1138 1139 1140
{
	struct intel_context *ce;
	int err;

1141
	ce = intel_context_create(engine);
1142 1143 1144
	if (IS_ERR(ce))
		return ce;

1145
	__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
1146
	ce->timeline = page_pack_bits(NULL, hwsp);
1147 1148
	ce->ring = NULL;
	ce->ring_size = ring_size;
1149 1150 1151

	i915_vm_put(ce->vm);
	ce->vm = i915_vm_get(vm);
1152

1153
	err = intel_context_pin(ce); /* perma-pin so it is always available */
1154 1155 1156 1157 1158
	if (err) {
		intel_context_put(ce);
		return ERR_PTR(err);
	}

1159 1160
	list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list);

1161 1162 1163 1164 1165 1166
	/*
	 * Give our perma-pinned kernel timelines a separate lockdep class,
	 * so that we can use them from within the normal user timelines
	 * should we need to inject GPU operations during their request
	 * construction.
	 */
1167
	lockdep_set_class_and_name(&ce->timeline->mutex, key, name);
1168

1169 1170 1171
	return ce;
}

1172
void intel_engine_destroy_pinned_context(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;
	struct i915_vma *hwsp = engine->status_page.vma;

	GEM_BUG_ON(ce->timeline->hwsp_ggtt != hwsp);

	mutex_lock(&hwsp->vm->mutex);
	list_del(&ce->timeline->engine_link);
	mutex_unlock(&hwsp->vm->mutex);

1183
	list_del(&ce->pinned_contexts_link);
1184 1185 1186 1187
	intel_context_unpin(ce);
	intel_context_put(ce);
}

1188 1189 1190 1191 1192
static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
{
	static struct lock_class_key kernel;

1193 1194 1195
	return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K,
						  I915_GEM_HWS_SEQNO_ADDR,
						  &kernel, "kernel_context");
1196 1197
}

/**
 * intel_engines_init_common - initialize engine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine@ structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typically done at later stages of submission mode specific engine setup.
 *
 * Returns zero on success or an error code on failure.
 */
1209
static int engine_init_common(struct intel_engine_cs *engine)
1210
{
1211
	struct intel_context *ce;
1212 1213
	int ret;

1214 1215
	engine->set_default_submission(engine);

1216 1217
	/*
	 * We may need to do things with the shrinker which
1218 1219 1220 1221 1222 1223
	 * require us to immediately switch back to the default
	 * context. This can cause a problem as pinning the
	 * default context also requires GTT space which may not
	 * be available. To avoid this we always pin the default
	 * context.
	 */
1224 1225 1226 1227
	ce = create_kernel_context(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

1228 1229 1230 1231 1232
	ret = measure_breadcrumb_dw(ce);
	if (ret < 0)
		goto err_context;

	engine->emit_fini_breadcrumb_dw = ret;
1233
	engine->kernel_context = ce;
1234

1235
	return 0;
1236 1237

err_context:
1238
	intel_engine_destroy_pinned_context(ce);
1239
	return ret;
1240
}
1241

1242 1243 1244 1245 1246 1247 1248
int intel_engines_init(struct intel_gt *gt)
{
	int (*setup)(struct intel_engine_cs *engine);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

1249 1250
	if (intel_uc_uses_guc_submission(&gt->uc)) {
		gt->submission_method = INTEL_SUBMISSION_GUC;
1251
		setup = intel_guc_submission_setup;
1252 1253
	} else if (HAS_EXECLISTS(gt->i915)) {
		gt->submission_method = INTEL_SUBMISSION_ELSP;
1254
		setup = intel_execlists_submission_setup;
1255 1256
	} else {
		gt->submission_method = INTEL_SUBMISSION_RING;
1257
		setup = intel_ring_submission_setup;
1258
	}
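
	/*
	 * Each engine goes through the common software setup, then the
	 * backend-specific (ring/execlists/GuC) setup picked above, then the
	 * common init that needs HW access, before being exposed to userspace.
	 */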

	for_each_engine(engine, gt, id) {
		err = engine_setup_common(engine);
		if (err)
			return err;

		err = setup(engine);
		if (err)
			return err;

		err = engine_init_common(engine);
		if (err)
			return err;

		intel_engine_add_user(engine);
	}

	return 0;
}

/**
 * intel_engines_cleanup_common - cleans up the engine state created by
 *                                the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
1288
	GEM_BUG_ON(!list_empty(&engine->sched_engine->requests));
1289

1290
	i915_sched_engine_put(engine->sched_engine);
1291
	intel_breadcrumbs_put(engine->breadcrumbs);
1292

1293
	intel_engine_fini_retire(engine);
1294
	intel_engine_cleanup_cmd_parser(engine);
1295

1296
	if (engine->default_state)
1297
		fput(engine->default_state);
1298

1299
	if (engine->kernel_context)
1300
		intel_engine_destroy_pinned_context(engine->kernel_context);
1301

1302
	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
1303
	cleanup_status_page(engine);
1304

1305
	intel_wa_list_free(&engine->ctx_wa_list);
1306
	intel_wa_list_free(&engine->wa_list);
1307
	intel_wa_list_free(&engine->whitelist);
1308
}
1309

/**
 * intel_engine_resume - re-initializes the HW state of the engine
 * @engine: Engine to resume.
 *
 * Returns zero on success or an error code on failure.
 */
int intel_engine_resume(struct intel_engine_cs *engine)
{
	intel_engine_apply_workarounds(engine);
	intel_engine_apply_whitelist(engine);

	return engine->resume(engine);
}

1324
u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
1325
{
1326 1327
	struct drm_i915_private *i915 = engine->i915;

1328 1329
	u64 acthd;

1330
	if (GRAPHICS_VER(i915) >= 8)
1331
		acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
1332
	else if (GRAPHICS_VER(i915) >= 4)
1333
		acthd = ENGINE_READ(engine, RING_ACTHD);
1334
	else
1335
		acthd = ENGINE_READ(engine, ACTHD);
1336 1337 1338 1339

	return acthd;
}

1340
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
1341 1342 1343
{
	u64 bbaddr;

1344
	if (GRAPHICS_VER(engine->i915) >= 8)
1345
		bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
1346
	else
1347
		bbaddr = ENGINE_READ(engine, RING_BBADDR);
1348 1349 1350

	return bbaddr;
}
1351

static unsigned long stop_timeout(const struct intel_engine_cs *engine)
{
	if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
		return 0;

	/*
	 * If we are doing a normal GPU reset, we can take our time and allow
	 * the engine to quiesce. We've stopped submission to the engine, and
	 * if we wait long enough an innocent context should complete and
	 * leave the engine idle. So they should not be caught unaware by
	 * the forthcoming GPU reset (which usually follows the stop_cs)!
	 */
	return READ_ONCE(engine->props.stop_timeout_ms);
}

1367 1368 1369
static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
				  int fast_timeout_us,
				  int slow_timeout_ms)
1370
{
1371
	struct intel_uncore *uncore = engine->uncore;
1372
	const i915_reg_t mode = RING_MI_MODE(engine->mmio_base);
1373 1374
	int err;

1375
	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
1376 1377

	/*
	 * Wa_22011802037: gen11, gen12: Prior to doing a reset, ensure CS is
	 * stopped, set ring stop bit and prefetch disable bit to halt CS
	 */
1381
	if (IS_GRAPHICS_VER(engine->i915, 11, 12))
1382 1383 1384
		intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
				      _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));

	err = __intel_wait_for_register_fw(engine->uncore, mode,
					   MODE_IDLE, MODE_IDLE,
					   fast_timeout_us,
					   slow_timeout_ms,
					   NULL);

	/* A final mmio read to let GPU writes be hopefully flushed to memory */
	intel_uncore_posting_read_fw(uncore, mode);
	return err;
}

int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
	int err = 0;

1400
	if (GRAPHICS_VER(engine->i915) < 3)
1401 1402
		return -ENODEV;

1403
	ENGINE_TRACE(engine, "\n");
	/*
	 * TODO: Find out why occasionally stopping the CS times out. Seen
	 * especially with gem_eio tests.
	 *
	 * Occasionally trying to stop the cs times out, but does not adversely
	 * affect functionality. The timeout is set as a config parameter that
	 * defaults to 100ms. In most cases the follow up operation is to wait
	 * for pending MI_FORCE_WAKES. The assumption is that this timeout is
	 * sufficient for any pending MI_FORCEWAKEs to complete. Once root
	 * caused, the caller must check and handle the return from this
	 * function.
	 */
1416
	if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
		ENGINE_TRACE(engine,
			     "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
			     ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR,
			     ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR);

		/*
		 * Sometimes we observe that the idle flag is not
		 * set even though the ring is empty. So double
		 * check before giving up.
		 */
		if ((ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) !=
		    (ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR))
			err = -ETIMEDOUT;
1430 1431 1432 1433 1434
	}

	return err;
}

1435 1436
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
1437
	ENGINE_TRACE(engine, "\n");
1438

1439
	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
1440 1441
}

static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
{
	static const i915_reg_t _reg[I915_NUM_ENGINES] = {
		[RCS0] = MSG_IDLE_CS,
		[BCS0] = MSG_IDLE_BCS,
		[VCS0] = MSG_IDLE_VCS0,
		[VCS1] = MSG_IDLE_VCS1,
		[VCS2] = MSG_IDLE_VCS2,
		[VCS3] = MSG_IDLE_VCS3,
		[VCS4] = MSG_IDLE_VCS4,
		[VCS5] = MSG_IDLE_VCS5,
		[VCS6] = MSG_IDLE_VCS6,
		[VCS7] = MSG_IDLE_VCS7,
		[VECS0] = MSG_IDLE_VECS0,
		[VECS1] = MSG_IDLE_VECS1,
		[VECS2] = MSG_IDLE_VECS2,
		[VECS3] = MSG_IDLE_VECS3,
		[CCS0] = MSG_IDLE_CS,
		[CCS1] = MSG_IDLE_CS,
		[CCS2] = MSG_IDLE_CS,
		[CCS3] = MSG_IDLE_CS,
	};
	u32 val;

	if (!_reg[engine->id].reg) {
		drm_err(&engine->i915->drm,
			"MSG IDLE undefined for engine id %u\n", engine->id);
		return 0;
	}

	val = intel_uncore_read(engine->uncore, _reg[engine->id]);

	/* bits[29:25] & bits[13:9] >> shift */
	return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
}

static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
{
	int ret;

	/* Ensure GPM receives fw up/down after CS is stopped */
	udelay(1);

	/* Wait for forcewake request to complete in GPM */
	ret =  __intel_wait_for_register_fw(gt->uncore,
					    GEN9_PWRGT_DOMAIN_STATUS,
					    fw_mask, fw_mask, 5000, 0, NULL);

	/* Ensure CS receives fw ack from GPM */
	udelay(1);

	if (ret)
		GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
}

/*
 * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
 * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
 * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the
 * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
 * are concerned only with the gt reset here, we use a logical OR of pending
 * forcewakeups from all reset domains and then wait for them to complete by
 * querying PWRGT_DOMAIN_STATUS.
 */
void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine)
{
	u32 fw_pending = __cs_pending_mi_force_wakes(engine);

	if (fw_pending)
		__gpm_wait_for_fw_complete(engine->gt, fw_pending);
}

1514
/* NB: please notice the memset */
1515
void intel_engine_get_instdone(const struct intel_engine_cs *engine,
1516 1517
			       struct intel_instdone *instdone)
{
1518
	struct drm_i915_private *i915 = engine->i915;
1519
	struct intel_uncore *uncore = engine->uncore;
1520 1521 1522
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;
1523
	int iter;
1524 1525 1526

	memset(instdone, 0, sizeof(*instdone));

1527
	if (GRAPHICS_VER(i915) >= 8) {
1528 1529
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1530

1531
		if (engine->id != RCS0)
1532
			return;
1533

1534 1535
		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
1536
		if (GRAPHICS_VER(i915) >= 12) {
1537 1538 1539 1540 1541
			instdone->slice_common_extra[0] =
				intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA);
			instdone->slice_common_extra[1] =
				intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2);
		}
1542

		for_each_ss_steering(iter, engine->gt, slice, subslice) {
			instdone->sampler[slice][subslice] =
				intel_gt_mcr_read(engine->gt,
						  GEN7_SAMPLER_INSTDONE,
						  slice, subslice);
			instdone->row[slice][subslice] =
				intel_gt_mcr_read(engine->gt,
						  GEN7_ROW_INSTDONE,
						  slice, subslice);
1552
		}
1553 1554

		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
1555
			for_each_ss_steering(iter, engine->gt, slice, subslice)
1556
				instdone->geom_svg[slice][subslice] =
1557 1558 1559
					intel_gt_mcr_read(engine->gt,
							  XEHPG_INSTDONE_GEOM_SVG,
							  slice, subslice);
1560
		}
1561
	} else if (GRAPHICS_VER(i915) >= 7) {
1562 1563
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1564

1565
		if (engine->id != RCS0)
1566
			return;
1567

1568 1569 1570 1571 1572 1573
		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		instdone->sampler[0][0] =
			intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] =
			intel_uncore_read(uncore, GEN7_ROW_INSTDONE);
1574
	} else if (GRAPHICS_VER(i915) >= 4) {
1575 1576
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1577
		if (engine->id == RCS0)
1578
			/* HACK: Using the wrong struct member */
1579 1580
			instdone->slice_common =
				intel_uncore_read(uncore, GEN4_INSTDONE1);
1581
	} else {
1582
		instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
1583 1584
	}
}
1585

1586 1587 1588 1589
static bool ring_is_idle(struct intel_engine_cs *engine)
{
	bool idle = true;

1590 1591 1592
	if (I915_SELFTEST_ONLY(!engine->mmio_base))
		return true;

1593
	if (!intel_engine_pm_get_if_awake(engine))
1594
		return true;
1595

1596
	/* First check that no commands are left in the ring */
1597 1598
	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
1599
		idle = false;
1600

1601
	/* No bit for gen2, so assume the CS parser is idle */
1602
	if (GRAPHICS_VER(engine->i915) > 2 &&
1603
	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
1604 1605
		idle = false;

1606
	intel_engine_pm_put(engine);
1607 1608 1609 1610

	return idle;
}

1611
void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync)
1612
{
1613
	struct tasklet_struct *t = &engine->sched_engine->tasklet;
1614

1615
	if (!t->callback)
		return;
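
	/*
	 * Run the submission tasklet callback directly (unless a reset has
	 * disabled it) so anything already queued is pushed to the hardware
	 * rather than waiting for the softirq to run.
	 */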

	local_bh_disable();
	if (tasklet_trylock(t)) {
		/* Must wait for any GPU reset in progress. */
		if (__tasklet_is_enabled(t))
1622
			t->callback(t);
1623
		tasklet_unlock(t);
1624
	}
1625
	local_bh_enable();
1626 1627 1628 1629

	/* Synchronise and wait for the tasklet on another CPU */
	if (sync)
		tasklet_unlock_wait(t);
1630 1631
}

/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and that the engine is idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
1641
	/* More white lies, if wedged, hw state is inconsistent */
1642
	if (intel_gt_is_wedged(engine->gt))
1643 1644
		return true;

1645
	if (!intel_engine_pm_is_awake(engine))
1646 1647
		return true;

1648
	/* Waiting to drain ELSP? */
1649
	intel_synchronize_hardirq(engine->i915);
1650
	intel_engine_flush_submission(engine);
1651

1652
	/* ELSP is empty, but there are ready requests? E.g. after reset */
1653
	if (!i915_sched_engine_is_empty(engine->sched_engine))
1654 1655
		return false;

1656
	/* Ring stopped? */
1657
	return ring_is_idle(engine);
1658 1659
}

1660
bool intel_engines_are_idle(struct intel_gt *gt)
1661 1662 1663 1664
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

1665 1666
	/*
	 * If the driver is wedged, HW state may be very inconsistent and
1667 1668
	 * report that it is still busy, even though we have stopped using it.
	 */
1669
	if (intel_gt_is_wedged(gt))
1670 1671
		return true;

1672
	/* Already parked (and passed an idleness test); must still be idle */
1673
	if (!READ_ONCE(gt->awake))
1674 1675
		return true;

1676
	for_each_engine(engine, gt, id) {
1677 1678 1679 1680 1681 1682 1683
		if (!intel_engine_is_idle(engine))
			return false;
	}

	return true;
}

1684 1685 1686 1687 1688 1689
bool intel_engine_irq_enable(struct intel_engine_cs *engine)
{
	if (!engine->irq_enable)
		return false;

	/* Caller disables interrupts */
1690
	spin_lock(engine->gt->irq_lock);
1691
	engine->irq_enable(engine);
1692
	spin_unlock(engine->gt->irq_lock);

	return true;
}

void intel_engine_irq_disable(struct intel_engine_cs *engine)
{
	if (!engine->irq_disable)
		return;

	/* Caller disables interrupts */
1703
	spin_lock(engine->gt->irq_lock);
1704
	engine->irq_disable(engine);
1705
	spin_unlock(engine->gt->irq_lock);
1706 1707
}

1708
void intel_engines_reset_default_submission(struct intel_gt *gt)
1709 1710 1711 1712
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

1713 1714 1715 1716
	for_each_engine(engine, gt, id) {
		if (engine->sanitize)
			engine->sanitize(engine);

1717
		engine->set_default_submission(engine);
1718
	}
1719 1720
}

1721 1722
bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
{
1723
	switch (GRAPHICS_VER(engine->i915)) {
1724 1725 1726 1727 1728
	case 2:
		return false; /* uses physical not virtual addresses */
	case 3:
		/* maybe only uses physical not virtual addresses */
		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
1729 1730
	case 4:
		return !IS_I965G(engine->i915); /* who knows! */
1731 1732 1733 1734 1735 1736 1737
	case 6:
		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
	default:
		return true;
	}
}

1738 1739 1740 1741 1742
static struct intel_timeline *get_timeline(struct i915_request *rq)
{
	struct intel_timeline *tl;

	/*
1743
	 * Even though we are holding the engine->sched_engine->lock here, there
	 * is no control over the submission queue per-se and we are
	 * inspecting the active state at a random point in time, with an
	 * unknown queue. Play safe and make sure the timeline remains valid.
	 * (Only being used for pretty printing, one extra kref shouldn't
	 * cause a camel stampede!)
	 */
	rcu_read_lock();
	tl = rcu_dereference(rq->timeline);
	if (!kref_get_unless_zero(&tl->kref))
		tl = NULL;
	rcu_read_unlock();

	return tl;
}

static int print_ring(char *buf, int sz, struct i915_request *rq)
{
	int len = 0;

	if (!i915_request_signaled(rq)) {
		struct intel_timeline *tl = get_timeline(rq);

		len = scnprintf(buf, sz,
				"ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
				i915_ggtt_offset(rq->ring->vma),
				tl ? tl->hwsp_offset : 0,
				hwsp_seqno(rq),
				DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
						      1000 * 1000));

		if (tl)
			intel_timeline_put(tl);
	}

	return len;
}

static void hexdump(struct drm_printer *m, const void *buf, size_t len)
{
	const size_t rowsize = 8 * sizeof(u32);
	const void *prev = NULL;
	bool skip = false;
	size_t pos;

	for (pos = 0; pos < len; pos += rowsize) {
		char line[128];

		if (prev && !memcmp(prev, buf + pos, rowsize)) {
			if (!skip) {
				drm_printf(m, "*\n");
				skip = true;
			}
			continue;
		}

		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
						rowsize, sizeof(u32),
						line, sizeof(line),
						false) >= sizeof(line));
1803
		drm_printf(m, "[%04zx] %s\n", pos, line);
1804 1805 1806 1807 1808 1809

		prev = buf + pos;
		skip = false;
	}
}

static const char *repr_timer(const struct timer_list *t)
{
	if (!READ_ONCE(t->expires))
		return "inactive";

	if (timer_pending(t))
		return "active";

	return "expired";
}

static void intel_engine_print_registers(struct intel_engine_cs *engine,
					 struct drm_printer *m)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_engine_execlists * const execlists = &engine->execlists;
	u64 addr;

	if (engine->id == RENDER_CLASS && IS_GRAPHICS_VER(dev_priv, 4, 7))
		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
	if (HAS_EXECLISTS(dev_priv)) {
		drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
		drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
	}
	drm_printf(m, "\tRING_START: 0x%08x\n",
		   ENGINE_READ(engine, RING_START));
	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
		   ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
	drm_printf(m, "\tRING_TAIL:  0x%08x\n",
		   ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
	drm_printf(m, "\tRING_CTL:   0x%08x%s\n",
		   ENGINE_READ(engine, RING_CTL),
		   ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
	if (GRAPHICS_VER(engine->i915) > 2) {
		drm_printf(m, "\tRING_MODE:  0x%08x%s\n",
			   ENGINE_READ(engine, RING_MI_MODE),
			   ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
	}

	if (GRAPHICS_VER(dev_priv) >= 6) {
		drm_printf(m, "\tRING_IMR:   0x%08x\n",
			   ENGINE_READ(engine, RING_IMR));
		drm_printf(m, "\tRING_ESR:   0x%08x\n",
			   ENGINE_READ(engine, RING_ESR));
		drm_printf(m, "\tRING_EMR:   0x%08x\n",
			   ENGINE_READ(engine, RING_EMR));
		drm_printf(m, "\tRING_EIR:   0x%08x\n",
			   ENGINE_READ(engine, RING_EIR));
	}

	addr = intel_engine_get_active_head(engine);
	drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	addr = intel_engine_get_last_batch_head(engine);
	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (GRAPHICS_VER(dev_priv) >= 8)
		addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
	else if (GRAPHICS_VER(dev_priv) >= 4)
		addr = ENGINE_READ(engine, RING_DMA_FADD);
	else
		addr = ENGINE_READ(engine, DMA_FADD_I8XX);
	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (GRAPHICS_VER(dev_priv) >= 4) {
		drm_printf(m, "\tIPEIR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IPEHR));
	} else {
		drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
	}

	if (HAS_EXECLISTS(dev_priv) && !intel_engine_uses_guc(engine)) {
		struct i915_request * const *port, *rq;
		const u32 *hws =
			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
		const u8 num_entries = execlists->csb_size;
		unsigned int idx;
		u8 read, write;

		drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
			   str_yes_no(test_bit(TASKLET_STATE_SCHED, &engine->sched_engine->tasklet.state)),
			   str_enabled_disabled(!atomic_read(&engine->sched_engine->tasklet.count)),
			   repr_timer(&engine->execlists.preempt),
			   repr_timer(&engine->execlists.timer));

		read = execlists->csb_head;
		write = READ_ONCE(*execlists->csb_write);

		drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
			   read, write, num_entries);

		if (read >= num_entries)
			read = 0;
		if (write >= num_entries)
			write = 0;
		if (read > write)
			write += num_entries;
		while (read < write) {
			idx = ++read % num_entries;
			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
				   idx, hws[idx * 2], hws[idx * 2 + 1]);
		}

		i915_sched_engine_active_lock_bh(engine->sched_engine);
		rcu_read_lock();
		for (port = execlists->active; (rq = *port); port++) {
			char hdr[160];
			int len;

			len = scnprintf(hdr, sizeof(hdr),
					"\t\tActive[%d]:  ccid:%08x%s%s, ",
					(int)(port - execlists->active),
					rq->context->lrc.ccid,
					intel_context_is_closed(rq->context) ? "!" : "",
					intel_context_is_banned(rq->context) ? "*" : "");
			len += print_ring(hdr + len, sizeof(hdr) - len, rq);
			scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
			i915_request_show(m, rq, hdr, 0);
		}
		for (port = execlists->pending; (rq = *port); port++) {
			char hdr[160];
			int len;

			len = scnprintf(hdr, sizeof(hdr),
					"\t\tPending[%d]: ccid:%08x%s%s, ",
					(int)(port - execlists->pending),
					rq->context->lrc.ccid,
					intel_context_is_closed(rq->context) ? "!" : "",
					intel_context_is_banned(rq->context) ? "*" : "");
			len += print_ring(hdr + len, sizeof(hdr) - len, rq);
			scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
			i915_request_show(m, rq, hdr, 0);
		}
		rcu_read_unlock();
		i915_sched_engine_active_unlock_bh(engine->sched_engine);
	} else if (GRAPHICS_VER(dev_priv) > 6) {
		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE));
		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_DCLV));
	}
}

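/*
 * Print the request's position within its ring and then hexdump the ring
 * contents between rq->head and rq->tail (copied with GFP_ATOMIC as callers
 * may hold the engine's scheduler lock).
 */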
static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
{
	struct i915_vma_resource *vma_res = rq->batch_res;
	void *ring;
	int size;

	drm_printf(m,
		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
		   rq->head, rq->postfix, rq->tail,
		   vma_res ? upper_32_bits(vma_res->start) : ~0u,
		   vma_res ? lower_32_bits(vma_res->start) : ~0u);

	size = rq->tail - rq->head;
	if (rq->tail < rq->head)
		size += rq->ring->size;

	ring = kmalloc(size, GFP_ATOMIC);
	if (ring) {
		const void *vaddr = rq->ring->vaddr;
		unsigned int head = rq->head;
		unsigned int len = 0;

		if (rq->tail < head) {
			len = rq->ring->size - head;
			memcpy(ring, vaddr + head, len);
			head = 0;
		}
		memcpy(ring + len, vaddr + head, size - len);

		hexdump(m, ring, size);
		kfree(ring);
	}
}

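/* Count the entries on @list; the caller must prevent concurrent modification. */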
static unsigned long list_count(struct list_head *list)
{
	struct list_head *pos;
	unsigned long count = 0;

	list_for_each(pos, list)
		count++;

	return count;
}

static unsigned long read_ul(void *p, size_t x)
{
	return *(unsigned long *)(p + x);
}

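/*
 * Print each tunable engine property alongside its default, reading the
 * values generically via their offsets into engine->props/engine->defaults.
 * Illustrative output (values vary by platform):
 *	Properties:
 *		heartbeat_interval_ms: 2500 [default 2500]
 */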
static void print_properties(struct intel_engine_cs *engine,
			     struct drm_printer *m)
{
	static const struct pmap {
		size_t offset;
		const char *name;
	} props[] = {
#define P(x) { \
	.offset = offsetof(typeof(engine->props), x), \
	.name = #x \
}
		P(heartbeat_interval_ms),
		P(max_busywait_duration_ns),
		P(preempt_timeout_ms),
		P(stop_timeout_ms),
		P(timeslice_duration_ms),

		{},
#undef P
	};
	const struct pmap *p;

	drm_printf(m, "\tProperties:\n");
	for (p = props; p->name; p++)
		drm_printf(m, "\t\t%s: %lu [default %lu]\n",
			   p->name,
			   read_ul(&engine->props, p->offset),
			   read_ul(&engine->defaults, p->offset));
}

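/*
 * Show a single request in full: the i915_request_show() one-liner, its
 * ring bookkeeping (start/head/tail/emit/space and the HWSP offset), the
 * ring contents it emitted and, if present, its logical ring context image.
 */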
static void engine_dump_request(struct i915_request *rq, struct drm_printer *m, const char *msg)
{
	struct intel_timeline *tl = get_timeline(rq);

	i915_request_show(m, rq, msg, 0);

	drm_printf(m, "\t\tring->start:  0x%08x\n",
		   i915_ggtt_offset(rq->ring->vma));
	drm_printf(m, "\t\tring->head:   0x%08x\n",
		   rq->ring->head);
	drm_printf(m, "\t\tring->tail:   0x%08x\n",
		   rq->ring->tail);
	drm_printf(m, "\t\tring->emit:   0x%08x\n",
		   rq->ring->emit);
	drm_printf(m, "\t\tring->space:  0x%08x\n",
		   rq->ring->space);

	if (tl) {
		drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
			   tl->hwsp_offset);
		intel_timeline_put(tl);
	}

	print_request_ring(m, rq);

	if (rq->context->lrc_reg_state) {
		drm_printf(m, "Logical Ring Context:\n");
		hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
	}
}

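/*
 * Walk @requests and dump every request that is at least queued, skipping
 * @hung_rq (reported separately by the caller) and annotating each entry as
 * either active on the engine or still waiting in the queue.
 */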
void intel_engine_dump_active_requests(struct list_head *requests,
				       struct i915_request *hung_rq,
				       struct drm_printer *m)
{
	struct i915_request *rq;
	const char *msg;
	enum i915_request_state state;

	list_for_each_entry(rq, requests, sched.link) {
		if (rq == hung_rq)
			continue;

		state = i915_test_request_state(rq);
		if (state < I915_REQUEST_QUEUED)
			continue;

		if (state == I915_REQUEST_ACTIVE)
			msg = "\t\tactive on engine";
		else
			msg = "\t\tactive in queue";

		engine_dump_request(rq, m, msg);
	}
}

static void engine_dump_active_requests(struct intel_engine_cs *engine, struct drm_printer *m)
{
	struct i915_request *hung_rq = NULL;
	struct intel_context *ce;
	bool guc;

	/*
	 * No need for an engine->irq_seqno_barrier() before the seqno reads.
	 * The GPU is still running so requests are still executing and any
	 * hardware reads will be out of date by the time they are reported.
	 * But the intention here is just to report an instantaneous snapshot
	 * so that's fine.
	 */
	lockdep_assert_held(&engine->sched_engine->lock);

	drm_printf(m, "\tRequests:\n");

	guc = intel_uc_uses_guc_submission(&engine->gt->uc);
	if (guc) {
		ce = intel_engine_get_hung_context(engine);
		if (ce)
			hung_rq = intel_context_find_active_request(ce);
	} else {
		hung_rq = intel_engine_execlist_find_hung_request(engine);
	}

	if (hung_rq)
		engine_dump_request(hung_rq, m, "\t\thung");

	if (guc)
		intel_guc_dump_active_requests(engine, hung_rq, m);
	else
		intel_engine_dump_active_requests(&engine->sched_engine->requests,
						  hung_rq, m);
}

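/*
 * Top-level debug dump of an engine's software and hardware state: the
 * optional header, overall status (wakeref, latency, runtime, resets,
 * properties), the active and held requests under the scheduler lock, the
 * MMIO registers if a runtime PM wakeref can be taken, and finally the
 * execlists queue, the HWSP page and the breadcrumbs.
 */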
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...)
{
	struct i915_gpu_error * const error = &engine->i915->gpu_error;
	struct i915_request *rq;
	intel_wakeref_t wakeref;
	unsigned long flags;
	ktime_t dummy;

	if (header) {
		va_list ap;

		va_start(ap, header);
		drm_vprintf(m, header, &ap);
		va_end(ap);
	}

	if (intel_gt_is_wedged(engine->gt))
		drm_printf(m, "*** WEDGED ***\n");

	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
	drm_printf(m, "\tBarriers?: %s\n",
		   str_yes_no(!llist_empty(&engine->barrier_tasks)));
	drm_printf(m, "\tLatency: %luus\n",
		   ewma__engine_latency_read(&engine->latency));
	if (intel_engine_supports_stats(engine))
		drm_printf(m, "\tRuntime: %llums\n",
			   ktime_to_ms(intel_engine_get_busy_time(engine,
								  &dummy)));
	drm_printf(m, "\tForcewake: %x domains, %d active\n",
		   engine->fw_domain, READ_ONCE(engine->fw_active));

	rcu_read_lock();
	rq = READ_ONCE(engine->heartbeat.systole);
	if (rq)
		drm_printf(m, "\tHeartbeat: %d ms ago\n",
			   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	rcu_read_unlock();
	drm_printf(m, "\tReset count: %d (global %d)\n",
		   i915_reset_engine_count(error, engine),
		   i915_reset_count(error));
	print_properties(engine, m);

	spin_lock_irqsave(&engine->sched_engine->lock, flags);
	engine_dump_active_requests(engine, m);

	drm_printf(m, "\tOn hold?: %lu\n",
		   list_count(&engine->sched_engine->hold));
	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);

	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
	if (wakeref) {
		intel_engine_print_registers(engine, m);
		intel_runtime_pm_put(engine->uncore->rpm, wakeref);
	} else {
		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
	}

	intel_execlists_show_requests(engine, m, i915_request_show, 8);

	drm_printf(m, "HWSP:\n");
	hexdump(m, engine->status_page.addr, PAGE_SIZE);

	drm_printf(m, "Idle? %s\n", str_yes_no(intel_engine_is_idle(engine)));

	intel_engine_print_breadcrumbs(engine, m);
}

/**
 * intel_engine_get_busy_time() - Return current accumulated engine busyness
 * @engine: engine to report on
 * @now: monotonic timestamp of sampling
 *
 * Returns accumulated time @engine was busy since engine stats were enabled.
 */
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
{
	return engine->busyness(engine, now);
}

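/*
 * Create a context that load-balances between @siblings. A single sibling
 * without FORCE_VIRTUAL degenerates to an ordinary context on that engine;
 * otherwise the submission backend's create_virtual() hook does the work.
 */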
struct intel_context *
intel_engine_create_virtual(struct intel_engine_cs **siblings,
			    unsigned int count, unsigned long flags)
{
	if (count == 0)
		return ERR_PTR(-EINVAL);

	if (count == 1 && !(flags & FORCE_VIRTUAL))
		return intel_context_create(siblings[0]);

	GEM_BUG_ON(!siblings[0]->cops->create_virtual);
	return siblings[0]->cops->create_virtual(siblings, count, flags);
}

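/*
 * Pick the request most likely responsible for a hang: the oldest incomplete
 * request on the currently executing context's timeline, or failing that the
 * first request still marked active on the engine.
 */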
struct i915_request *
intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
{
	struct i915_request *request, *active = NULL;

	/*
	 * This search does not work in GuC submission mode. However, the GuC
	 * will report the hanging context directly to the driver itself. So
	 * the driver should never get here when in GuC mode.
	 */
	GEM_BUG_ON(intel_uc_uses_guc_submission(&engine->gt->uc));

	/*
	 * We are called by the error capture, reset and to dump engine
	 * state at random points in time. In particular, note that none of
	 * these is crucially ordered with an interrupt. After a hang, the GPU
	 * is dead and we assume that no more writes can happen (we waited
	 * long enough for all writes that were in transaction to be flushed)
	 * - adding an extra delay for a recent interrupt is pointless. Hence,
	 * we do not need an engine->irq_seqno_barrier() before the seqno
	 * reads. At all other times, we must assume the GPU is still running,
	 * but we only care about the snapshot of this moment.
	 */
	lockdep_assert_held(&engine->sched_engine->lock);

	rcu_read_lock();
	request = execlists_active(&engine->execlists);
	if (request) {
		struct intel_timeline *tl = request->context->timeline;

		list_for_each_entry_from_reverse(request, &tl->requests, link) {
			if (__i915_request_is_complete(request))
				break;

			active = request;
		}
	}
	rcu_read_unlock();
	if (active)
		return active;

	list_for_each_entry(request, &engine->sched_engine->requests,
			    sched.link) {
		if (i915_test_request_state(request) != I915_REQUEST_ACTIVE)
			continue;

		active = request;
		break;
	}

	return active;
}

void xehp_enable_ccs_engines(struct intel_engine_cs *engine)
{
	/*
	 * If there are any non-fused-off CCS engines, we need to enable CCS
	 * support in the RCU_MODE register.  This only needs to be done once,
	 * so for simplicity we'll take care of this in the RCS engine's
	 * resume handler; since the RCS and all CCS engines belong to the
	 * same reset domain and are reset together, this will also take care
	 * of re-applying the setting after i915-triggered resets.
	 */
	if (!CCS_MASK(engine->gt))
		return;

	intel_uncore_write(engine->uncore, GEN12_RCU_MODE,
			   _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "mock_engine.c"
#include "selftest_engine.c"
#include "selftest_engine_cs.c"
#endif