// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_regs.h"
#include "intel_ring.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * Hardware workarounds are register programming documented to be executed in
 * the driver that fall outside of the normal programming sequences for a
 * platform. There are some basic categories of workarounds, depending on
 * how/when they are applied:
 *
 * - Context workarounds: workarounds that touch registers that are
 *   saved/restored to/from the HW context image. The list is emitted (via Load
 *   Register Immediate commands) once when initializing the device and saved in
 *   the default context. That default context is then used on every context
 *   creation to have a "primed golden context", i.e. a context image that
 *   already contains the changes needed to all the registers.
 *
 * - Engine workarounds: the list of these WAs is applied whenever the specific
 *   engine is reset. It's also possible that a set of engine classes share a
 *   common power domain and they are reset together. This happens on some
 *   platforms with render and compute engines. In this case (at least) one of
 *   them needs to keep the workaround programming: the approach taken in the
 *   driver is to tie those workarounds to the first compute/render engine that
 *   is registered.  When executing with GuC submission, engine resets are
 *   outside of kernel driver control, hence the list of registers involved is
 *   written once, on engine initialization, and then passed to GuC, which
 *   saves/restores their values before/after the reset takes place. See
 *   ``drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c`` for reference.
 *
 * - GT workarounds: the list of these WAs is applied whenever these registers
 *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
 *
 * - Register whitelist: some workarounds need to be implemented in userspace,
 *   but need to touch privileged registers. The whitelist in the kernel
 *   instructs the hardware to allow the access to happen. From the kernel side,
 *   this is just a special case of a MMIO workaround (as we write the list of
 *   these to/be-whitelisted registers to some special HW registers).
 *
 * - Workaround batchbuffers: buffers that get executed automatically by the
 *   hardware on every HW context restore. These buffers are created and
 *   programmed in the default context so the hardware always goes through those
 *   programming sequences when switching contexts. The support for workaround
 *   batchbuffers is enabled via these hardware mechanisms:
 *
 *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
 *      context, pointing the hardware to jump to that location when that offset
 *      is reached in the context restore. Workaround batchbuffer in the driver
 *      currently uses this mechanism for all platforms.
 *
 *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
 *      pointing the hardware to a buffer to continue executing after the
 *      engine registers are restored in a context restore sequence. This is
 *      currently not used in the driver.
 *
 * - Other:  There are WAs that, due to their nature, cannot be applied from a
 *   central place. Those are peppered around the rest of the code, as needed.
 *   Workarounds related to the display IP are the main example.
 *
 * .. [1] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things, so it's the approach taken in the driver.
 */
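
/*
 * Illustrative flow only (a sketch, not a real list in the driver): every
 * list described above is built with the helpers defined below, e.g.
 *
 *	wa_init_start(&wal, gt, "GT", "global");
 *	wa_write_or(&wal, reg, bit);	// accumulate entries (hypothetical reg/bit)
 *	wa_init_finish(&wal);		// trim the list and log a summary
 *
 * and the resulting entries are later applied to and verified against the
 * hardware by the list application/verification code in this file.
 */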

static void wa_init_start(struct i915_wa_list *wal, struct intel_gt *gt,
			  const char *name, const char *engine_name)
{
	wal->gt = gt;
	wal->name = name;
	wal->engine_name = engine_name;
}

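/*
 * Workaround lists grow in chunks of WA_LIST_CHUNK entries: _wa_add()
 * reallocates whenever the entry count reaches a multiple of this value,
 * and wa_init_finish() trims the final allocation down to the exact count.
 */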
#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup(wal->list,
					       wal->count * sizeof(*list),
					       GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	drm_dbg(&wal->gt->i915->drm, "Initialized %u %s workarounds on %s\n",
		wal->wa_count, wal->name, wal->engine_name);
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	struct drm_i915_private *i915 = wal->gt->i915;
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
				     GFP_KERNEL);
		if (!list) {
			drm_err(&i915->drm, "No space for workaround init!\n");
			return;
		}

		if (wal->list) {
			memcpy(list, wal->list, sizeof(*wa) * wal->count);
			kfree(wal->list);
		}

		wal->list = list;
	}

	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
				drm_err(&i915->drm,
					"Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
					i915_mmio_reg_offset(wa_->reg),
					wa_->clr, wa_->set);

				wa_->set &= ~wa->clr;
			}

			wal->wa_count++;
			wa_->set |= wa->set;
			wa_->clr |= wa->clr;
			wa_->read |= wa->read;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
		   u32 clear, u32 set, u32 read_mask, bool masked_reg)
{
	struct i915_wa wa = {
		.reg  = reg,
		.clr  = clear,
		.set  = set,
		.read = read_mask,
		.masked_reg = masked_reg,
	};

	_wa_add(wal, &wa);
}

static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
		       u32 clear, u32 set, u32 read_mask, bool masked_reg)
{
	struct i915_wa wa = {
		.mcr_reg = reg,
		.clr  = clear,
		.set  = set,
		.read = read_mask,
		.masked_reg = masked_reg,
		.is_mcr = 1,
	};

	_wa_add(wal, &wa);
}

static void
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
	wa_add(wal, reg, clear, set, clear, false);
}

static void
wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
{
	wa_mcr_add(wal, reg, clear, set, clear, false);
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_clr_set(wal, reg, ~0, set);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_clr_set(wal, reg, set, set);
}

static void
wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
{
	wa_mcr_write_clr_set(wal, reg, set, set);
}

static void
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
{
	wa_write_clr_set(wal, reg, clr, 0);
}

static void
wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
{
	wa_mcr_write_clr_set(wal, reg, clr, 0);
}

/*
 * WA operations on "masked register". A masked register has the upper 16 bits
 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
 * portion of the register without a rmw: you simply write in the upper 16 bits
 * the mask of bits you are going to modify.
 *
 * The wa_masked_* family of functions already does the necessary operations to
 * calculate the mask based on the parameters passed, so the user only has to
 * provide the lower 16 bits of that register.
 */
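
/*
 * Illustration only (a sketch assuming the usual i915 helpers, where
 * _MASKED_BIT_ENABLE(b) expands to ((b) << 16 | (b)) and
 * _MASKED_BIT_DISABLE(b) to ((b) << 16)): a call such as
 *
 *	wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 *
 * records a single write whose upper 16 bits unmask the bit and whose lower
 * 16 bits set it, so no read-modify-write of the register is needed.
 */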

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
}

static void
wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
{
	wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
}

static void
wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
}

static void
wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
{
	wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
}

static void
wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
		    u32 mask, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
}

static void
wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
			u32 mask, u32 val)
{
	wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
}

static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_DONOT_FETCH_MEM_WHEN_MASKED |
		     HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	wa_masked_field_set(wal, GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
			 DOP_CLOCK_GATING_DISABLE);

	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
			 GEN8_SAMPLER_POWER_BYPASS_DIS);

	wa_masked_en(wal, HDC_CHICKEN0,
		     /* WaForceContextSaveRestoreNonCoherent:bdw */
		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
		     /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
		     (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:chv */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
			     GEN9_PBE_COMPRESSED_HASH_SELECTION);
		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
				 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 FLOW_CONTROL_ENABLE |
			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
			 GEN9_ENABLE_YV12_BUGFIX |
			 GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	wa_masked_en(wal, CACHE_MODE_1,
		     GEN8_4x4_STC_OPTIMIZATION_DISABLE |
		     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
		     HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(i915) ||
	    IS_KABYLAKE(i915) ||
	    IS_COFFEELAKE(i915) ||
	    IS_COMETLAKE(i915))
		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
				 GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to safe value. Userspace is
	 * still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
	if (IS_GEN9_LP(i915))
		wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
				struct i915_wa_list *wal)
{
	struct intel_gt *gt = engine->gt;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	wa_masked_field_set(wal, GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);
	skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bxt */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
			     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:glk */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	/* Wa_1406697149 (WaDisableBankHangMode:icl) */
	wa_write(wal,
		 GEN8_L3CNTLREG,
		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
		 GEN8_ERRDETBCTRL);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	/* WaEnableFloatBlendOptimization:icl */
	wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
		   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
		   0 /* write-only, so skip validation */,
		   true);

	/* WaDisableGPGPUMidThreadPreemption:icl */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

	/* allow headerless messages for preemptible GPGPU context */
	wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
			 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);

	/* Wa_1604278689:icl,ehl */
	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
	wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
			 0, /* write-only register; skip validation */
			 0xFFFFFFFF);

	/* Wa_1406306137:icl,ehl */
	wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
}

/*
 * These settings aren't actually workarounds, but general tuning settings that
 * need to be programmed on dg2 platform.
 */
static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
				   struct i915_wa_list *wal)
{
	wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
	wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
			     REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
	wa_mcr_add(wal,
		   XEHP_FF_MODE2,
		   FF_MODE2_TDS_TIMER_MASK,
		   FF_MODE2_TDS_TIMER_128,
		   0, false);
}

/*
 * These settings aren't actually workarounds, but general tuning settings that
 * need to be programmed on several platforms.
 */
static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	/*
	 * Although some platforms refer to it as Wa_1604555607, we need to
	 * program it even on those that don't explicitly list that
	 * workaround.
	 *
	 * Note that the programming of this register is further modified
	 * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
	 * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
	 * value when read. The default value for this register is zero for all
	 * fields and there are no bit masks. So instead of doing a RMW we
	 * should just write TDS timer value. For the same reason read
	 * verification is ignored.
	 */
	wa_add(wal,
	       GEN12_FF_MODE2,
	       FF_MODE2_TDS_TIMER_MASK,
	       FF_MODE2_TDS_TIMER_128,
	       0, false);
}

static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
				       struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen12_ctx_gt_tuning_init(engine, wal);

	/*
	 * Wa_1409142259:tgl,dg1,adl-p
	 * Wa_1409347922:tgl,dg1,adl-p
	 * Wa_1409252684:tgl,dg1,adl-p
	 * Wa_1409217633:tgl,dg1,adl-p
	 * Wa_1409207793:tgl,dg1,adl-p
	 * Wa_1409178076:tgl,dg1,adl-p
	 * Wa_1408979724:tgl,dg1,adl-p
	 * Wa_14010443199:tgl,rkl,dg1,adl-p
	 * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
	 * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
	 */
	wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);

	/* WaDisableGPGPUMidThreadPreemption:gen12 */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

	/*
	 * Wa_16011163337
	 *
	 * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
	 * to Wa_1608008084.
	 */
	wa_add(wal,
	       GEN12_FF_MODE2,
	       FF_MODE2_GS_TIMER_MASK,
	       FF_MODE2_GS_TIMER_224,
	       0, false);

	if (!IS_DG1(i915))
		/* Wa_1806527549 */
		wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
}

static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen12_ctx_workarounds_init(engine, wal);

	/* Wa_1409044764 */
	wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
		      DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);

	/* Wa_22010493298 */
	wa_masked_en(wal, HIZ_CHICKEN,
		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
}

static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	dg2_ctx_gt_tuning_init(engine, wal);

	/* Wa_16011186671:dg2_g11 */
	if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
		wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
		wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
	}

	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
		/* Wa_14010469329:dg2_g10 */
		wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
				 XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);

		/*
		 * Wa_22010465075:dg2_g10
		 * Wa_22010613112:dg2_g10
		 * Wa_14010698770:dg2_g10
		 */
		wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
				 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
	}

	/* Wa_16013271637:dg2 */
	wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
			 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);

	/* Wa_14014947963:dg2 */
	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
		IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
		wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);

	/* Wa_15010599737:dg2 */
	wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
}

static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
					 struct i915_wa_list *wal)
{
	/*
	 * This is a "fake" workaround defined by software to ensure we
	 * maintain reliable, backward-compatible behavior for userspace with
	 * regards to how nested MI_BATCH_BUFFER_START commands are handled.
	 *
	 * The per-context setting of MI_MODE[12] determines whether the bits
	 * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
	 * in the traditional manner or whether they should instead use a new
	 * tgl+ meaning that breaks backward compatibility, but allows nesting
	 * into 3rd-level batchbuffers.  When this new capability was first
	 * added in TGL, it remained off by default unless a context
	 * intentionally opted in to the new behavior.  However Xe_HPG now
	 * flips this on by default and requires that we explicitly opt out if
	 * we don't want the new behavior.
	 *
	 * From a SW perspective, we want to maintain the backward-compatible
	 * behavior for userspace, so we'll apply a fake workaround to set it
	 * back to the legacy behavior on platforms where the hardware default
	 * is to break compatibility.  At the moment there is no Linux
	 * userspace that utilizes third-level batchbuffers, so this avoids
	 * userspace needing to make any changes.  Using the legacy
	 * meaning is the correct thing to do.  If/when we have userspace
	 * consumers that want to utilize third-level batch nesting, we can
	 * provide a context parameter to allow them to opt-in.
	 */
	wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
}

static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
				   struct i915_wa_list *wal)
{
	u8 mocs;

	/*
	 * Some blitter commands do not have a field for MOCS, those
	 * commands will use MOCS index pointed by BLIT_CCTL.
	 * BLIT_CCTL registers are needed to be programmed to un-cached.
	 */
	if (engine->class == COPY_ENGINE_CLASS) {
		mocs = engine->gt->mocs.uc_index;
		wa_write_clr_set(wal,
				 BLIT_CCTL(engine->mmio_base),
				 BLIT_CCTL_MASK,
				 BLIT_CCTL_MOCS(mocs, mocs));
	}
}

/*
 * gen12_ctx_gt_fake_wa_init() isn't programming an official workaround
 * defined by the hardware team, but rather general context registers.
 * Adding this context register programming to the context workaround
 * list allows us to use the wa framework for proper application and
 * validation.
 */
static void
gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
			  struct i915_wa_list *wal)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
		fakewa_disable_nestedbb_mode(engine, wal);

	gen12_ctx_gt_mocs_init(engine, wal);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
			   struct i915_wa_list *wal,
			   const char *name)
{
	struct drm_i915_private *i915 = engine->i915;

	wa_init_start(wal, engine->gt, name, engine->name);

	/* Applies to all engines */
	/*
	 * Fake workarounds are not the actual workaround but
	 * programming of context registers using workaround framework.
	 */
	if (GRAPHICS_VER(i915) >= 12)
		gen12_ctx_gt_fake_wa_init(engine, wal);

	if (engine->class != RENDER_CLASS)
		goto done;

	if (IS_PONTEVECCHIO(i915))
		; /* noop; none at this time */
	else if (IS_DG2(i915))
		dg2_ctx_workarounds_init(engine, wal);
	else if (IS_XEHPSDV(i915))
		; /* noop; none at this time */
	else if (IS_DG1(i915))
		dg1_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 12)
		gen12_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 11)
		icl_ctx_workarounds_init(engine, wal);
	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
		cfl_ctx_workarounds_init(engine, wal);
	else if (IS_GEMINILAKE(i915))
		glk_ctx_workarounds_init(engine, wal);
	else if (IS_KABYLAKE(i915))
		kbl_ctx_workarounds_init(engine, wal);
	else if (IS_BROXTON(i915))
		bxt_ctx_workarounds_init(engine, wal);
	else if (IS_SKYLAKE(i915))
		skl_ctx_workarounds_init(engine, wal);
	else if (IS_CHERRYVIEW(i915))
		chv_ctx_workarounds_init(engine, wal);
	else if (IS_BROADWELL(i915))
		bdw_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 7)
		gen7_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 6)
		gen6_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) < 8)
		;
	else
		MISSING_CASE(GRAPHICS_VER(i915));

done:
	wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
	struct i915_wa *wa;
	unsigned int i;
	u32 *cs;
	int ret;

	if (wal->count == 0)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		*cs++ = i915_mmio_reg_offset(wa->reg);
		*cs++ = wa->set;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

static void
gen4_gt_workarounds_init(struct intel_gt *gt,
			 struct i915_wa_list *wal)
{
	/* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
	wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
}

static void
g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen4_gt_workarounds_init(gt, wal);

	/* WaDisableRenderCachePipelinedFlush:g4x,ilk */
	wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
}

static void
ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	g4x_gt_workarounds_init(gt, wal);

	wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
}

static void
snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
}

static void
ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
	wa_masked_dis(wal,
		      GEN7_COMMON_SLICE_CHICKEN1,
		      GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);

	/* WaApplyL3ControlAndL3ChickenMode:ivb */
	wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
	wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);

	/* WaForceL3Serialization:ivb */
	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
}

static void
vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* WaForceL3Serialization:vlv */
	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);

	/*
	 * WaIncreaseL3CreditsForVLVB0:vlv
	 * This is the hardware default actually.
	 */
	wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
}

static void
hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* L3 caching of data atomics doesn't work -- disable it. */
	wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);

	wa_add(wal,
	       HSW_ROW_CHICKEN3, 0,
	       _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
	       0 /* XXX does this reg exist? */, true);

	/* WaVSRefCountFullforceMissDisable:hsw */
	wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
}

static void
gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
	unsigned int slice, subslice;
	u32 mcr, mcr_mask;

	GEM_BUG_ON(GRAPHICS_VER(i915) != 9);

	/*
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
	 * Before any MMIO read into slice/subslice specific registers, MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to an
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. In the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 */
	slice = ffs(sseu->slice_mask) - 1;
	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask.hsw));
	subslice = ffs(intel_sseu_get_hsw_subslices(sseu, slice));
	GEM_BUG_ON(!subslice);
	subslice--;

	/*
	 * We use GEN8_MCR..() macros to calculate the |mcr| value for
	 * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
	 */
	mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;

	drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);

	wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}

static void
gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	/* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
	gen9_wa_init_mcr(i915, wal);

	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
		wa_write_or(wal,
			    GAM_ECOCHK,
			    ECOCHK_DIS_TLB);

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_write_or(wal,
			    MMCD_MISC_CTRL,
			    MMCD_PCLA | MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	wa_write_or(wal,
		    GAM_ECOCHK,
		    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(gt, wal);

	/* WaDisableGafsUnitClkGating:skl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
		wa_write_or(wal,
			    GEN9_GAMT_ECO_REG_RW_IA,
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(gt, wal);

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableGafsUnitClkGating:kbl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(gt, wal);
}

static void
cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(gt, wal);

	/* WaDisableGafsUnitClkGating:cfl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void __set_mcr_steering(struct i915_wa_list *wal,
			       i915_reg_t steering_reg,
			       unsigned int slice, unsigned int subslice)
{
	u32 mcr, mcr_mask;

	mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
	mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;

	wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
}

static void debug_dump_steering(struct intel_gt *gt)
{
	struct drm_printer p = drm_debug_printer("MCR Steering:");

	if (drm_debug_enabled(DRM_UT_DRIVER))
		intel_gt_mcr_report_steering(&p, gt, false);
}

static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
			 unsigned int slice, unsigned int subslice)
{
	__set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);

	gt->default_steering.groupid = slice;
	gt->default_steering.instanceid = subslice;

	debug_dump_steering(gt);
}

static void
icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	unsigned int subslice;

	GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
	GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);

	/*
	 * Although a platform may have subslices, we need to always steer
	 * reads to the lowest instance that isn't fused off.  When Render
	 * Power Gating is enabled, grabbing forcewake will only power up a
	 * single subslice (the "minconfig") if there isn't a real workload
	 * that needs to be run; this means that if we steer register reads to
	 * one of the higher subslices, we run the risk of reading back 0's or
	 * random garbage.
	 */
	subslice = __ffs(intel_sseu_get_hsw_subslices(sseu, 0));

	/*
	 * If the subslice we picked above also steers us to a valid L3 bank,
	 * then we can just rely on the default steering and won't need to
	 * worry about explicitly re-steering L3BANK reads later.
	 */
	if (gt->info.l3bank_mask & BIT(subslice))
		gt->steering_table[L3BANK] = NULL;

	__add_mcr_wa(gt, wal, 0, subslice);
}

static void
xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	unsigned long slice, subslice = 0, slice_mask = 0;
	u32 lncf_mask = 0;
	int i;

	/*
	 * On Xe_HP the steering increases in complexity. There are now several
	 * more units that require steering and we're not guaranteed to be able
	 * to find a common setting for all of them. These are:
	 * - GSLICE (fusable)
	 * - DSS (sub-unit within gslice; fusable)
	 * - L3 Bank (fusable)
	 * - MSLICE (fusable)
	 * - LNCF (sub-unit within mslice; always present if mslice is present)
	 *
	 * We'll do our default/implicit steering based on GSLICE (in the
	 * sliceid field) and DSS (in the subsliceid field).  If we can
	 * find overlap between the valid MSLICE and/or LNCF values with
	 * a suitable GSLICE, then we can just re-use the default value and
	 * skip any explicit steering at runtime.
	 *
	 * We only need to look for overlap between GSLICE/MSLICE/LNCF to find
	 * a valid sliceid value.  DSS steering is the only type of steering
	 * that utilizes the 'subsliceid' bits.
	 *
	 * Also note that, even though the steering domain is called "GSlice"
	 * and it is encoded in the register using the gslice format, the spec
	 * says that the combined (geometry | compute) fuse should be used to
	 * select the steering.
	 */

	/* Find the potential gslice candidates */
	slice_mask = intel_slicemask_from_xehp_dssmask(sseu->subslice_mask,
						       GEN_DSS_PER_GSLICE);

	/*
	 * Find the potential LNCF candidates.  Either LNCF within a valid
	 * mslice is fine.
	 */
	for_each_set_bit(i, &gt->info.mslice_mask, GEN12_MAX_MSLICES)
		lncf_mask |= (0x3 << (i * 2));

	/*
	 * Are there any sliceid values that work for both GSLICE and LNCF
	 * steering?
	 */
	if (slice_mask & lncf_mask) {
		slice_mask &= lncf_mask;
		gt->steering_table[LNCF] = NULL;
	}

	/* How about sliceid values that also work for MSLICE steering? */
	if (slice_mask & gt->info.mslice_mask) {
		slice_mask &= gt->info.mslice_mask;
		gt->steering_table[MSLICE] = NULL;
	}

	if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
		gt->steering_table[GAM] = NULL;

	slice = __ffs(slice_mask);
	subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
		GEN_DSS_PER_GSLICE;

	__add_mcr_wa(gt, wal, slice, subslice);

	/*
	 * SQIDI ranges are special because they use different steering
	 * registers than everything else we work with.  On XeHP SDV and
	 * DG2-G10, any value in the steering registers will work fine since
	 * all instances are present, but DG2-G11 only has SQIDI instances at
	 * ID's 2 and 3, so we need to steer to one of those.  For simplicity
	 * we'll just steer to a hardcoded "2" since that value will work
	 * everywhere.
	 */
	__set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2);
	__set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2);

	/*
	 * On DG2, GAM registers have a dedicated steering control register
	 * and must always be programmed to a hardcoded groupid of "1."
	 */
	if (IS_DG2(gt->i915))
		__set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0);
}

static void
pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
{
	unsigned int dss;

	/*
	 * Setup implicit steering for COMPUTE and DSS ranges to the first
	 * non-fused-off DSS.  All other types of MCR registers will be
	 * explicitly steered.
	 */
	dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
	__add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE);
}

static void
icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	icl_wa_init_mcr(gt, wal);

	/* WaModifyGamTlbPartitioning:icl */
	wa_write_clr_set(wal,
			 GEN11_GACB_PERF_CTRL,
			 GEN11_HASH_CTRL_MASK,
			 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	wa_write_or(wal,
		    GEN11_LSN_UNSLCVC,
		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	wa_write_or(wal,
		    GEN8_GAMW_ECO_DEV_RW_IA,
		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1406463099:icl
	 * Formerly known as WaGamTlbPendError
	 */
	wa_write_or(wal,
		    GAMT_CHKN_BIT_REG,
		    GAMT_CHKN_DISABLE_L3_COH_PIPE);

	/* Wa_1407352427:icl,ehl */
	wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
		    PSDUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl,ehl */
	wa_mcr_write_or(wal,
			GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
			GWUNIT_CLKGATE_DIS);

	/* Wa_1607087056:icl,ehl,jsl */
	if (IS_ICELAKE(i915) ||
	    IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
		wa_write_or(wal,
			    GEN11_SLICE_UNIT_LEVEL_CLKGATE,
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);

	/*
	 * This is not a documented workaround, but rather an optimization
	 * to reduce sampler power.
	 */
	wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
}

/*
 * Though there are per-engine instances of these registers,
 * they retain their value through engine resets and should
 * only be provided on the GT workaround list rather than
 * the engine-specific workaround list.
 */
static void
wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct intel_engine_cs *engine;
	int id;

	for_each_engine(engine, gt, id) {
		if (engine->class != VIDEO_DECODE_CLASS ||
		    (engine->instance % 2))
			continue;

		wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base),
			    IECPUNIT_CLKGATE_DIS);
	}
}

static void
gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	icl_wa_init_mcr(gt, wal);

	/* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
	wa_14011060649(gt, wal);

	/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
	wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
}

static void
tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	gen12_gt_workarounds_init(gt, wal);

	/* Wa_1409420604:tgl */
	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
		wa_mcr_write_or(wal,
				SUBSLICE_UNIT_LEVEL_CLKGATE2,
				CPSSUNIT_CLKGATE_DIS);

	/* Wa_1607087056:tgl also know as BUG:1409180338 */
	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
		wa_write_or(wal,
			    GEN11_SLICE_UNIT_LEVEL_CLKGATE,
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);

	/* Wa_1408615072:tgl[a0] */
	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
			    VSUNIT_CLKGATE_DIS_TGL);
}

static void
dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	gen12_gt_workarounds_init(gt, wal);

	/* Wa_1607087056:dg1 */
	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
		wa_write_or(wal,
			    GEN11_SLICE_UNIT_LEVEL_CLKGATE,
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);

	/* Wa_1409420604:dg1 */
	if (IS_DG1(i915))
		wa_mcr_write_or(wal,
				SUBSLICE_UNIT_LEVEL_CLKGATE2,
				CPSSUNIT_CLKGATE_DIS);

	/* Wa_1408615072:dg1 */
	/* Empirical testing shows this register is unaffected by engine reset. */
	if (IS_DG1(i915))
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
			    VSUNIT_CLKGATE_DIS_TGL);
}

static void
xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	xehp_init_mcr(gt, wal);

	/* Wa_1409757795:xehpsdv */
	wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);

	/* Wa_16011155590:xehpsdv */
	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
			    TSGUNIT_CLKGATE_DIS);

	/* Wa_14011780169:xehpsdv */
	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
		wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
			    GAMTLBVDBOX7_CLKGATE_DIS |
			    GAMTLBVDBOX6_CLKGATE_DIS |
			    GAMTLBVDBOX5_CLKGATE_DIS |
			    GAMTLBVDBOX4_CLKGATE_DIS |
			    GAMTLBVDBOX3_CLKGATE_DIS |
			    GAMTLBVDBOX2_CLKGATE_DIS |
			    GAMTLBVDBOX1_CLKGATE_DIS |
			    GAMTLBVDBOX0_CLKGATE_DIS |
			    GAMTLBKCR_CLKGATE_DIS |
			    GAMTLBGUC_CLKGATE_DIS |
			    GAMTLBBLT_CLKGATE_DIS);
		wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
			    GAMTLBGFXA1_CLKGATE_DIS |
			    GAMTLBCOMPA0_CLKGATE_DIS |
			    GAMTLBCOMPA1_CLKGATE_DIS |
			    GAMTLBCOMPB0_CLKGATE_DIS |
			    GAMTLBCOMPB1_CLKGATE_DIS |
			    GAMTLBCOMPC0_CLKGATE_DIS |
			    GAMTLBCOMPC1_CLKGATE_DIS |
			    GAMTLBCOMPD0_CLKGATE_DIS |
			    GAMTLBCOMPD1_CLKGATE_DIS |
			    GAMTLBMERT_CLKGATE_DIS   |
			    GAMTLBVEBOX3_CLKGATE_DIS |
			    GAMTLBVEBOX2_CLKGATE_DIS |
			    GAMTLBVEBOX1_CLKGATE_DIS |
			    GAMTLBVEBOX0_CLKGATE_DIS);
	}

	/* Wa_16012725990:xehpsdv */
	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);

	/* Wa_14011060649:xehpsdv */
	wa_14011060649(gt, wal);
}

static void
dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct intel_engine_cs *engine;
	int id;

	xehp_init_mcr(gt, wal);

	/* Wa_14011060649:dg2 */
	wa_14011060649(gt, wal);

	/*
	 * Although there are per-engine instances of these registers,
	 * they technically exist outside the engine itself and are not
	 * impacted by engine resets.  Furthermore, they're part of the
	 * GuC blacklist so trying to treat them as engine workarounds
	 * will result in GuC initialization failure and a wedged GPU.
	 */
	for_each_engine(engine, gt, id) {
		if (engine->class != VIDEO_DECODE_CLASS)
			continue;

		/* Wa_16010515920:dg2_g10 */
		if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
			wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
				    ALNUNIT_CLKGATE_DIS);
	}

	if (IS_DG2_G10(gt->i915)) {
		/* Wa_22010523718:dg2 */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
			    CG3DDISCFEG_CLKGATE_DIS);

		/* Wa_14011006942:dg2 */
		wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
				DSS_ROUTER_CLKGATE_DIS);
	}

	if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
		/* Wa_14010948348:dg2_g10 */
		wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);

		/* Wa_14011037102:dg2_g10 */
		wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);

		/* Wa_14011371254:dg2_g10 */
		wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);

		/* Wa_14011431319:dg2_g10 */
		wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
			    GAMTLBVDBOX7_CLKGATE_DIS |
			    GAMTLBVDBOX6_CLKGATE_DIS |
			    GAMTLBVDBOX5_CLKGATE_DIS |
			    GAMTLBVDBOX4_CLKGATE_DIS |
			    GAMTLBVDBOX3_CLKGATE_DIS |
			    GAMTLBVDBOX2_CLKGATE_DIS |
			    GAMTLBVDBOX1_CLKGATE_DIS |
			    GAMTLBVDBOX0_CLKGATE_DIS |
			    GAMTLBKCR_CLKGATE_DIS |
			    GAMTLBGUC_CLKGATE_DIS |
			    GAMTLBBLT_CLKGATE_DIS);
		wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
			    GAMTLBGFXA1_CLKGATE_DIS |
			    GAMTLBCOMPA0_CLKGATE_DIS |
			    GAMTLBCOMPA1_CLKGATE_DIS |
			    GAMTLBCOMPB0_CLKGATE_DIS |
			    GAMTLBCOMPB1_CLKGATE_DIS |
			    GAMTLBCOMPC0_CLKGATE_DIS |
			    GAMTLBCOMPC1_CLKGATE_DIS |
			    GAMTLBCOMPD0_CLKGATE_DIS |
			    GAMTLBCOMPD1_CLKGATE_DIS |
			    GAMTLBMERT_CLKGATE_DIS   |
			    GAMTLBVEBOX3_CLKGATE_DIS |
			    GAMTLBVEBOX2_CLKGATE_DIS |
			    GAMTLBVEBOX1_CLKGATE_DIS |
			    GAMTLBVEBOX0_CLKGATE_DIS);

		/* Wa_14010569222:dg2_g10 */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
			    GAMEDIA_CLKGATE_DIS);

		/* Wa_14011028019:dg2_g10 */
		wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
	}

	/* Wa_14014830051:dg2 */
	wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);

	/*
	 * The following are not actually "workarounds" but rather
	 * recommended tuning settings documented in the bspec's
	 * performance guide section.
	 */
	wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);

	/* Wa_14015795083 */
	wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
}

static void
pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	pvc_init_mcr(gt, wal);

	/* Wa_14015795083 */
	wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
}

static void
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* FIXME: Actual workarounds will be added in future patch(es) */

	/*
	 * Unlike older platforms, we no longer set up implicit steering here;
	 * all MCR accesses are explicitly steered.
	 */
	debug_dump_steering(gt);
}

static void
xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* FIXME: Actual workarounds will be added in future patch(es) */

	debug_dump_steering(gt);
}

static void
gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	if (gt->type == GT_MEDIA) {
		if (MEDIA_VER(i915) >= 13)
			xelpmp_gt_workarounds_init(gt, wal);
		else
			MISSING_CASE(MEDIA_VER(i915));

		return;
	}

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		xelpg_gt_workarounds_init(gt, wal);
	else if (IS_PONTEVECCHIO(i915))
		pvc_gt_workarounds_init(gt, wal);
	else if (IS_DG2(i915))
		dg2_gt_workarounds_init(gt, wal);
	else if (IS_XEHPSDV(i915))
		xehpsdv_gt_workarounds_init(gt, wal);
	else if (IS_DG1(i915))
		dg1_gt_workarounds_init(gt, wal);
	else if (IS_TIGERLAKE(i915))
		tgl_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 12)
		gen12_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 11)
		icl_gt_workarounds_init(gt, wal);
	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
		cfl_gt_workarounds_init(gt, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(gt, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(gt, wal);
	else if (IS_BROXTON(i915))
		gen9_gt_workarounds_init(gt, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(gt, wal);
	else if (IS_HASWELL(i915))
		hsw_gt_workarounds_init(gt, wal);
	else if (IS_VALLEYVIEW(i915))
		vlv_gt_workarounds_init(gt, wal);
	else if (IS_IVYBRIDGE(i915))
		ivb_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 6)
		snb_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 5)
		ilk_gt_workarounds_init(gt, wal);
	else if (IS_G4X(i915))
		g4x_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 4)
		gen4_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) <= 8)
		;
	else
		MISSING_CASE(GRAPHICS_VER(i915));
}

void intel_gt_init_workarounds(struct intel_gt *gt)
{
	struct i915_wa_list *wal = &gt->wa_list;

	wa_init_start(wal, gt, "GT", "global");
	gt_init_workarounds(gt, wal);
	wa_init_finish(wal);
}

static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}

static bool
wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
	  const char *name, const char *from)
{
	if ((cur ^ wa->set) & wa->read) {
		drm_err(&gt->i915->drm,
			"%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
			name, from, i915_mmio_reg_offset(wa->reg),
			cur, cur & wa->read, wa->set & wa->read);

		return false;
	}

	return true;
}

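/*
 * Apply every entry in the list to the hardware. Forcewake is taken once
 * for the union of domains computed by wal_get_fw_for_rmw(); each entry is
 * then applied as an open-coded read-modify-write (using multicast writes
 * for MCR registers) and, on CONFIG_DRM_I915_DEBUG_GEM builds, immediately
 * read back and checked with wa_verify().
 */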
static void wa_list_apply(const struct i915_wa_list *wal)
{
	struct intel_gt *gt = wal->gt;
	struct intel_uncore *uncore = gt->uncore;
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	fw = wal_get_fw_for_rmw(uncore, wal);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 val, old = 0;

		/* open-coded rmw due to steering */
		if (wa->clr)
			old = wa->is_mcr ?
				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
				intel_uncore_read_fw(uncore, wa->reg);
		val = (old & ~wa->clr) | wa->set;
		if (val != old || !wa->clr) {
			if (wa->is_mcr)
				intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
			else
				intel_uncore_write_fw(uncore, wa->reg, val);
		}

		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
			u32 val = wa->is_mcr ?
				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
				intel_uncore_read_fw(uncore, wa->reg);

			wa_verify(gt, wa, val, wal->name, "application");
		}
	}

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock_irqrestore(&uncore->lock, flags);
}

void intel_gt_apply_workarounds(struct intel_gt *gt)
{
	wa_list_apply(&gt->wa_list);
}

static bool wa_list_verify(struct intel_gt *gt,
			   const struct i915_wa_list *wal,
			   const char *from)
{
	struct intel_uncore *uncore = gt->uncore;
	struct i915_wa *wa;
	enum forcewake_domains fw;
	unsigned long flags;
	unsigned int i;
	bool ok = true;

	fw = wal_get_fw_for_rmw(uncore, wal);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		ok &= wa_verify(wal->gt, wa, wa->is_mcr ?
				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
				intel_uncore_read_fw(uncore, wa->reg),
				wal->name, from);

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock_irqrestore(&uncore->lock, flags);

	return ok;
}

bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
	return wa_list_verify(gt, &gt->wa_list, from);
}

__maybe_unused
static bool is_nonpriv_flags_valid(u32 flags)
{
	/* Check only valid flag bits are set */
	if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
		return false;

	/* NB: Only 3 out of 4 enum values are valid for access field */
	if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
	    RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
		return false;

	return true;
}

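/*
 * Whitelist entries are not regular MMIO writes: the access flags are
 * folded into the register offset here, and the combined value is later
 * programmed into one of the engine's RING_FORCE_TO_NONPRIV slots by
 * intel_engine_apply_whitelist().
 */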
static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.reg = reg
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
		return;

	wa.reg.reg |= flags;
	_wa_add(wal, &wa);
}

static void
whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.mcr_reg = reg,
		.is_mcr = 1,
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
		return;

	wa.mcr_reg.reg |= flags;
	_wa_add(wal, &wa);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}

static void
whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
{
	whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);

	/* WaSendPushConstantsFromMMIO:skl,bxt */
	whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/*
	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
	 *
	 * This covers 4 register which are next to one another :
	 *   - PS_INVOCATION_COUNT
	 *   - PS_INVOCATION_COUNT_UDW
	 *   - PS_DEPTH_COUNT
	 *   - PS_DEPTH_COUNT_UDW
	 */
	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
			  RING_FORCE_TO_NONPRIV_RANGE_4);
}

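/*
 * Expose RING_CTX_TIMESTAMP as read-only to userspace; only the non-render
 * engines take the explicit whitelist entry here.
 */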
static void allow_read_ctx_timestamp(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		whitelist_reg_ext(w,
				  RING_CTX_TIMESTAMP(engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
}

static void cml_whitelist_build(struct intel_engine_cs *engine)
{
	allow_read_ctx_timestamp(engine);

	cfl_whitelist_build(engine);
}

static void icl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	allow_read_ctx_timestamp(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
		whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);

		/* WaAllowUMDToModifySamplerMode:icl */
		whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);

		/* WaEnableStateCacheRedirectToCS:icl */
		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);

		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
		 *
		 * This covers 4 register which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);
		break;

	case VIDEO_DECODE_CLASS:
		/* hucStatusRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucUKernelHdrInfoRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucStatus2RegOffset */
		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		break;

	default:
		break;
	}
}

static void tgl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	allow_read_ctx_timestamp(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
		 * Wa_1408556865:tgl
		 *
		 * This covers 4 registers which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);

		/*
		 * Wa_1808121037:tgl
		 * Wa_14012131227:dg1
		 * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
		 */
		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);

		/* Wa_1806527549:tgl */
		whitelist_reg(w, HIZ_CHICKEN);
		break;
	default:
		break;
	}
}

static void dg1_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	tgl_whitelist_build(engine);

	/* GEN:BUG:1409280441:dg1 */
	if (IS_DG1_GRAPHICS_STEP(engine->i915, STEP_A0, STEP_B0) &&
	    (engine->class == RENDER_CLASS ||
	     engine->class == COPY_ENGINE_CLASS))
		whitelist_reg_ext(w, RING_ID(engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
}

static void xehpsdv_whitelist_build(struct intel_engine_cs *engine)
{
	allow_read_ctx_timestamp(engine);
}

static void dg2_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	allow_read_ctx_timestamp(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		/*
		 * Wa_1507100340:dg2_g10
		 *
		 * This covers 4 registers which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
			whitelist_reg_ext(w, PS_INVOCATION_COUNT,
					  RING_FORCE_TO_NONPRIV_ACCESS_RD |
					  RING_FORCE_TO_NONPRIV_RANGE_4);

		break;
	case COMPUTE_CLASS:
		/* Wa_16011157294:dg2_g10 */
		if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
			whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
		break;
	default:
		break;
	}
}

static void blacklist_trtt(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	/*
	 * Prevent read/write access to [0x4400, 0x4600) which covers
	 * the TRTT range across all engines. Note that normally userspace
	 * cannot access the other engines' trtt control, but for simplicity
	 * we cover the entire range on each engine.
	 */
	whitelist_reg_ext(w, _MMIO(0x4400),
			  RING_FORCE_TO_NONPRIV_DENY |
			  RING_FORCE_TO_NONPRIV_RANGE_64);
	whitelist_reg_ext(w, _MMIO(0x4500),
			  RING_FORCE_TO_NONPRIV_DENY |
			  RING_FORCE_TO_NONPRIV_RANGE_64);
}

static void pvc_whitelist_build(struct intel_engine_cs *engine)
{
	allow_read_ctx_timestamp(engine);

	/* Wa_16014440446:pvc */
	blacklist_trtt(engine);
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *w = &engine->whitelist;

	wa_init_start(w, engine->gt, "whitelist", engine->name);

	if (IS_PONTEVECCHIO(i915))
		pvc_whitelist_build(engine);
	else if (IS_DG2(i915))
		dg2_whitelist_build(engine);
	else if (IS_XEHPSDV(i915))
		xehpsdv_whitelist_build(engine);
	else if (IS_DG1(i915))
		dg1_whitelist_build(engine);
	else if (GRAPHICS_VER(i915) == 12)
		tgl_whitelist_build(engine);
	else if (GRAPHICS_VER(i915) == 11)
		icl_whitelist_build(engine);
	else if (IS_COMETLAKE(i915))
		cml_whitelist_build(engine);
	else if (IS_COFFEELAKE(i915))
		cfl_whitelist_build(engine);
	else if (IS_GEMINILAKE(i915))
		glk_whitelist_build(engine);
	else if (IS_KABYLAKE(i915))
		kbl_whitelist_build(engine);
	else if (IS_BROXTON(i915))
		bxt_whitelist_build(engine);
	else if (IS_SKYLAKE(i915))
		skl_whitelist_build(engine);
	else if (GRAPHICS_VER(i915) <= 8)
		;
	else
		MISSING_CASE(GRAPHICS_VER(i915));

	wa_init_finish(w);
}

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
	const struct i915_wa_list *wal = &engine->whitelist;
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(wa->reg));

	/* And clear the rest just in case of garbage */
	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(RING_NOPID(base)));
}

/*
 * engine_fake_wa_init(), a placeholder to program registers that are not
 * part of an official workaround defined by the hardware team.
 * Adding the programming of those registers as workarounds allows us to
 * utilize the workaround framework for proper application and verification.
 */
static void
engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	u8 mocs_w, mocs_r;

	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be used
	 * by the command streamer when executing commands that don't have
	 * a way to explicitly specify a MOCS setting.  The default should
	 * usually reference whichever MOCS entry corresponds to uncached
	 * behavior, although use of a WB cached entry is recommended by the
	 * spec in certain circumstances on specific platforms.
	 */
	if (GRAPHICS_VER(engine->i915) >= 12) {
		mocs_r = engine->gt->mocs.uc_index;
		mocs_w = engine->gt->mocs.uc_index;

		if (HAS_L3_CCS_READ(engine->i915) &&
		    engine->class == COMPUTE_CLASS) {
			mocs_r = engine->gt->mocs.wb_index;

			/*
			 * Even on the few platforms where MOCS 0 is a
			 * legitimate table entry, it's never the correct
			 * setting to use here; we can assume the MOCS init
			 * just forgot to initialize wb_index.
			 */
			drm_WARN_ON(&engine->i915->drm, mocs_r == 0);
		}

		wa_masked_field_set(wal,
				    RING_CMD_CCTL(engine->mmio_base),
				    CMD_CCTL_MOCS_MASK,
				    CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
	}
}

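/*
 * Wa_1308578152 only applies when the first gslice is fused off, i.e. when
 * the first DSS reported available by the SSEU info lies beyond the first
 * gslice's worth of DSS.
 */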
static bool needs_wa_1308578152(struct intel_engine_cs *engine)
{
	return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >=
		GEN_DSS_PER_GSLICE;
}

static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (IS_DG2(i915)) {
		/* Wa_1509235366:dg2 */
		wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
			    GLOBAL_INVALIDATION_MODE);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
		/* Wa_14013392000:dg2_g11 */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
	    IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
		/* Wa_1509727124:dg2 */
		wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
				 SC_DISABLE_POWER_OPTIMIZATION_EBB);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
		/* Wa_14012419201:dg2 */
		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
				 GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
	    IS_DG2_G11(i915)) {
		/*
		 * Wa_22012826095:dg2
		 * Wa_22013059131:dg2
		 */
		wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
				     MAXREQS_PER_BANK,
				     REG_FIELD_PREP(MAXREQS_PER_BANK, 2));

		/* Wa_22013059131:dg2 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
				FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
	}

	/* Wa_1308578152:dg2_g10 when first gslice is fused off */
	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) &&
	    needs_wa_1308578152(engine)) {
		wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
			      GEN12_REPLAY_MODE_GRANULARITY);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
	    IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
		/* Wa_22013037850:dg2 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
				DISABLE_128B_EVICTION_COMMAND_UDW);

		/* Wa_22012856258:dg2 */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
				 GEN12_DISABLE_READ_SUPPRESSION);

		/*
		 * Wa_22010960976:dg2
		 * Wa_14013347512:dg2
		 */
		wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
				  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
		/*
		 * Wa_1608949956:dg2_g10
		 * Wa_14010198302:dg2_g10
		 */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
				 MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);

		/*
		 * Wa_14010918519:dg2_g10
		 *
		 * LSC_CHICKEN_BIT_0 always reads back as 0 in this stepping,
		 * so ignoring verification.
		 */
		wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
			   FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
			   0, false);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
		/* Wa_22010430635:dg2 */
		wa_mcr_masked_en(wal,
				 GEN9_ROW_CHICKEN4,
				 GEN12_DISABLE_GRF_CLEAR);

		/* Wa_14010648519:dg2 */
		wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
	}

	/* Wa_14013202645:dg2 */
	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
		wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);

	/* Wa_22012532006:dg2 */
	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
				 DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);

	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
		/* Wa_14010680813:dg2_g10 */
		wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
			    EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
	}

	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
		/* Wa_14012362059:dg2 */
		wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
	}

	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
	    IS_DG2_G10(i915)) {
		/* Wa_22014600077:dg2 */
		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
			   _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
			   0 /* Wa_14012342262 write-only reg, so skip verification */,
			   true);
	}

	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
	    IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
		/*
		 * Wa_1607138336:tgl[a0],dg1[a0]
		 * Wa_1607063988:tgl[a0],dg1[a0]
		 */
		wa_write_or(wal,
			    GEN9_CTX_PREEMPT_REG,
			    GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
	}

	if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
		/*
		 * Wa_1606679103:tgl
		 * (see also Wa_1606682166:icl)
		 */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);
	}

	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);

		/*
		 * Wa_1407928979:tgl A*
		 * Wa_18011464164:tgl[B0+],dg1[B0+]
		 * Wa_22010931296:tgl[B0+],dg1[B0+]
		 * Wa_14010919138:rkl,dg1,adl-s,adl-p
		 */
		wa_write_or(wal, GEN7_FF_THREAD_MODE,
			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
	}

	if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) ||
	    IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/*
		 * Wa_1606700617:tgl,dg1,adl-p
		 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
		 * Wa_14010826681:tgl,dg1,rkl,adl-p
		 * Wa_18019627453:dg2
		 */
		wa_masked_en(wal,
			     GEN9_CS_DEBUG_MODE1,
			     FF_DOP_CLOCK_GATE_DISABLE);
	}

	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
	    IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
				 GEN12_PUSH_CONST_DEREF_HOLD_DIS);

		/*
		 * Wa_1409085225:tgl
		 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
		 */
		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
	}

	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
		/*
		 * Wa_1607030317:tgl
		 * Wa_1607186500:tgl
		 * Wa_1607297627:tgl,rkl,dg1[a0],adlp
		 *
		 * On TGL and RKL there are multiple entries for this WA in the
		 * BSpec; some indicate this is an A0-only WA, others indicate
		 * it applies to all steppings so we trust the "all steppings."
		 * For DG1 this only applies to A0.
		 */
		wa_masked_en(wal,
			     RING_PSMI_CTL(RENDER_RING_BASE),
			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
			     GEN8_RC_SEMA_IDLE_MSG_DISABLE);
	}

	if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
	    IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
		/* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
		wa_mcr_masked_en(wal,
				 GEN10_SAMPLER_MODE,
				 ENABLE_SMALLPL);
	}

	if (GRAPHICS_VER(i915) == 11) {
		/* This is not a Wa. Enable for better image quality */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

		/*
		 * Wa_1405543622:icl
		 * Formerly known as WaGAPZPriorityScheme
		 */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN11_ARBITRATION_PRIO_ORDER_MASK);

		/*
		 * Wa_1604223664:icl
		 * Formerly known as WaL3BankAddressHashing
		 */
		wa_write_clr_set(wal,
				 GEN8_GARBCNTL,
				 GEN11_HASH_CTRL_EXCL_MASK,
				 GEN11_HASH_CTRL_EXCL_BIT0);
		wa_write_clr_set(wal,
				 GEN11_GLBLINVL,
				 GEN11_BANK_HASH_ADDR_EXCL_MASK,
				 GEN11_BANK_HASH_ADDR_EXCL_BIT0);

		/*
		 * Wa_1405733216:icl
		 * Formerly known as WaDisableCleanEvicts
		 */
		wa_mcr_write_or(wal,
				GEN8_L3SQCREG4,
				GEN11_LQSC_CLEAN_EVICT_DISABLE);

		/* Wa_1606682166:icl */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);

		/* Wa_1409178092:icl */
		wa_mcr_write_clr_set(wal,
				     GEN11_SCRATCH2,
				     GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
				     0);

		/* WaEnable32PlaneMode:icl */
		wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
			     GEN11_ENABLE_32_PLANE_MODE);

		/*
		 * Wa_1408615072:icl,ehl  (vsunit)
		 * Wa_1407596294:icl,ehl  (hsunit)
		 */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
			    VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);

		/*
		 * Wa_1408767742:icl[a2..forever],ehl[all]
		 * Wa_1605460711:icl[a0..c0]
		 */
		wa_write_or(wal,
			    GEN7_FF_THREAD_MODE,
			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);

		/* Wa_22010271021 */
		wa_masked_en(wal,
			     GEN9_CS_DEBUG_MODE1,
			     FF_DOP_CLOCK_GATE_DISABLE);
	}

	/*
	 * Intel platforms that support fine-grained preemption (i.e., gen9 and
	 * beyond) allow the kernel-mode driver to choose between two different
	 * options for controlling preemption granularity and behavior.
	 *
	 * Option 1 (hardware default):
	 *   Preemption settings are controlled in a global manner via
	 *   kernel-only register CS_DEBUG_MODE1 (0x20EC).  Any granularity
	 *   and settings chosen by the kernel-mode driver will apply to all
	 *   userspace clients.
	 *
	 * Option 2:
	 *   Preemption settings are controlled on a per-context basis via
	 *   register CS_CHICKEN1 (0x2580).  CS_CHICKEN1 is saved/restored on
	 *   context switch and is writable by userspace (e.g., via
	 *   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
	 *   which allows different userspace drivers/clients to select
	 *   different settings, or to change those settings on the fly in
	 *   response to runtime needs.  This option was known by name
	 *   "FtrPerCtxtPreemptionGranularityControl" at one time, although
	 *   that name is somewhat misleading as other non-granularity
	 *   preemption settings are also impacted by this decision.
	 *
	 * On Linux, our policy has always been to let userspace drivers
	 * control preemption granularity/settings (Option 2).  This was
	 * originally mandatory on gen9 to prevent ABI breakage (old gen9
	 * userspace developed before object-level preemption was enabled would
	 * not behave well if i915 were to go with Option 1 and enable that
	 * preemption in a global manner).  On gen9 each context would have
	 * object-level preemption disabled by default (see
	 * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
	 * userspace drivers could opt-in to object-level preemption as they
	 * saw fit.  For post-gen9 platforms, we continue to utilize Option 2;
	 * even though it is no longer necessary for ABI compatibility when
	 * enabling a new platform, it does ensure that userspace will be able
	 * to implement any workarounds that show up requiring temporary
	 * adjustments to preemption behavior at runtime.
	 *
	 * Notes/Workarounds:
	 *  - Wa_14015141709:  On DG2 and early steppings of MTL,
	 *      CS_CHICKEN1[0] does not disable object-level preemption as
	 *      it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
	 *      using Option 1).  Effectively this means userspace is unable
	 *      to disable object-level preemption on these platforms/steppings
	 *      despite the setting here.
	 *
	 *  - Wa_16013994831:  May require that userspace program
	 *      CS_CHICKEN1[10] when certain runtime conditions are true.
	 *      Userspace requires Option 2 to be in effect for their update of
	 *      CS_CHICKEN1[10] to be effective.
	 *
	 * Other workarounds may appear in the future that will also require
	 * Option 2 behavior to allow proper userspace implementation.
	 */
	if (GRAPHICS_VER(i915) >= 9)
		wa_masked_en(wal,
			     GEN7_FF_SLICE_CS_CHICKEN1,
			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);

	if (IS_SKYLAKE(i915) ||
	    IS_KABYLAKE(i915) ||
	    IS_COFFEELAKE(i915) ||
	    IS_COMETLAKE(i915)) {
		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN9_GAPS_TSV_CREDIT_DISABLE);
	}

	if (IS_BROXTON(i915)) {
		/* WaDisablePooledEuLoadBalancingFix:bxt */
		wa_masked_en(wal,
			     FF_SLICE_CS_CHICKEN2,
			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	if (GRAPHICS_VER(i915) == 9) {
		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
		wa_masked_en(wal,
			     GEN9_CSFE_CHICKEN1_RCS,
			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
		wa_mcr_write_or(wal,
				BDW_SCRATCH1,
				GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
		if (IS_GEN9_LP(i915))
			wa_mcr_write_clr_set(wal,
					     GEN8_L3SQCREG1,
					     L3_PRIO_CREDITS_MASK,
					     L3_GENERAL_PRIO_CREDITS(62) |
					     L3_HIGH_PRIO_CREDITS(2));

		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
		wa_mcr_write_or(wal,
				GEN8_L3SQCREG4,
				GEN8_LQSC_FLUSH_COHERENT_LINES);

		/* Disable atomics in L3 to prevent unrecoverable hangs */
		wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
				 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
		wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
				     GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
		wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
				     EVICTION_PERF_FIX_ENABLE, 0);
	}

	if (IS_HASWELL(i915)) {
		/* WaSampleCChickenBitEnable:hsw */
		wa_masked_en(wal,
			     HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);

		wa_masked_dis(wal,
			      CACHE_MODE_0_GEN7,
			      /* enable HiZ Raw Stall Optimization */
			      HIZ_RAW_STALL_OPT_DISABLE);
	}

	if (IS_VALLEYVIEW(i915)) {
		/* WaDisableEarlyCull:vlv */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);

		/*
		 * WaVSThreadDispatchOverride:ivb,vlv
		 *
		 * This actually overrides the dispatch
		 * mode for all thread types.
		 */
		wa_write_clr_set(wal,
				 GEN7_FF_THREAD_MODE,
				 GEN7_FF_SCHED_MASK,
				 GEN7_FF_TS_SCHED_HW |
				 GEN7_FF_VS_SCHED_HW |
				 GEN7_FF_DS_SCHED_HW);

		/* WaPsdDispatchEnable:vlv */
		/* WaDisablePSDDualDispatchEnable:vlv */
		wa_masked_en(wal,
			     GEN7_HALF_SLICE_CHICKEN1,
			     GEN7_MAX_PS_THREAD_DEP |
			     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
	}

	if (IS_IVYBRIDGE(i915)) {
		/* WaDisableEarlyCull:ivb */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);

		if (0) { /* causes HiZ corruption on ivb:gt1 */
			/* enable HiZ Raw Stall Optimization */
			wa_masked_dis(wal,
				      CACHE_MODE_0_GEN7,
				      HIZ_RAW_STALL_OPT_DISABLE);
		}

		/*
		 * WaVSThreadDispatchOverride:ivb,vlv
		 *
		 * This actually overrides the dispatch
		 * mode for all thread types.
		 */
		wa_write_clr_set(wal,
				 GEN7_FF_THREAD_MODE,
				 GEN7_FF_SCHED_MASK,
				 GEN7_FF_TS_SCHED_HW |
				 GEN7_FF_VS_SCHED_HW |
				 GEN7_FF_DS_SCHED_HW);

		/* WaDisablePSDDualDispatchEnable:ivb */
		if (IS_IVB_GT1(i915))
			wa_masked_en(wal,
				     GEN7_HALF_SLICE_CHICKEN1,
				     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
	}

	if (GRAPHICS_VER(i915) == 7) {
		/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
		wa_masked_en(wal,
			     RING_MODE_GEN7(RENDER_RING_BASE),
			     GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);

		/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
		wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);

		/*
		 * BSpec says this must be set, even though
		 * WaDisable4x2SubspanOptimization:ivb,hsw
		 * WaDisable4x2SubspanOptimization isn't listed for VLV.
		 */
		wa_masked_en(wal,
			     CACHE_MODE_1,
			     PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);

		/*
		 * BSpec recommends 8x4 when MSAA is used,
		 * however in practice 16x4 seems fastest.
		 *
		 * Note that PS/WM thread counts depend on the WIZ hashing
		 * disable bit, which we don't touch here, but it's good
		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
		 */
		wa_masked_field_set(wal,
				    GEN7_GT_MODE,
				    GEN6_WIZ_HASHING_MASK,
				    GEN6_WIZ_HASHING_16x4);
	}

	if (IS_GRAPHICS_VER(i915, 6, 7))
		/*
		 * We need to disable the AsyncFlip performance optimisations in
		 * order to use MI_WAIT_FOR_EVENT within the CS. It should
		 * already be programmed to '1' on all products.
		 *
		 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
		 */
		wa_masked_en(wal,
			     RING_MI_MODE(RENDER_RING_BASE),
			     ASYNC_FLIP_PERF_DISABLE);

	if (GRAPHICS_VER(i915) == 6) {
		/*
		 * Required for the hardware to program scanline values for
		 * waiting
		 * WaEnableFlushTlbInvalidationMode:snb
		 */
		wa_masked_en(wal,
			     GFX_MODE,
			     GFX_TLB_INVALIDATE_EXPLICIT);

		/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
		wa_masked_en(wal,
			     _3D_CHICKEN,
			     _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);

		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     /* WaStripsFansDisableFastClipPerformanceFix:snb */
			     _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
			     /*
			      * Bspec says:
			      * "This bit must be set if 3DSTATE_CLIP clip mode is set
			      * to normal and 3DSTATE_SF number of SF output attributes
			      * is more than 16."
			      */
			     _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);

		/*
		 * BSpec recommends 8x4 when MSAA is used,
		 * however in practice 16x4 seems fastest.
		 *
		 * Note that PS/WM thread counts depend on the WIZ hashing
		 * disable bit, which we don't touch here, but it's good
		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
		 */
		wa_masked_field_set(wal,
				    GEN6_GT_MODE,
				    GEN6_WIZ_HASHING_MASK,
				    GEN6_WIZ_HASHING_16x4);

		/* WaDisable_RenderCache_OperationalFlush:snb */
		wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);

		/*
		 * From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		wa_masked_dis(wal,
			      CACHE_MODE_0,
			      CM0_STC_EVICT_DISABLE_LRA_SNB);
	}

	if (IS_GRAPHICS_VER(i915, 4, 6))
		/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
		wa_add(wal, RING_MI_MODE(RENDER_RING_BASE),
		       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
		       /* XXX bit doesn't stick on Broadwater */
		       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);

	if (GRAPHICS_VER(i915) == 4)
		/*
		 * Disable CONSTANT_BUFFER before it is loaded from the context
		 * image. For as it is loaded, it is executed and the stored
		 * address may no longer be valid, leading to a GPU hang.
		 *
		 * This imposes the requirement that userspace reload their
		 * CONSTANT_BUFFER on every batch, fortunately a requirement
		 * they are already accustomed to from before contexts were
		 * enabled.
		 */
		wa_add(wal, ECOSKPD(RENDER_RING_BASE),
		       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
		       0 /* XXX bit doesn't stick on Broadwater */,
		       true);
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaKBLVECSSemaphoreWaitPoll:kbl */
	if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
		wa_write(wal,
			 RING_SEMA_WAIT_POLL(engine->mmio_base),
			 1);
	}
}

static void
ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
		/* Wa_14014999345:pvc */
		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
	}
}

/*
 * The bspec performance guide has recommended MMIO tuning settings.  These
 * aren't truly "workarounds" but we want to program them with the same
 * workaround infrastructure to ensure that they're automatically added to
 * the GuC save/restore lists, re-applied at the right times, and checked for
 * any conflicting programming requested by real workarounds.
 *
 * Programming settings should be added here only if their registers are not
 * part of an engine's register state context.  If a register is part of a
 * context, then any tuning settings should be programmed in an appropriate
 * function invoked by __intel_engine_init_ctx_wa().
 */
static void
add_render_compute_tuning_settings(struct drm_i915_private *i915,
				   struct i915_wa_list *wal)
{
	if (IS_PONTEVECCHIO(i915)) {
		wa_write(wal, XEHPC_L3SCRUB,
			 SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
	}

	if (IS_DG2(i915)) {
		wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
		wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);

		/*
		 * This is also listed as Wa_22012654132 for certain DG2
		 * steppings, but the tuning setting programming is a superset
		 * since it applies to all DG2 variants and steppings.
		 *
		 * Note that register 0xE420 is write-only and cannot be read
		 * back for verification on DG2 (due to Wa_14012342262), so
		 * we need to explicitly skip the readback.
		 */
		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
			   _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
			   0 /* write-only, so skip validation */,
			   true);
	}

	/*
	 * This tuning setting proves beneficial only on ATS-M designs; the
	 * default "age based" setting is optimal on regular DG2 and other
	 * platforms.
	 */
	if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
		wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
					THREAD_EX_ARB_MODE_RR_AFTER_DEP);
}

/*
 * The workarounds in this function apply to shared registers in
 * the general render reset domain that aren't tied to a
 * specific engine.  Since all render+compute engines get reset
 * together, and the contents of these registers are lost during
 * the shared render domain reset, we'll define such workarounds
 * here and then add them to just a single RCS or CCS engine's
 * workaround list (whichever engine has the
 * I915_ENGINE_FIRST_RENDER_COMPUTE flag).
 */
static void
general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	add_render_compute_tuning_settings(i915, wal);

	if (IS_PONTEVECCHIO(i915)) {
		/* Wa_16016694945 */
		wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
	}

	if (IS_XEHPSDV(i915)) {
		/* Wa_1409954639 */
		wa_mcr_masked_en(wal,
				 GEN8_ROW_CHICKEN,
				 SYSTOLIC_DOP_CLOCK_GATING_DIS);

		/* Wa_1607196519 */
		wa_mcr_masked_en(wal,
				 GEN9_ROW_CHICKEN4,
				 GEN12_DISABLE_GRF_CLEAR);

		/* Wa_14010670810:xehpsdv */
		wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);

		/* Wa_14010449647:xehpsdv */
		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
				 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);

		/* Wa_18011725039:xehpsdv */
		if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
			wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
			wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
		}

		/* Wa_14012362059:xehpsdv */
		wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);

		/* Wa_14014368820:xehpsdv */
		wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
				GLOBAL_INVALIDATION_MODE);
	}

	if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
		/* Wa_14015227452:dg2,pvc */
		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);

		/* Wa_22014226127:dg2,pvc */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);

		/* Wa_16015675438:dg2,pvc */
		wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);

		/* Wa_18018781329:dg2,pvc */
		wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
		wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
		wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
		wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
	}

	if (IS_DG2(i915)) {
		/*
		 * Wa_16011620976:dg2_g11
		 * Wa_22015475538:dg2
		 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);

		/* Wa_18017747507:dg2 */
		wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
	}
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (GRAPHICS_VER(engine->i915) < 4)
		return;

	engine_fake_wa_init(engine, wal);

	/*
	 * These are common workarounds that only need to be applied to a
	 * single RCS/CCS engine's workaround list since the registers they
	 * touch are reset as part of the general render domain reset.
	 */
	if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
		general_render_compute_wa_init(engine, wal);

	if (engine->class == COMPUTE_CLASS)
		ccs_engine_wa_init(engine, wal);
	else if (engine->class == RENDER_CLASS)
		rcs_engine_wa_init(engine, wal);
	else
		xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->wa_list;

	wa_init_start(wal, engine->gt, "engine", engine->name);
	engine_init_workarounds(engine, wal);
	wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
	wa_list_apply(&engine->wa_list);
}

static const struct i915_range mcr_ranges_gen8[] = {
	{ .start = 0x5500, .end = 0x55ff },
	{ .start = 0x7000, .end = 0x7fff },
	{ .start = 0x9400, .end = 0x97ff },
	{ .start = 0xb000, .end = 0xb3ff },
	{ .start = 0xe000, .end = 0xe7ff },
	{},
};

static const struct i915_range mcr_ranges_gen12[] = {
	{ .start =  0x8150, .end =  0x815f },
	{ .start =  0x9520, .end =  0x955f },
	{ .start =  0xb100, .end =  0xb3ff },
	{ .start =  0xde80, .end =  0xe8ff },
	{ .start = 0x24a00, .end = 0x24a7f },
	{},
};

static const struct i915_range mcr_ranges_xehp[] = {
	{ .start =  0x4000, .end =  0x4aff },
	{ .start =  0x5200, .end =  0x52ff },
	{ .start =  0x5400, .end =  0x7fff },
	{ .start =  0x8140, .end =  0x815f },
	{ .start =  0x8c80, .end =  0x8dff },
	{ .start =  0x94d0, .end =  0x955f },
	{ .start =  0x9680, .end =  0x96ff },
	{ .start =  0xb000, .end =  0xb3ff },
	{ .start =  0xc800, .end =  0xcfff },
	{ .start =  0xd800, .end =  0xd8ff },
	{ .start =  0xdc00, .end =  0xffff },
	{ .start = 0x17000, .end = 0x17fff },
	{ .start = 0x24a00, .end = 0x24a7f },
	{},
};

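/*
 * Return true if @offset falls in a multicast/replicated (MCR) register
 * range on this platform. Such registers cannot be read back reliably
 * through the command streamer, so they are skipped by the CS-based
 * verification below.
 */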
static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
	const struct i915_range *mcr_ranges;
	int i;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
		mcr_ranges = mcr_ranges_xehp;
	else if (GRAPHICS_VER(i915) >= 12)
		mcr_ranges = mcr_ranges_gen12;
	else if (GRAPHICS_VER(i915) >= 8)
		mcr_ranges = mcr_ranges_gen8;
	else
		return false;

	/*
	 * Registers in these ranges are affected by the MCR selector
	 * which only controls CPU initiated MMIO. Routing does not
	 * work for CS access so we cannot verify them on this path.
	 */
	for (i = 0; mcr_ranges[i].start; i++)
		if (offset >= mcr_ranges[i].start &&
		    offset <= mcr_ranges[i].end)
			return true;

	return false;
}

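/*
 * Emit one MI_STORE_REGISTER_MEM per (non-MCR) workaround register so the
 * command streamer dumps the current register values into the scratch
 * buffer provided by engine_wa_list_verify().
 */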
static int
wa_list_srm(struct i915_request *rq,
	    const struct i915_wa_list *wal,
	    struct i915_vma *vma)
{
	struct drm_i915_private *i915 = rq->engine->i915;
	unsigned int i, count = 0;
	const struct i915_wa *wa;
	u32 srm, *cs;

	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	if (GRAPHICS_VER(i915) >= 8)
		srm++;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
			count++;
	}

	cs = intel_ring_begin(rq, 4 * count);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 offset = i915_mmio_reg_offset(wa->reg);

		if (mcr_range(i915, offset))
			continue;

		*cs++ = srm;
		*cs++ = offset;
		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
		*cs++ = 0;
	}
	intel_ring_advance(rq, cs);

	return 0;
}

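/*
 * Verify the workaround list from the engine's own point of view: allocate
 * a scratch buffer, submit a request on the given context that stores each
 * register into it via wa_list_srm(), then compare the results against the
 * expected values with wa_verify().
 */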
static int engine_wa_list_verify(struct intel_context *ce,
				 const struct i915_wa_list * const wal,
				 const char *from)
{
	const struct i915_wa *wa;
	struct i915_request *rq;
	struct i915_vma *vma;
	struct i915_gem_ww_ctx ww;
	unsigned int i;
	u32 *results;
	int err;

	if (!wal->count)
		return 0;

	vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
					   wal->count * sizeof(u32));
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_engine_pm_get(ce->engine);
	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(vma->obj, &ww);
	if (err == 0)
		err = intel_context_pin_ww(ce, &ww);
	if (err)
		goto err_pm;

	err = i915_vma_pin_ww(vma, &ww, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err)
		goto err_unpin;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_vma;
	}

	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err == 0)
		err = wa_list_srm(rq, wal, vma);

	i915_request_get(rq);
	if (err)
		i915_request_set_error_once(rq, err);
	i915_request_add(rq);

	if (err)
		goto err_rq;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
	if (IS_ERR(results)) {
		err = PTR_ERR(results);
		goto err_rq;
	}

	err = 0;
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
			continue;

		if (!wa_verify(wal->gt, wa, results[i], wal->name, from))
			err = -ENXIO;
	}

	i915_gem_object_unpin_map(vma->obj);

err_rq:
	i915_request_put(rq);
err_vma:
	i915_vma_unpin(vma);
err_unpin:
	intel_context_unpin(ce);
err_pm:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	intel_engine_pm_put(ce->engine);
	i915_vma_put(vma);
	return err;
}

int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
				    const char *from)
{
	return engine_wa_list_verify(engine->kernel_context,
				     &engine->wa_list,
				     from);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_workarounds.c"
#endif