// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * into five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

/*
 * KBL revision ID ordering is bizarre; higher revision IDs map to lower
 * steppings in some cases.  So rather than test against the revision ID
 * directly, let's map that into our own range of increasing IDs that we
 * can test against in a regular manner.
 */

const struct i915_rev_steppings kbl_revids[] = {
	[0] = { .gt_stepping = KBL_REVID_A0, .disp_stepping = KBL_REVID_A0 },
	[1] = { .gt_stepping = KBL_REVID_B0, .disp_stepping = KBL_REVID_B0 },
	[2] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B0 },
	[3] = { .gt_stepping = KBL_REVID_D0, .disp_stepping = KBL_REVID_B0 },
	[4] = { .gt_stepping = KBL_REVID_F0, .disp_stepping = KBL_REVID_C0 },
	[5] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B1 },
	[6] = { .gt_stepping = KBL_REVID_D1, .disp_stepping = KBL_REVID_B1 },
	[7] = { .gt_stepping = KBL_REVID_G0, .disp_stepping = KBL_REVID_C0 },
};
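
/*
 * Illustrative sketch (editor's addition, not from the original file): the
 * table above is meant to be consumed through helpers such as
 * IS_KBL_GT_REVID(), used later in this file, e.g.
 *
 *	if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
 *		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 *			     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 *
 * which remaps the raw PCI revision ID through kbl_revids[] and then compares
 * the resulting gt_stepping against the given range.
 */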

const struct i915_rev_steppings tgl_uy_revid_step_tbl[] = {
	[0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A0 },
	[1] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_C0 },
	[2] = { .gt_stepping = STEP_B1, .disp_stepping = STEP_C0 },
	[3] = { .gt_stepping = STEP_C0, .disp_stepping = STEP_D0 },
};

/* Same GT stepping between tgl_uy_revids and tgl_revids doesn't mean the same HW */
const struct i915_rev_steppings tgl_revid_step_tbl[] = {
	[0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_B0 },
	[1] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_D0 },
};

const struct i915_rev_steppings adls_revid_step_tbl[] = {
	[0x0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A0 },
	[0x1] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A2 },
	[0x4] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_B0 },
	[0x8] = { .gt_stepping = STEP_C0, .disp_stepping = STEP_B0 },
	[0xC] = { .gt_stepping = STEP_D0, .disp_stepping = STEP_C0 },
};

static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
{
	wal->name = name;
	wal->engine_name = engine_name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup(wal->list,
					       wal->count * sizeof(*list),
					       GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
			 wal->wa_count, wal->name, wal->engine_name);
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
				     GFP_KERNEL);
		if (!list) {
			DRM_ERROR("No space for workaround init!\n");
			return;
		}

		if (wal->list) {
			memcpy(list, wal->list, sizeof(*wa) * wal->count);
			kfree(wal->list);
		}

		wal->list = list;
	}

	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
				DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
					  i915_mmio_reg_offset(wa_->reg),
					  wa_->clr, wa_->set);

				wa_->set &= ~wa->clr;
			}

			wal->wa_count++;
			wa_->set |= wa->set;
			wa_->clr |= wa->clr;
			wa_->read |= wa->read;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
		   u32 clear, u32 set, u32 read_mask)
{
	struct i915_wa wa = {
		.reg  = reg,
		.clr  = clear,
		.set  = set,
		.read = read_mask,
	};

	_wa_add(wal, &wa);
}

static void
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
	wa_add(wal, reg, clear, set, clear);
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_clr_set(wal, reg, ~0, set);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_clr_set(wal, reg, set, set);
}

static void
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
{
	wa_write_clr_set(wal, reg, clr, 0);
}
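
/*
 * Editor's summary (illustrative, not from the original file) of how the
 * wa_write* helpers above fill in a struct i915_wa:
 *
 *	wa_write(wal, reg, set)          -> clr = ~0,  set = set, read = ~0
 *	wa_write_or(wal, reg, set)       -> clr = set, set = set, read = set
 *	wa_write_clr(wal, reg, clr)      -> clr = clr, set = 0,   read = clr
 *	wa_write_clr_set(wal, reg, c, s) -> clr = c,   set = s,   read = c
 *
 * When the list is applied, "clr" selects the bits cleared before "set" is
 * ORed in, and "read" is the mask of bits that wa_verify() later checks.
 */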

/*
 * WA operations on "masked register". A masked register has the upper 16 bits
 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
 * portion of the register without an rmw: you simply write in the upper 16 bits
 * the mask of bits you are going to modify.
 *
 * The wa_masked_* family of functions already does the necessary operations to
 * calculate the mask based on the parameters passed, so the user only has to
 * provide the lower 16 bits of that register.
 */
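
/*
 * Editor's example (illustrative; assumes the _MASKED_* helpers from
 * i915_reg.h, where _MASKED_FIELD(mask, value) roughly expands to
 * ((mask) << 16) | (value)):
 *
 *	wa_masked_en(wal, reg, BIT(3))
 *
 * queues a write of 0x00080008 (mask bit in the upper half, value bit in the
 * lower half), while wa_masked_dis(wal, reg, BIT(3)) queues 0x00080000. In
 * both cases only the lower-half "val" bits are passed to wa_add() as the
 * read mask, so wa_verify() checks just those bits on readback.
 */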

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
}

static void
wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
}

static void
wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
		    u32 mask, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask);
}

static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	wa_masked_en(wal, MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	wa_masked_en(wal, GEN8_ROW_CHICKEN,
		     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_DONOT_FETCH_MEM_WHEN_MASKED |
		     HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	wa_masked_field_set(wal, GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	wa_masked_en(wal, GEN7_ROW_CHICKEN2,
		     DOP_CLOCK_GATING_DISABLE);

	wa_masked_en(wal, HALF_SLICE_CHICKEN3,
		     GEN8_SAMPLER_POWER_BYPASS_DIS);

	wa_masked_en(wal, HDC_CHICKEN0,
		     /* WaForceContextSaveRestoreNonCoherent:bdw */
		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
		     /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
		     (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:chv */
	wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
			     GEN9_PBE_COMPRESSED_HASH_SELECTION);
		wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
			     GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	wa_masked_en(wal, GEN8_ROW_CHICKEN,
		     FLOW_CONTROL_ENABLE |
		     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
		     GEN9_ENABLE_YV12_BUGFIX |
		     GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	wa_masked_en(wal, CACHE_MODE_1,
		     GEN8_4x4_STC_OPTIMIZATION_DISABLE |
		     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
		      GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
		     HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(i915) ||
	    IS_KABYLAKE(i915) ||
	    IS_COFFEELAKE(i915) ||
	    IS_COMETLAKE(i915))
		wa_masked_en(wal, HALF_SLICE_CHICKEN3,
			     GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to a safe value. Userspace is
	 * still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
	if (IS_GEN9_LP(i915))
		wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
				struct i915_wa_list *wal)
{
	struct intel_gt *gt = engine->gt;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	wa_masked_field_set(wal, GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);
	skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bxt */
	wa_masked_en(wal, GEN8_ROW_CHICKEN,
		     STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
			     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
		     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:glk */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
		     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	/* WaForceContextSaveRestoreNonCoherent:cnl */
	wa_masked_en(wal, CNL_HDC_CHICKEN0,
		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaPushConstantDereferenceHoldDisable:cnl */
	wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

	/* FtrEnableFastAnisoL1BankingFix:cnl */
	wa_masked_en(wal, HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

	/* WaDisable3DMidCmdPreemption:cnl */
	wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:cnl */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaDisableEarlyEOT:cnl */
	wa_masked_en(wal, GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaDisableBankHangMode:icl */
	wa_write(wal,
		 GEN8_L3CNTLREG,
		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
		 GEN8_ERRDETBCTRL);

	/* Wa_1604370585:icl (pre-prod)
	 * Formerly known as WaPushConstantDereferenceHoldDisable
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		wa_masked_en(wal, GEN7_ROW_CHICKEN2,
			     PUSH_CONSTANT_DEREF_DISABLE);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	/* Wa_2006611047:icl (pre-prod)
	 * Formerly known as WaDisableImprovedTdlClkGating
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		wa_masked_en(wal, GEN7_ROW_CHICKEN2,
			     GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

	/* Wa_2006665173:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
			     GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

	/* WaEnableFloatBlendOptimization:icl */
	wa_write_clr_set(wal,
			 GEN10_CACHE_MODE_SS,
			 0, /* write-only, so skip validation */
			 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

	/* WaDisableGPGPUMidThreadPreemption:icl */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

	/* allow headerless messages for preemptible GPGPU context */
	wa_masked_en(wal, GEN10_SAMPLER_MODE,
		     GEN11_SAMPLER_ENABLE_HEADLESS_MSG);

	/* Wa_1604278689:icl,ehl */
	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
	wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
			 0, /* write-only register; skip validation */
			 0xFFFFFFFF);

	/* Wa_1406306137:icl,ehl */
	wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
}

static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
				       struct i915_wa_list *wal)
{
	/*
	 * Wa_1409142259:tgl
	 * Wa_1409347922:tgl
	 * Wa_1409252684:tgl
	 * Wa_1409217633:tgl
	 * Wa_1409207793:tgl
	 * Wa_1409178076:tgl
	 * Wa_1408979724:tgl
	 * Wa_14010443199:rkl
	 * Wa_14010698770:rkl
	 */
	wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);

	/* WaDisableGPGPUMidThreadPreemption:gen12 */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
}

static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen12_ctx_workarounds_init(engine, wal);

	/*
	 * Wa_1604555607:tgl,rkl
	 *
	 * Note that the implementation of this workaround is further modified
	 * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
	 * FF_MODE2 register will return the wrong value when read. The default
	 * value for this register is zero for all fields and there are no bit
	 * masks. So instead of doing an RMW we should just write the GS Timer
	 * and TDS timer values for Wa_1604555607 and Wa_16011163337.
	 */
	wa_add(wal,
	       FF_MODE2,
	       FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK,
	       FF_MODE2_GS_TIMER_224  | FF_MODE2_TDS_TIMER_128,
	       0);
}

static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen12_ctx_workarounds_init(engine, wal);

	/* Wa_1409044764 */
	wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
		      DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);

	/* Wa_22010493298 */
	wa_masked_en(wal, HIZ_CHICKEN,
		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);

	/*
	 * Wa_16011163337
	 *
	 * Like in tgl_ctx_workarounds_init(), read verification is ignored due
	 * to Wa_1608008084.
	 */
	wa_add(wal,
	       FF_MODE2,
	       FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
			   struct i915_wa_list *wal,
			   const char *name)
{
	struct drm_i915_private *i915 = engine->i915;

	if (engine->class != RENDER_CLASS)
		return;

	wa_init_start(wal, name, engine->name);

	if (IS_DG1(i915))
		dg1_ctx_workarounds_init(engine, wal);
	else if (IS_ALDERLAKE_S(i915) || IS_ROCKETLAKE(i915) ||
		 IS_TIGERLAKE(i915))
		tgl_ctx_workarounds_init(engine, wal);
	else if (IS_GEN(i915, 12))
		gen12_ctx_workarounds_init(engine, wal);
	else if (IS_GEN(i915, 11))
		icl_ctx_workarounds_init(engine, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_ctx_workarounds_init(engine, wal);
	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
		cfl_ctx_workarounds_init(engine, wal);
	else if (IS_GEMINILAKE(i915))
		glk_ctx_workarounds_init(engine, wal);
	else if (IS_KABYLAKE(i915))
		kbl_ctx_workarounds_init(engine, wal);
	else if (IS_BROXTON(i915))
		bxt_ctx_workarounds_init(engine, wal);
	else if (IS_SKYLAKE(i915))
		skl_ctx_workarounds_init(engine, wal);
	else if (IS_CHERRYVIEW(i915))
		chv_ctx_workarounds_init(engine, wal);
	else if (IS_BROADWELL(i915))
		bdw_ctx_workarounds_init(engine, wal);
	else if (IS_GEN(i915, 7))
		gen7_ctx_workarounds_init(engine, wal);
	else if (IS_GEN(i915, 6))
		gen6_ctx_workarounds_init(engine, wal);
	else if (INTEL_GEN(i915) < 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
	struct i915_wa *wa;
	unsigned int i;
	u32 *cs;
	int ret;

	if (wal->count == 0)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		*cs++ = i915_mmio_reg_offset(wa->reg);
		*cs++ = wa->set;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}
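
/*
 * Editor's sketch (illustrative) of the command stream emitted above: one
 * MI_LOAD_REGISTER_IMM with an (offset, value) pair per workaround, padded to
 * an even dword count with MI_NOOP, e.g. for a two-entry list:
 *
 *	MI_LOAD_REGISTER_IMM(2)
 *	i915_mmio_reg_offset(wa[0].reg), wa[0].set
 *	i915_mmio_reg_offset(wa[1].reg), wa[1].set
 *	MI_NOOP
 *
 * bracketed by the two emit_flush(EMIT_BARRIER) calls so the register writes
 * are not reordered against the surrounding commands.
 */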

static void
gen4_gt_workarounds_init(struct drm_i915_private *i915,
			 struct i915_wa_list *wal)
{
	/* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
	wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
}

static void
g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen4_gt_workarounds_init(i915, wal);

	/* WaDisableRenderCachePipelinedFlush:g4x,ilk */
	wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
}

static void
ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	g4x_gt_workarounds_init(i915, wal);

	wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
}

static void
snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
}

static void
ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
	wa_masked_dis(wal,
		      GEN7_COMMON_SLICE_CHICKEN1,
		      GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);

	/* WaApplyL3ControlAndL3ChickenMode:ivb */
	wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
	wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);

	/* WaForceL3Serialization:ivb */
	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
}

static void
vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* WaForceL3Serialization:vlv */
	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);

	/*
	 * WaIncreaseL3CreditsForVLVB0:vlv
	 * This is the hardware default actually.
	 */
	wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
}

static void
hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* L3 caching of data atomics doesn't work -- disable it. */
	wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);

	wa_add(wal,
	       HSW_ROW_CHICKEN3, 0,
	       _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
		0 /* XXX does this reg exist? */);

	/* WaVSRefCountFullforceMissDisable:hsw */
	wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
}

static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
		wa_write_or(wal,
			    GAM_ECOCHK,
			    ECOCHK_DIS_TLB);

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_write_or(wal,
			    MMCD_MISC_CTRL,
			    MMCD_PCLA | MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	wa_write_or(wal,
		    GAM_ECOCHK,
		    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:skl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
		wa_write_or(wal,
			    GEN9_GAMT_ECO_REG_RW_IA,
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaInPlaceDecompressionHang:bxt */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_GT_REVID(i915, 0, KBL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableGafsUnitClkGating:kbl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:cfl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
	unsigned int slice, subslice;
	u32 l3_en, mcr, mcr_mask;

	GEM_BUG_ON(INTEL_GEN(i915) < 10);

	/*
	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
	 * L3Banks could be fused off in single slice scenario. If that is
	 * the case, we might need to program MCR select to a valid L3Bank
	 * by default, to make sure we correctly read certain registers
	 * later on (in the range 0xB100 - 0xB3FF).
	 *
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
	 * Before any MMIO read into slice/subslice specific registers, MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. In the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 *
	 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
	 * the subslice and the L3 bank that the respective mmio reads will be
	 * steered to, we have to find a common index which works for both
	 * accesses.
	 *
	 * The case where we cannot find a common index fortunately should not
	 * happen in production hardware, so we only emit a warning instead of
	 * implementing something more complex that requires checking the range
	 * of every MMIO read.
	 */

	if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
		u32 l3_fuse =
			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
			GEN10_L3BANK_MASK;

		drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
		l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
	} else {
		l3_en = ~0;
	}

	slice = fls(sseu->slice_mask) - 1;
	subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
	if (!subslice) {
		drm_warn(&i915->drm,
			 "No common index found between subslice mask %x and L3 bank mask %x!\n",
			 intel_sseu_get_subslices(sseu, slice), l3_en);
		subslice = fls(l3_en);
		drm_WARN_ON(&i915->drm, !subslice);
	}
	subslice--;

	if (INTEL_GEN(i915) >= 11) {
		mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
	} else {
		mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
	}

	drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);

	wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaInPlaceDecompressionHang:cnl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaInPlaceDecompressionHang:icl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaModifyGamTlbPartitioning:icl */
	wa_write_clr_set(wal,
			 GEN11_GACB_PERF_CTRL,
			 GEN11_HASH_CTRL_MASK,
			 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	wa_write_or(wal,
		    GEN11_LSN_UNSLCVC,
		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	wa_write_or(wal,
		    GEN8_GAMW_ECO_DEV_RW_IA,
		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1405779004:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    MSCUNIT_CLKGATE_DIS);

	/* Wa_1406838659:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		wa_write_or(wal,
			    INF_UNIT_LEVEL_CLKGATE,
			    CGPSF_CLKGATE_DIS);

	/* Wa_1406463099:icl
	 * Formerly known as WaGamTlbPendError
	 */
	wa_write_or(wal,
		    GAMT_CHKN_BIT_REG,
		    GAMT_CHKN_DISABLE_L3_COH_PIPE);

	/* Wa_1607087056:icl,ehl,jsl */
	if (IS_ICELAKE(i915) ||
	    IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
	}
}

static void
gen12_gt_workarounds_init(struct drm_i915_private *i915,
			  struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);
}

static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen12_gt_workarounds_init(i915, wal);

	/* Wa_1409420604:tgl */
	if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0))
		wa_write_or(wal,
			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
			    CPSSUNIT_CLKGATE_DIS);

	/* Wa_1607087056:tgl also known as BUG:1409180338 */
	if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);

	/* Wa_1408615072:tgl[a0] */
	if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0))
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
			    VSUNIT_CLKGATE_DIS_TGL);
}

static void
dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen12_gt_workarounds_init(i915, wal);

	/* Wa_1607087056:dg1 */
	if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);

	/* Wa_1409420604:dg1 */
	if (IS_DG1(i915))
		wa_write_or(wal,
			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
			    CPSSUNIT_CLKGATE_DIS);

	/* Wa_1408615072:dg1 */
	/* Empirical testing shows this register is unaffected by engine reset. */
	if (IS_DG1(i915))
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
			    VSUNIT_CLKGATE_DIS_TGL);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	if (IS_DG1(i915))
		dg1_gt_workarounds_init(i915, wal);
	else if (IS_TIGERLAKE(i915))
		tgl_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 12))
		gen12_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 11))
		icl_gt_workarounds_init(i915, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_gt_workarounds_init(i915, wal);
	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
		cfl_gt_workarounds_init(i915, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(i915, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(i915, wal);
	else if (IS_BROXTON(i915))
		bxt_gt_workarounds_init(i915, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(i915, wal);
	else if (IS_HASWELL(i915))
		hsw_gt_workarounds_init(i915, wal);
	else if (IS_VALLEYVIEW(i915))
		vlv_gt_workarounds_init(i915, wal);
	else if (IS_IVYBRIDGE(i915))
		ivb_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 6))
		snb_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 5))
		ilk_gt_workarounds_init(i915, wal);
	else if (IS_G4X(i915))
		g4x_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 4))
		gen4_gt_workarounds_init(i915, wal);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
	struct i915_wa_list *wal = &i915->gt_wa_list;

	wa_init_start(wal, "GT", "global");
	gt_init_workarounds(i915, wal);
	wa_init_finish(wal);
}

static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}

static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
	if ((cur ^ wa->set) & wa->read) {
		DRM_ERROR("%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
			  name, from, i915_mmio_reg_offset(wa->reg),
			  cur, cur & wa->read, wa->set & wa->read);

		return false;
	}

	return true;
}

static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	fw = wal_get_fw_for_rmw(uncore, wal);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (wa->clr)
			intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set);
		else
			intel_uncore_write_fw(uncore, wa->reg, wa->set);
		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
			wa_verify(wa,
				  intel_uncore_read_fw(uncore, wa->reg),
				  wal->name, "application");
	}

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock_irqrestore(&uncore->lock, flags);
}

void intel_gt_apply_workarounds(struct intel_gt *gt)
{
	wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
}

static bool wa_list_verify(struct intel_uncore *uncore,
			   const struct i915_wa_list *wal,
			   const char *from)
{
	struct i915_wa *wa;
	unsigned int i;
	bool ok = true;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		ok &= wa_verify(wa,
				intel_uncore_read(uncore, wa->reg),
				wal->name, from);

	return ok;
}

bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
	return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
}

__maybe_unused
static bool is_nonpriv_flags_valid(u32 flags)
{
	/* Check only valid flag bits are set */
	if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
		return false;

	/* NB: Only 3 out of 4 enum values are valid for access field */
	if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
	    RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
		return false;

	return true;
}

static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.reg = reg
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
		return;

	wa.reg.reg |= flags;
	_wa_add(wal, &wa);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);

	/* WaSendPushConstantsFromMMIO:skl,bxt */
	whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/*
	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
	 *
	 * This covers 4 registers which are next to one another:
	 *   - PS_INVOCATION_COUNT
	 *   - PS_INVOCATION_COUNT_UDW
	 *   - PS_DEPTH_COUNT
	 *   - PS_DEPTH_COUNT_UDW
	 */
	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
			  RING_FORCE_TO_NONPRIV_RANGE_4);
}

static void cml_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		whitelist_reg_ext(w,
				  RING_CTX_TIMESTAMP(engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);

	cfl_whitelist_build(engine);
}

static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

		/* WaAllowUMDToModifySamplerMode:icl */
		whitelist_reg(w, GEN10_SAMPLER_MODE);

		/* WaEnableStateCacheRedirectToCS:icl */
		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);

		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
		 *
		 * This covers 4 registers which are next to one another:
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);
		break;

	case VIDEO_DECODE_CLASS:
		/* hucStatusRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucUKernelHdrInfoRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucStatus2RegOffset */
		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		whitelist_reg_ext(w,
				  RING_CTX_TIMESTAMP(engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		break;

	default:
		whitelist_reg_ext(w,
				  RING_CTX_TIMESTAMP(engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		break;
	}
}

static void tgl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
		 * Wa_1408556865:tgl
		 *
		 * This covers 4 registers which are next to one another:
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);

		/* Wa_1808121037:tgl */
		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);

		/* Wa_1806527549:tgl */
		whitelist_reg(w, HIZ_CHICKEN);
		break;
	default:
		whitelist_reg_ext(w,
				  RING_CTX_TIMESTAMP(engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		break;
	}
}

static void dg1_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	tgl_whitelist_build(engine);

	/* GEN:BUG:1409280441:dg1 */
	if (IS_DG1_REVID(engine->i915, DG1_REVID_A0, DG1_REVID_A0) &&
	    (engine->class == RENDER_CLASS ||
	     engine->class == COPY_ENGINE_CLASS))
		whitelist_reg_ext(w, RING_ID(engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *w = &engine->whitelist;

	wa_init_start(w, "whitelist", engine->name);

	if (IS_DG1(i915))
		dg1_whitelist_build(engine);
	else if (IS_GEN(i915, 12))
		tgl_whitelist_build(engine);
	else if (IS_GEN(i915, 11))
		icl_whitelist_build(engine);
	else if (IS_CANNONLAKE(i915))
		cnl_whitelist_build(engine);
	else if (IS_COMETLAKE(i915))
		cml_whitelist_build(engine);
	else if (IS_COFFEELAKE(i915))
		cfl_whitelist_build(engine);
	else if (IS_GEMINILAKE(i915))
		glk_whitelist_build(engine);
	else if (IS_KABYLAKE(i915))
		kbl_whitelist_build(engine);
	else if (IS_BROXTON(i915))
		bxt_whitelist_build(engine);
	else if (IS_SKYLAKE(i915))
		skl_whitelist_build(engine);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(w);
}

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
	const struct i915_wa_list *wal = &engine->whitelist;
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(wa->reg));

	/* And clear the rest just in case of garbage */
	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(RING_NOPID(base)));
}

static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
	    IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0)) {
		/*
		 * Wa_1607138336:tgl[a0],dg1[a0]
		 * Wa_1607063988:tgl[a0],dg1[a0]
		 */
		wa_write_or(wal,
			    GEN9_CTX_PREEMPT_REG,
			    GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
	}

	if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0)) {
		/*
		 * Wa_1606679103:tgl
		 * (see also Wa_1606682166:icl)
		 */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);
	}

	if (IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/* Wa_1606931601:tgl,rkl,dg1,adl-s */
		wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);

		/*
		 * Wa_1407928979:tgl A*
		 * Wa_18011464164:tgl[B0+],dg1[B0+]
		 * Wa_22010931296:tgl[B0+],dg1[B0+]
		 * Wa_14010919138:rkl,dg1,adl-s
		 */
		wa_write_or(wal, GEN7_FF_THREAD_MODE,
			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);

		/*
		 * Wa_1606700617:tgl,dg1
		 * Wa_22010271021:tgl,rkl,dg1,adl-s
		 */
		wa_masked_en(wal,
			     GEN9_CS_DEBUG_MODE1,
			     FF_DOP_CLOCK_GATE_DISABLE);
	}

	if (IS_ALDERLAKE_S(i915) || IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/* Wa_1409804808:tgl,rkl,dg1[a0],adl-s */
		wa_masked_en(wal, GEN7_ROW_CHICKEN2,
			     GEN12_PUSH_CONST_DEREF_HOLD_DIS);

		/*
		 * Wa_1409085225:tgl
		 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s
		 */
		wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
	}

	if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/*
		 * Wa_1607030317:tgl
		 * Wa_1607186500:tgl
		 * Wa_1607297627:tgl,rkl,dg1[a0]
		 *
		 * On TGL and RKL there are multiple entries for this WA in the
		 * BSpec; some indicate this is an A0-only WA, others indicate
		 * it applies to all steppings so we trust the "all steppings."
		 * For DG1 this only applies to A0.
		 */
		wa_masked_en(wal,
			     GEN6_RC_SLEEP_PSMI_CONTROL,
			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
			     GEN8_RC_SEMA_IDLE_MSG_DISABLE);
	}

	if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/* Wa_1406941453:tgl,rkl,dg1 */
		wa_masked_en(wal,
			     GEN10_SAMPLER_MODE,
			     ENABLE_SMALLPL);
	}

	if (IS_GEN(i915, 11)) {
		/* This is not a Wa. Enable for better image quality */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

		/* WaPipelineFlushCoherentLines:icl */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN8_LQSC_FLUSH_COHERENT_LINES);

		/*
		 * Wa_1405543622:icl
		 * Formerly known as WaGAPZPriorityScheme
		 */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN11_ARBITRATION_PRIO_ORDER_MASK);

		/*
		 * Wa_1604223664:icl
		 * Formerly known as WaL3BankAddressHashing
		 */
		wa_write_clr_set(wal,
				 GEN8_GARBCNTL,
				 GEN11_HASH_CTRL_EXCL_MASK,
				 GEN11_HASH_CTRL_EXCL_BIT0);
		wa_write_clr_set(wal,
				 GEN11_GLBLINVL,
				 GEN11_BANK_HASH_ADDR_EXCL_MASK,
				 GEN11_BANK_HASH_ADDR_EXCL_BIT0);

		/*
		 * Wa_1405733216:icl
		 * Formerly known as WaDisableCleanEvicts
		 */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN11_LQSC_CLEAN_EVICT_DISABLE);

		/* WaForwardProgressSoftReset:icl */
		wa_write_or(wal,
			    GEN10_SCRATCH_LNCF2,
			    PMFLUSHDONE_LNICRSDROP |
			    PMFLUSH_GAPL3UNBLOCK |
			    PMFLUSHDONE_LNEBLK);

		/* Wa_1406609255:icl (pre-prod) */
		if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
			wa_write_or(wal,
				    GEN7_SARCHKMD,
				    GEN7_DISABLE_DEMAND_PREFETCH);

		/* Wa_1606682166:icl */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);

		/* Wa_1409178092:icl */
		wa_write_clr_set(wal,
				 GEN11_SCRATCH2,
				 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
				 0);

		/* WaEnable32PlaneMode:icl */
		wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
			     GEN11_ENABLE_32_PLANE_MODE);

		/*
		 * Wa_1408615072:icl,ehl  (vsunit)
		 * Wa_1407596294:icl,ehl  (hsunit)
		 */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
			    VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);

		/* Wa_1407352427:icl,ehl */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
			    PSDUNIT_CLKGATE_DIS);

		/* Wa_1406680159:icl,ehl */
		wa_write_or(wal,
			    SUBSLICE_UNIT_LEVEL_CLKGATE,
			    GWUNIT_CLKGATE_DIS);

		/*
		 * Wa_1408767742:icl[a2..forever],ehl[all]
		 * Wa_1605460711:icl[a0..c0]
		 */
		wa_write_or(wal,
			    GEN7_FF_THREAD_MODE,
			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);

		/* Wa_22010271021:ehl */
		if (IS_JSL_EHL(i915))
			wa_masked_en(wal,
				     GEN9_CS_DEBUG_MODE1,
				     FF_DOP_CLOCK_GATE_DISABLE);
	}

	if (IS_GEN_RANGE(i915, 9, 12)) {
		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
		wa_masked_en(wal,
			     GEN7_FF_SLICE_CS_CHICKEN1,
			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
	}

	if (IS_SKYLAKE(i915) ||
	    IS_KABYLAKE(i915) ||
	    IS_COFFEELAKE(i915) ||
	    IS_COMETLAKE(i915)) {
		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN9_GAPS_TSV_CREDIT_DISABLE);
	}

	if (IS_BROXTON(i915)) {
		/* WaDisablePooledEuLoadBalancingFix:bxt */
		wa_masked_en(wal,
			     FF_SLICE_CS_CHICKEN2,
			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	if (IS_GEN(i915, 9)) {
		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
		wa_masked_en(wal,
			     GEN9_CSFE_CHICKEN1_RCS,
			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
		wa_write_or(wal,
			    BDW_SCRATCH1,
			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
		if (IS_GEN9_LP(i915))
			wa_write_clr_set(wal,
					 GEN8_L3SQCREG1,
					 L3_PRIO_CREDITS_MASK,
					 L3_GENERAL_PRIO_CREDITS(62) |
					 L3_HIGH_PRIO_CREDITS(2));

		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN8_LQSC_FLUSH_COHERENT_LINES);

		/* Disable atomics in L3 to prevent unrecoverable hangs */
		wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
				 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
		wa_write_clr_set(wal, GEN8_L3SQCREG4,
				 GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
		wa_write_clr_set(wal, GEN9_SCRATCH1,
				 EVICTION_PERF_FIX_ENABLE, 0);
	}

	if (IS_HASWELL(i915)) {
		/* WaSampleCChickenBitEnable:hsw */
		wa_masked_en(wal,
			     HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);

		wa_masked_dis(wal,
			      CACHE_MODE_0_GEN7,
			      /* enable HiZ Raw Stall Optimization */
			      HIZ_RAW_STALL_OPT_DISABLE);

		/* WaDisable4x2SubspanOptimization:hsw */
		wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
	}

	if (IS_VALLEYVIEW(i915)) {
		/* WaDisableEarlyCull:vlv */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);

		/*
		 * WaVSThreadDispatchOverride:ivb,vlv
		 *
		 * This actually overrides the dispatch
		 * mode for all thread types.
		 */
		wa_write_clr_set(wal,
				 GEN7_FF_THREAD_MODE,
				 GEN7_FF_SCHED_MASK,
				 GEN7_FF_TS_SCHED_HW |
				 GEN7_FF_VS_SCHED_HW |
				 GEN7_FF_DS_SCHED_HW);

		/* WaPsdDispatchEnable:vlv */
		/* WaDisablePSDDualDispatchEnable:vlv */
		wa_masked_en(wal,
			     GEN7_HALF_SLICE_CHICKEN1,
			     GEN7_MAX_PS_THREAD_DEP |
			     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
	}

	if (IS_IVYBRIDGE(i915)) {
		/* WaDisableEarlyCull:ivb */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);

		if (0) { /* causes HiZ corruption on ivb:gt1 */
			/* enable HiZ Raw Stall Optimization */
			wa_masked_dis(wal,
				      CACHE_MODE_0_GEN7,
				      HIZ_RAW_STALL_OPT_DISABLE);
		}

		/*
		 * WaVSThreadDispatchOverride:ivb,vlv
		 *
		 * This actually overrides the dispatch
		 * mode for all thread types.
		 */
		wa_write_clr_set(wal,
				 GEN7_FF_THREAD_MODE,
				 GEN7_FF_SCHED_MASK,
				 GEN7_FF_TS_SCHED_HW |
				 GEN7_FF_VS_SCHED_HW |
				 GEN7_FF_DS_SCHED_HW);

		/* WaDisablePSDDualDispatchEnable:ivb */
		if (IS_IVB_GT1(i915))
			wa_masked_en(wal,
				     GEN7_HALF_SLICE_CHICKEN1,
				     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
	}

	if (IS_GEN(i915, 7)) {
		/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
		wa_masked_en(wal,
			     GFX_MODE_GEN7,
			     GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);

		/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
		wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);

		/*
		 * BSpec says this must be set, even though
		 * WaDisable4x2SubspanOptimization:ivb,hsw
		 * WaDisable4x2SubspanOptimization isn't listed for VLV.
		 */
		wa_masked_en(wal,
			     CACHE_MODE_1,
			     PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);

		/*
		 * BSpec recommends 8x4 when MSAA is used,
		 * however in practice 16x4 seems fastest.
		 *
		 * Note that PS/WM thread counts depend on the WIZ hashing
		 * disable bit, which we don't touch here, but it's good
		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
		 */
		wa_add(wal, GEN7_GT_MODE, 0,
		       _MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
				     GEN6_WIZ_HASHING_16x4),
		       GEN6_WIZ_HASHING_16x4);
	}

	if (IS_GEN_RANGE(i915, 6, 7))
		/*
		 * We need to disable the AsyncFlip performance optimisations in
		 * order to use MI_WAIT_FOR_EVENT within the CS. It should
		 * already be programmed to '1' on all products.
		 *
		 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
		 */
		wa_masked_en(wal,
			     MI_MODE,
			     ASYNC_FLIP_PERF_DISABLE);

	if (IS_GEN(i915, 6)) {
		/*
		 * Required for the hardware to program scanline values for
		 * waiting
		 * WaEnableFlushTlbInvalidationMode:snb
		 */
		wa_masked_en(wal,
			     GFX_MODE,
			     GFX_TLB_INVALIDATE_EXPLICIT);

		/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
		wa_masked_en(wal,
			     _3D_CHICKEN,
			     _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);

		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     /* WaStripsFansDisableFastClipPerformanceFix:snb */
			     _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
			     /*
			      * Bspec says:
			      * "This bit must be set if 3DSTATE_CLIP clip mode is set
			      * to normal and 3DSTATE_SF number of SF output attributes
			      * is more than 16."
			      */
			     _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);

		/*
		 * BSpec recommends 8x4 when MSAA is used,
		 * however in practice 16x4 seems fastest.
		 *
		 * Note that PS/WM thread counts depend on the WIZ hashing
		 * disable bit, which we don't touch here, but it's good
		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
		 */
		wa_add(wal,
		       GEN6_GT_MODE, 0,
		       _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
		       GEN6_WIZ_HASHING_16x4);

		/* WaDisable_RenderCache_OperationalFlush:snb */
		wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);

		/*
		 * From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		wa_masked_dis(wal,
			      CACHE_MODE_0,
			      CM0_STC_EVICT_DISABLE_LRA_SNB);
	}

	if (IS_GEN_RANGE(i915, 4, 6))
		/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
		wa_add(wal, MI_MODE,
		       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
		       /* XXX bit doesn't stick on Broadwater */
		       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);

	if (IS_GEN(i915, 4))
		/*
		 * Disable CONSTANT_BUFFER before it is loaded from the context
		 * image. For as it is loaded, it is executed and the stored
		 * address may no longer be valid, leading to a GPU hang.
		 *
		 * This imposes the requirement that userspace reload their
		 * CONSTANT_BUFFER on every batch, fortunately a requirement
		 * they are already accustomed to from before contexts were
		 * enabled.
		 */
		wa_add(wal, ECOSKPD,
		       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
		       0 /* XXX bit doesn't stick on Broadwater */);
}

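/* Engine workarounds for the non-render (xCS) engines. */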
static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaKBLVECSSemaphoreWaitPoll:kbl */
	if (IS_KBL_GT_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
		wa_write(wal,
			 RING_SEMA_WAIT_POLL(engine->mmio_base),
			 1);
	}
}

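/*
 * Build the engine workaround list: render engines get the full RCS set,
 * everything else only the xCS workarounds.
 */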
static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4))
		return;

	if (engine->class == RENDER_CLASS)
		rcs_engine_wa_init(engine, wal);
	else
		xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->wa_list;

	if (INTEL_GEN(engine->i915) < 4)
		return;

	wa_init_start(wal, "engine", engine->name);
	engine_init_workarounds(engine, wal);
	wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
	wa_list_apply(engine->uncore, &engine->wa_list);
}

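/*
 * Ranges of multicast (MCR) registers. The MCR selector only steers CPU
 * initiated MMIO, not command streamer access, so registers in these
 * ranges are skipped when the workaround list is read back for
 * verification (see mcr_range()).
 */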
struct mcr_range {
	u32 start;
	u32 end;
};

static const struct mcr_range mcr_ranges_gen8[] = {
	{ .start = 0x5500, .end = 0x55ff },
	{ .start = 0x7000, .end = 0x7fff },
	{ .start = 0x9400, .end = 0x97ff },
	{ .start = 0xb000, .end = 0xb3ff },
	{ .start = 0xe000, .end = 0xe7ff },
	{},
};

static const struct mcr_range mcr_ranges_gen12[] = {
	{ .start =  0x8150, .end =  0x815f },
	{ .start =  0x9520, .end =  0x955f },
	{ .start =  0xb100, .end =  0xb3ff },
	{ .start =  0xde80, .end =  0xe8ff },
	{ .start = 0x24a00, .end = 0x24a7f },
	{},
};

static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
	const struct mcr_range *mcr_ranges;
	int i;

	if (INTEL_GEN(i915) >= 12)
		mcr_ranges = mcr_ranges_gen12;
	else if (INTEL_GEN(i915) >= 8)
		mcr_ranges = mcr_ranges_gen8;
	else
		return false;

	/*
	 * Registers in these ranges are affected by the MCR selector
	 * which only controls CPU initiated MMIO. Routing does not
	 * work for CS access so we cannot verify them on this path.
	 */
	for (i = 0; mcr_ranges[i].start; i++)
		if (offset >= mcr_ranges[i].start &&
		    offset <= mcr_ranges[i].end)
			return true;

	return false;
}

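/*
 * Emit one MI_STORE_REGISTER_MEM per (non-MCR) workaround register so the
 * values observed by the command streamer are written into the scratch
 * buffer for later comparison against the expected workaround values.
 */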
static int
wa_list_srm(struct i915_request *rq,
	    const struct i915_wa_list *wal,
	    struct i915_vma *vma)
{
	struct drm_i915_private *i915 = rq->engine->i915;
	unsigned int i, count = 0;
	const struct i915_wa *wa;
	u32 srm, *cs;

	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	if (INTEL_GEN(i915) >= 8)
		srm++;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
			count++;
	}

	cs = intel_ring_begin(rq, 4 * count);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 offset = i915_mmio_reg_offset(wa->reg);

		if (mcr_range(i915, offset))
			continue;

		*cs++ = srm;
		*cs++ = offset;
		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
		*cs++ = 0;
	}
	intel_ring_advance(rq, cs);

	return 0;
}

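/*
 * Read back every workaround register from the CS with wa_list_srm() and
 * check the result with wa_verify(); returns -ENXIO if any register has
 * lost its workaround value, or -ETIME if the readback request does not
 * complete in time.
 */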
static int engine_wa_list_verify(struct intel_context *ce,
				 const struct i915_wa_list * const wal,
				 const char *from)
{
	const struct i915_wa *wa;
	struct i915_request *rq;
	struct i915_vma *vma;
	struct i915_gem_ww_ctx ww;
	unsigned int i;
	u32 *results;
	int err;

	if (!wal->count)
		return 0;

	vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
					   wal->count * sizeof(u32));
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_engine_pm_get(ce->engine);
	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(vma->obj, &ww);
	if (err == 0)
		err = intel_context_pin_ww(ce, &ww);
	if (err)
		goto err_pm;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err == 0)
		err = wa_list_srm(rq, wal, vma);

	i915_request_get(rq);
	if (err)
		i915_request_set_error_once(rq, err);
	i915_request_add(rq);

	if (err)
		goto err_rq;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
	if (IS_ERR(results)) {
		err = PTR_ERR(results);
		goto err_rq;
	}

	err = 0;
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
			continue;

		if (!wa_verify(wa, results[i], wal->name, from))
			err = -ENXIO;
	}

	i915_gem_object_unpin_map(vma->obj);

err_rq:
	i915_request_put(rq);
err_unpin:
	intel_context_unpin(ce);
err_pm:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	intel_engine_pm_put(ce->engine);
	i915_vma_unpin(vma);
	i915_vma_put(vma);
	return err;
}

int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
				    const char *from)
{
	return engine_wa_list_verify(engine->kernel_context,
				     &engine->wa_list,
				     from);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_workarounds.c"
#endif