/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * into five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to default values (on GPU reset, suspend/resume [2]_, etc..).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are power-context saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
{
	wal->name = name;
	wal->engine_name = engine_name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup(wal->list,
					       wal->count * sizeof(*list),
					       GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
			 wal->wa_count, wal->name, wal->engine_name);
}

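/*
 * _wa_add() keeps the list sorted by register offset, growing the backing
 * array in WA_LIST_CHUNK steps. If an entry for the same register already
 * exists, the new set/clear/read masks are merged into it (and a conflicting
 * overwrite of previously set bits is reported).
 */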
static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
				     GFP_KERNEL);
		if (!list) {
			DRM_ERROR("No space for workaround init!\n");
			return;
		}

		if (wal->list)
			memcpy(list, wal->list, sizeof(*wa) * wal->count);

		wal->list = list;
	}

	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
				DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
					  i915_mmio_reg_offset(wa_->reg),
					  wa_->clr, wa_->set);

				wa_->set &= ~wa->clr;
			}

			wal->wa_count++;
			wa_->set |= wa->set;
			wa_->clr |= wa->clr;
			wa_->read |= wa->read;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
		   u32 clear, u32 set, u32 read_mask)
{
	struct i915_wa wa = {
		.reg  = reg,
		.clr  = clear,
		.set  = set,
		.read = read_mask,
	};

	_wa_add(wal, &wa);
}

static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
	wa_add(wal, reg, clear, set, clear);
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_masked_or(wal, reg, ~0, set);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_masked_or(wal, reg, set, set);
}

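/*
 * Helpers for "masked" registers, where the upper 16 bits of the written
 * value select which of the lower 16 bits take effect. Only the value bits
 * are used as the read mask for later verification.
 */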
static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
}

static void
wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
}

#define WA_SET_BIT_MASKED(addr, mask) \
	wa_masked_en(wal, (addr), (mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	wa_masked_dis(wal, (addr), (mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value)))

static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_YV12_BUGFIX |
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(CACHE_MODE_1,
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs, so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to safe value. Userspace is
	 * still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
	if (IS_GEN9_LP(i915))
		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
				struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);
	skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	/* WaForceContextSaveRestoreNonCoherent:cnl */
	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaPushConstantDereferenceHoldDisable:cnl */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

	/* FtrEnableFastAnisoL1BankingFix:cnl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

	/* WaDisable3DMidCmdPreemption:cnl */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:cnl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaDisableEarlyEOT:cnl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaDisableBankHangMode:icl */
	wa_write(wal,
		 GEN8_L3CNTLREG,
		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
		 GEN8_ERRDETBCTRL);

	/* Wa_1604370585:icl (pre-prod)
	 * Formerly known as WaPushConstantDereferenceHoldDisable
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  PUSH_CONSTANT_DEREF_DISABLE);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	/* Wa_2006611047:icl (pre-prod)
	 * Formerly known as WaDisableImprovedTdlClkGating
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

	/* Wa_2006665173:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

	/* WaEnableFloatBlendOptimization:icl */
	wa_write_masked_or(wal,
			   GEN10_CACHE_MODE_SS,
			   0, /* write-only, so skip validation */
			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

	/* WaDisableGPGPUMidThreadPreemption:icl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

	/* allow headerless messages for preemptible GPGPU context */
	WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
			  GEN11_SAMPLER_ENABLE_HEADLESS_MSG);

	/* Wa_1604278689:icl,ehl */
	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
	wa_write_masked_or(wal, IVB_FBC_RT_BASE_UPPER,
			   0, /* write-only register; skip validation */
			   0xFFFFFFFF);

	/* Wa_1406306137:icl,ehl */
	wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
}

static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	/*
	 * Wa_1409142259:tgl
	 * Wa_1409347922:tgl
	 * Wa_1409252684:tgl
	 * Wa_1409217633:tgl
	 * Wa_1409207793:tgl
	 * Wa_1409178076:tgl
	 * Wa_1408979724:tgl
	 */
	WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
			  GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);

	/*
	 * Wa_1604555607:gen12 and Wa_1608008084:gen12
	 * FF_MODE2 register will return the wrong value when read. The default
	 * value for this register is zero for all fields and there are no bit
	 * masks. So instead of doing a RMW we should just write the TDS timer
	 * value for Wa_1604555607.
	 */
	wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
	       FF_MODE2_TDS_TIMER_128, 0);

	/* WaDisableGPGPUMidThreadPreemption:tgl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
			   struct i915_wa_list *wal,
			   const char *name)
{
	struct drm_i915_private *i915 = engine->i915;

	if (engine->class != RENDER_CLASS)
		return;

	wa_init_start(wal, name, engine->name);

	if (IS_GEN(i915, 12))
		tgl_ctx_workarounds_init(engine, wal);
	else if (IS_GEN(i915, 11))
		icl_ctx_workarounds_init(engine, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_ctx_workarounds_init(engine, wal);
	else if (IS_COFFEELAKE(i915))
		cfl_ctx_workarounds_init(engine, wal);
	else if (IS_GEMINILAKE(i915))
		glk_ctx_workarounds_init(engine, wal);
	else if (IS_KABYLAKE(i915))
		kbl_ctx_workarounds_init(engine, wal);
	else if (IS_BROXTON(i915))
		bxt_ctx_workarounds_init(engine, wal);
	else if (IS_SKYLAKE(i915))
		skl_ctx_workarounds_init(engine, wal);
	else if (IS_CHERRYVIEW(i915))
		chv_ctx_workarounds_init(engine, wal);
	else if (IS_BROADWELL(i915))
		bdw_ctx_workarounds_init(engine, wal);
	else if (IS_GEN(i915, 7))
		gen7_ctx_workarounds_init(engine, wal);
	else if (IS_GEN(i915, 6))
		gen6_ctx_workarounds_init(engine, wal);
	else if (INTEL_GEN(i915) < 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

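/*
 * Emit the context workaround list into @rq as a single MI_LOAD_REGISTER_IMM
 * packet, bracketed by flushes, so that the values are recorded in the newly
 * created context image.
 */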
int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
	struct i915_wa *wa;
	unsigned int i;
	u32 *cs;
	int ret;

	if (wal->count == 0)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		*cs++ = i915_mmio_reg_offset(wa->reg);
		*cs++ = wa->set;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(i915))
		wa_write_or(wal,
			    GAM_ECOCHK,
			    ECOCHK_DIS_TLB);

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_write_or(wal,
			    MMCD_MISC_CTRL,
			    MMCD_PCLA | MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	wa_write_or(wal,
		    GAM_ECOCHK,
		    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:skl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
		wa_write_or(wal,
			    GEN9_GAMT_ECO_REG_RW_IA,
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaInPlaceDecompressionHang:bxt */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableGafsUnitClkGating:kbl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:cfl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	unsigned int slice, subslice;
	u32 l3_en, mcr, mcr_mask;

	GEM_BUG_ON(INTEL_GEN(i915) < 10);

	/*
	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
	 * L3Banks could be fused off in single slice scenario. If that is
	 * the case, we might need to program MCR select to a valid L3Bank
	 * by default, to make sure we correctly read certain registers
	 * later on (in the range 0xB100 - 0xB3FF).
	 *
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
	 * Before any MMIO read into slice/subslice specific registers, MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. On the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 *
	 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
	 * to which subslice, or to which L3 bank, the respective mmio reads
	 * will go, we have to find a common index which works for both
	 * accesses.
	 *
	 * Case where we cannot find a common index fortunately should not
	 * happen in production hardware, so we only emit a warning instead of
	 * implementing something more complex that requires checking the range
	 * of every MMIO read.
	 */

	if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
		u32 l3_fuse =
			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
			GEN10_L3BANK_MASK;

		drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
		l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
	} else {
		l3_en = ~0;
	}

	slice = fls(sseu->slice_mask) - 1;
	subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
	if (!subslice) {
		drm_warn(&i915->drm,
			 "No common index found between subslice mask %x and L3 bank mask %x!\n",
			 intel_sseu_get_subslices(sseu, slice), l3_en);
		subslice = fls(l3_en);
		drm_WARN_ON(&i915->drm, !subslice);
	}
	subslice--;

	if (INTEL_GEN(i915) >= 11) {
		mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
	} else {
		mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
	}

	drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);

	wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaInPlaceDecompressionHang:cnl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaInPlaceDecompressionHang:icl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaModifyGamTlbPartitioning:icl */
	wa_write_masked_or(wal,
			   GEN11_GACB_PERF_CTRL,
			   GEN11_HASH_CTRL_MASK,
			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	wa_write_or(wal,
		    GEN11_LSN_UNSLCVC,
		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	wa_write_or(wal,
		    GEN8_GAMW_ECO_DEV_RW_IA,
		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1405779004:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    MSCUNIT_CLKGATE_DIS);

	/* Wa_1406838659:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		wa_write_or(wal,
			    INF_UNIT_LEVEL_CLKGATE,
			    CGPSF_CLKGATE_DIS);

	/* Wa_1406463099:icl
	 * Formerly known as WaGamTlbPendError
	 */
	wa_write_or(wal,
		    GAMT_CHKN_BIT_REG,
		    GAMT_CHKN_DISABLE_L3_COH_PIPE);

	/* Wa_1607087056:icl,ehl,jsl */
	if (IS_ICELAKE(i915) ||
	    IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
	}
}

static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* Wa_1409420604:tgl */
	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
		wa_write_or(wal,
			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
			    CPSSUNIT_CLKGATE_DIS);

	/* Wa_1607087056:tgl, also known as BUG:1409180338 */
	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	if (IS_GEN(i915, 12))
		tgl_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 11))
		icl_gt_workarounds_init(i915, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_gt_workarounds_init(i915, wal);
	else if (IS_COFFEELAKE(i915))
		cfl_gt_workarounds_init(i915, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(i915, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(i915, wal);
	else if (IS_BROXTON(i915))
		bxt_gt_workarounds_init(i915, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(i915, wal);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
	struct i915_wa_list *wal = &i915->gt_wa_list;

	wa_init_start(wal, "GT", "global");
	gt_init_workarounds(i915, wal);
	wa_init_finish(wal);
}

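/*
 * Accumulate the forcewake domains needed to read and write every register
 * in the list, so that wa_list_apply() can take them all in one go.
 */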
static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}

static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
	if ((cur ^ wa->set) & wa->read) {
		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x)\n",
			  name, from, i915_mmio_reg_offset(wa->reg),
			  cur, cur & wa->read, wa->set);

		return false;
	}

	return true;
}

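/*
 * Apply the list via MMIO under a single forcewake grab: registers with a
 * clear mask are updated with a read-modify-write, the rest are written
 * directly, and each result is verified on CONFIG_DRM_I915_DEBUG_GEM builds.
 */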
static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	fw = wal_get_fw_for_rmw(uncore, wal);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (wa->clr)
			intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set);
		else
			intel_uncore_write_fw(uncore, wa->reg, wa->set);
		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
			wa_verify(wa,
				  intel_uncore_read_fw(uncore, wa->reg),
				  wal->name, "application");
	}

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock_irqrestore(&uncore->lock, flags);
}

void intel_gt_apply_workarounds(struct intel_gt *gt)
{
	wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
}

static bool wa_list_verify(struct intel_uncore *uncore,
			   const struct i915_wa_list *wal,
			   const char *from)
{
	struct i915_wa *wa;
	unsigned int i;
	bool ok = true;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		ok &= wa_verify(wa,
				intel_uncore_read(uncore, wa->reg),
				wal->name, from);

	return ok;
}

bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
	return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
}

static inline bool is_nonpriv_flags_valid(u32 flags)
{
	/* Check only valid flag bits are set */
	if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
		return false;

	/* NB: Only 3 out of 4 enum values are valid for access field */
	if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
	    RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
		return false;

	return true;
}

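/*
 * Add a register to the engine whitelist. The access flags are encoded into
 * the low bits of the register offset, matching the layout expected by the
 * RING_FORCE_TO_NONPRIV registers; at most RING_MAX_NONPRIV_SLOTS entries
 * can be whitelisted per engine.
 */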
static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.reg = reg
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
		return;

	wa.reg.reg |= flags;
	_wa_add(wal, &wa);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);

	/* WaSendPushConstantsFromMMIO:skl,bxt */
	whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/*
	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
	 *
	 * This covers 4 registers which are next to one another:
	 *   - PS_INVOCATION_COUNT
	 *   - PS_INVOCATION_COUNT_UDW
	 *   - PS_DEPTH_COUNT
	 *   - PS_DEPTH_COUNT_UDW
	 */
	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
			  RING_FORCE_TO_NONPRIV_RANGE_4);
}

static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

		/* WaAllowUMDToModifySamplerMode:icl */
		whitelist_reg(w, GEN10_SAMPLER_MODE);

		/* WaEnableStateCacheRedirectToCS:icl */
		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);

		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
		 *
		 * This covers 4 registers which are next to one another:
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);
		break;

	case VIDEO_DECODE_CLASS:
		/* hucStatusRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucUKernelHdrInfoRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucStatus2RegOffset */
		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		break;

	default:
		break;
	}
}

static void tgl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
		 * Wa_1408556865:tgl
		 *
		 * This covers 4 registers which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);

		/* Wa_1808121037:tgl */
		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);

		/* Wa_1806527549:tgl */
		whitelist_reg(w, HIZ_CHICKEN);
		break;
	default:
		break;
	}
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *w = &engine->whitelist;

	wa_init_start(w, "whitelist", engine->name);

	if (IS_GEN(i915, 12))
		tgl_whitelist_build(engine);
	else if (IS_GEN(i915, 11))
		icl_whitelist_build(engine);
	else if (IS_CANNONLAKE(i915))
		cnl_whitelist_build(engine);
	else if (IS_COFFEELAKE(i915))
		cfl_whitelist_build(engine);
	else if (IS_GEMINILAKE(i915))
		glk_whitelist_build(engine);
	else if (IS_KABYLAKE(i915))
		kbl_whitelist_build(engine);
	else if (IS_BROXTON(i915))
		bxt_whitelist_build(engine);
	else if (IS_SKYLAKE(i915))
		skl_whitelist_build(engine);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(w);
}

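/*
 * Program each whitelisted register into one of the engine's
 * RING_FORCE_TO_NONPRIV slots, and point any unused slots at the harmless
 * RING_NOPID register.
 */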
void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
	const struct i915_wa_list *wal = &engine->whitelist;
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(wa->reg));

	/* And clear the rest just in case of garbage */
	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(RING_NOPID(base)));
}

static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
		/*
		 * Wa_1607138336:tgl
		 * Wa_1607063988:tgl
		 */
		wa_write_or(wal,
			    GEN9_CTX_PREEMPT_REG,
			    GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);

		/*
		 * Wa_1607030317:tgl
		 * Wa_1607186500:tgl
		 * Wa_1607297627:tgl: there are 3 entries for this WA on BSpec, 2
		 * of them say it is fixed on B0, the other one says it is
		 * permanent
		 */
		wa_masked_en(wal,
			     GEN6_RC_SLEEP_PSMI_CONTROL,
			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
			     GEN8_RC_SEMA_IDLE_MSG_DISABLE);

		/*
		 * Wa_1606679103:tgl
		 * (see also Wa_1606682166:icl)
		 */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);

		/* Wa_1407928979:tgl */
		wa_write_or(wal,
			    GEN7_FF_THREAD_MODE,
			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);

		/* Wa_1408615072:tgl */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
			    VSUNIT_CLKGATE_DIS_TGL);
	}

	if (IS_TIGERLAKE(i915)) {
		/* Wa_1606931601:tgl */
		wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);

		/* Wa_1409804808:tgl */
		wa_masked_en(wal, GEN7_ROW_CHICKEN2,
			     GEN12_PUSH_CONST_DEREF_HOLD_DIS);

		/* Wa_1606700617:tgl */
		wa_masked_en(wal,
			     GEN9_CS_DEBUG_MODE1,
			     FF_DOP_CLOCK_GATE_DISABLE);

		/*
		 * Wa_1409085225:tgl
		 * Wa_14010229206:tgl
		 */
		wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
	}

	if (IS_GEN(i915, 11)) {
		/* This is not a Wa. Enable for better image quality */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

		/* WaPipelineFlushCoherentLines:icl */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN8_LQSC_FLUSH_COHERENT_LINES);

		/*
		 * Wa_1405543622:icl
		 * Formerly known as WaGAPZPriorityScheme
		 */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN11_ARBITRATION_PRIO_ORDER_MASK);

		/*
		 * Wa_1604223664:icl
		 * Formerly known as WaL3BankAddressHashing
		 */
		wa_write_masked_or(wal,
				   GEN8_GARBCNTL,
				   GEN11_HASH_CTRL_EXCL_MASK,
				   GEN11_HASH_CTRL_EXCL_BIT0);
		wa_write_masked_or(wal,
				   GEN11_GLBLINVL,
				   GEN11_BANK_HASH_ADDR_EXCL_MASK,
				   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

		/*
		 * Wa_1405733216:icl
		 * Formerly known as WaDisableCleanEvicts
		 */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN11_LQSC_CLEAN_EVICT_DISABLE);

		/* WaForwardProgressSoftReset:icl */
		wa_write_or(wal,
			    GEN10_SCRATCH_LNCF2,
			    PMFLUSHDONE_LNICRSDROP |
			    PMFLUSH_GAPL3UNBLOCK |
			    PMFLUSHDONE_LNEBLK);

		/* Wa_1406609255:icl (pre-prod) */
		if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
			wa_write_or(wal,
				    GEN7_SARCHKMD,
				    GEN7_DISABLE_DEMAND_PREFETCH);

		/* Wa_1606682166:icl */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);

		/* Wa_1409178092:icl */
		wa_write_masked_or(wal,
				   GEN11_SCRATCH2,
				   GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
				   0);

		/* WaEnable32PlaneMode:icl */
		wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
			     GEN11_ENABLE_32_PLANE_MODE);

		/*
		 * Wa_1408615072:icl,ehl  (vsunit)
		 * Wa_1407596294:icl,ehl  (hsunit)
		 */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
			    VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);

		/* Wa_1407352427:icl,ehl */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
			    PSDUNIT_CLKGATE_DIS);

		/* Wa_1406680159:icl,ehl */
		wa_write_or(wal,
			    SUBSLICE_UNIT_LEVEL_CLKGATE,
			    GWUNIT_CLKGATE_DIS);

		/*
		 * Wa_1408767742:icl[a2..forever],ehl[all]
		 * Wa_1605460711:icl[a0..c0]
		 */
		wa_write_or(wal,
			    GEN7_FF_THREAD_MODE,
			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);

		/* Wa_22010271021:ehl */
		if (IS_ELKHARTLAKE(i915))
			wa_masked_en(wal,
				     GEN9_CS_DEBUG_MODE1,
				     FF_DOP_CLOCK_GATE_DISABLE);
	}

	if (IS_GEN_RANGE(i915, 9, 12)) {
		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
		wa_masked_en(wal,
			     GEN7_FF_SLICE_CS_CHICKEN1,
			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
	}

	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN9_GAPS_TSV_CREDIT_DISABLE);
	}

	if (IS_BROXTON(i915)) {
		/* WaDisablePooledEuLoadBalancingFix:bxt */
		wa_masked_en(wal,
			     FF_SLICE_CS_CHICKEN2,
			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	if (IS_GEN(i915, 9)) {
		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
		wa_masked_en(wal,
			     GEN9_CSFE_CHICKEN1_RCS,
			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
		wa_write_or(wal,
			    BDW_SCRATCH1,
			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
		if (IS_GEN9_LP(i915))
			wa_write_masked_or(wal,
					   GEN8_L3SQCREG1,
					   L3_PRIO_CREDITS_MASK,
					   L3_GENERAL_PRIO_CREDITS(62) |
					   L3_HIGH_PRIO_CREDITS(2));

		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN8_LQSC_FLUSH_COHERENT_LINES);
	}

	if (IS_GEN(i915, 7))
		/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
		wa_masked_en(wal,
			     GFX_MODE_GEN7,
			     GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);

	if (IS_GEN_RANGE(i915, 6, 7))
		/*
		 * We need to disable the AsyncFlip performance optimisations in
		 * order to use MI_WAIT_FOR_EVENT within the CS. It should
		 * already be programmed to '1' on all products.
		 *
		 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
		 */
		wa_masked_en(wal,
			     MI_MODE,
			     ASYNC_FLIP_PERF_DISABLE);

	if (IS_GEN(i915, 6)) {
		/*
		 * Required for the hardware to program scanline values for
		 * waiting
		 * WaEnableFlushTlbInvalidationMode:snb
		 */
		wa_masked_en(wal,
			     GFX_MODE,
			     GFX_TLB_INVALIDATE_EXPLICIT);

		/*
		 * From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		wa_masked_dis(wal,
			      CACHE_MODE_0,
			      CM0_STC_EVICT_DISABLE_LRA_SNB);
	}

	if (IS_GEN_RANGE(i915, 4, 6))
		/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
		wa_add(wal, MI_MODE,
		       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
		       /* XXX bit doesn't stick on Broadwater */
		       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);

	if (IS_GEN(i915, 4))
		/*
		 * Disable CONSTANT_BUFFER before it is loaded from the context
		 * image. For once it is loaded, it is executed and the stored
		 * address may no longer be valid, leading to a GPU hang.
		 *
		 * This imposes the requirement that userspace reload their
		 * CONSTANT_BUFFER on every batch, fortunately a requirement
		 * they are already accustomed to from before contexts were
		 * enabled.
		 */
		wa_add(wal, ECOSKPD,
		       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
		       0 /* XXX bit doesn't stick on Broadwater */);
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaKBLVECSSemaphoreWaitPoll:kbl */
	if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
		wa_write(wal,
			 RING_SEMA_WAIT_POLL(engine->mmio_base),
			 1);
	}
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4))
		return;

	if (engine->class == RENDER_CLASS)
		rcs_engine_wa_init(engine, wal);
	else
		xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->wa_list;

	if (INTEL_GEN(engine->i915) < 4)
		return;

	wa_init_start(wal, "engine", engine->name);
	engine_init_workarounds(engine, wal);
	wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
	wa_list_apply(engine->uncore, &engine->wa_list);
}

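/*
 * Allocate and pin a scratch buffer large enough to hold @count u32 values,
 * used below to read workaround registers back from the GPU.
 */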
static struct i915_vma *
create_scratch(struct i915_address_space *vm, int count)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int size;
	int err;

	size = round_up(count * sizeof(u32), PAGE_SIZE);
	obj = i915_gem_object_create_internal(vm->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err)
		goto err_obj;

	return vma;

err_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static const struct {
	u32 start;
	u32 end;
} mcr_ranges_gen8[] = {
	{ .start = 0x5500, .end = 0x55ff },
	{ .start = 0x7000, .end = 0x7fff },
	{ .start = 0x9400, .end = 0x97ff },
	{ .start = 0xb000, .end = 0xb3ff },
	{ .start = 0xe000, .end = 0xe7ff },
	{},
};

static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
	int i;

	if (INTEL_GEN(i915) < 8)
		return false;

	/*
	 * Registers in these ranges are affected by the MCR selector
	 * which only controls CPU initiated MMIO. Routing does not
	 * work for CS access so we cannot verify them on this path.
	 */
	for (i = 0; mcr_ranges_gen8[i].start; i++)
		if (offset >= mcr_ranges_gen8[i].start &&
		    offset <= mcr_ranges_gen8[i].end)
			return true;

	return false;
}

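/*
 * Emit one MI_STORE_REGISTER_MEM per workaround register to copy its current
 * value into @vma, skipping registers that fall in an MCR range and so
 * cannot be read reliably from the CS.
 */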
static int
wa_list_srm(struct i915_request *rq,
	    const struct i915_wa_list *wal,
	    struct i915_vma *vma)
{
	struct drm_i915_private *i915 = rq->i915;
	unsigned int i, count = 0;
	const struct i915_wa *wa;
	u32 srm, *cs;

	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	if (INTEL_GEN(i915) >= 8)
		srm++;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
			count++;
	}

	cs = intel_ring_begin(rq, 4 * count);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 offset = i915_mmio_reg_offset(wa->reg);

		if (mcr_range(i915, offset))
			continue;

		*cs++ = srm;
		*cs++ = offset;
		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
		*cs++ = 0;
	}
	intel_ring_advance(rq, cs);

	return 0;
}

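/*
 * Verify the workaround list from the GPU's point of view: submit a request
 * on @ce that stores each register into a scratch buffer, then compare the
 * results against the expected values with wa_verify().
 */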
static int engine_wa_list_verify(struct intel_context *ce,
				 const struct i915_wa_list * const wal,
				 const char *from)
{
	const struct i915_wa *wa;
	struct i915_request *rq;
	struct i915_vma *vma;
	unsigned int i;
	u32 *results;
	int err;

	if (!wal->count)
		return 0;

	vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_engine_pm_get(ce->engine);
	rq = intel_context_create_request(ce);
	intel_engine_pm_put(ce->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_vma;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err) {
		i915_request_add(rq);
		goto err_vma;
	}

	err = wa_list_srm(rq, wal, vma);
	if (err)
		goto err_vma;

	i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
	if (IS_ERR(results)) {
		err = PTR_ERR(results);
		goto err_rq;
	}

	err = 0;
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
			continue;

		if (!wa_verify(wa, results[i], wal->name, from))
			err = -ENXIO;
	}

	i915_gem_object_unpin_map(vma->obj);

err_rq:
	i915_request_put(rq);
err_vma:
	i915_vma_unpin(vma);
	i915_vma_put(vma);
	return err;
}

int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
				    const char *from)
{
	return engine_wa_list_verify(engine->kernel_context,
				     &engine->wa_list,
				     from);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_workarounds.c"
#endif