diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index fbcb0904f4a828b3ef49153ad365540bc20b77c8..53ff209b91bb03940a991068bbfa30bf57e2514b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -95,6 +95,7 @@ i915-y += \ intel_lrc.o \ intel_mocs.o \ intel_ringbuffer.o \ + intel_sseu.o \ intel_uncore.o \ intel_wopcm.o diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index c1c391816fa77da72ced9675fb2afd0c3885c93d..5bcc78d7ac96d6eecc57d32b518c14a0fabedae7 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -33,6 +33,7 @@ header_test := \ intel_psr.h \ intel_sdvo.h \ intel_sprite.h \ + intel_sseu.h \ intel_tv.h \ intel_workarounds_types.h diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dc74d33c20aac34fc6fb687e5b13e48fb188d182..e6f9a5ddac3de994fcc74ae76efdd2e4c153771b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3390,20 +3390,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv) return (struct intel_device_info *)INTEL_INFO(dev_priv); } -static inline struct intel_sseu -intel_device_default_sseu(struct drm_i915_private *i915) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - struct intel_sseu value = { - .slice_mask = sseu->slice_mask, - .subslice_mask = sseu->subslice_mask[0], - .min_eus_per_subslice = sseu->max_eus_per_subslice, - .max_eus_per_subslice = sseu->max_eus_per_subslice, - }; - - return value; -} - /* modesetting */ extern void intel_modeset_init_hw(struct drm_device *dev); extern int intel_modeset_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index dd728b26b5aab135e110fc61eca40ca84d4456e2..c02a30612df9653fd9e9d1b5c2ccfcc210109b32 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1156,7 +1156,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - *cs++ = gen8_make_rpcs(rq->i915, &sseu); + *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); intel_ring_advance(rq, cs); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 39a4804091d70d61a5fd63e7dfcb60d3edde4060..56da457bed21d4205d9b95b6f8af44f6ddfb46f1 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1679,7 +1679,7 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - gen8_make_rpcs(i915, &ce->sseu)); + intel_sseu_make_rpcs(i915, &ce->sseu)); } /* diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c index 8931e0fee873d48c8adbdba8c951f0b90ec85cc8..961d1445833d158f639d8df4f999a280cda02471 100644 --- a/drivers/gpu/drm/i915/intel_context.c +++ b/drivers/gpu/drm/i915/intel_context.c @@ -230,15 +230,13 @@ intel_context_init(struct intel_context *ce, ce->gem_context = ctx; ce->engine = engine; ce->ops = engine->cops; + ce->sseu = engine->sseu; INIT_LIST_HEAD(&ce->signal_link); INIT_LIST_HEAD(&ce->signals); mutex_init(&ce->pin_mutex); - /* Use the whole device by default */ - ce->sseu = intel_device_default_sseu(ctx->i915); - i915_active_request_init(&ce->active_tracker, NULL, intel_context_retire); } diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h index 68b4ca1611e0cb68d3a88e7ecf848f32ea70fdc0..9ec4f787c9082594b25b0d11a606575487d9c965 100644 --- a/drivers/gpu/drm/i915/intel_context_types.h +++ b/drivers/gpu/drm/i915/intel_context_types.h @@ -14,6 +14,7 @@ #include #include "i915_active_types.h" +#include "intel_sseu.h" struct i915_gem_context; struct i915_vma; @@ -28,16 +29,6 @@ struct intel_context_ops { void (*destroy)(struct kref *kref); }; -/* - * Powergating configuration for a particular (context,engine). - */ -struct intel_sseu { - u8 slice_mask; - u8 subslice_mask; - u8 min_eus_per_subslice; - u8 max_eus_per_subslice; -}; - struct intel_context { struct kref ref; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 7a2f14eff699d0e83f510a9e9aacc3224e5dbf0a..1598c7079ffdea4260063929f2a0ec8b6d5f5f6b 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -29,6 +29,7 @@ #include "intel_engine_types.h" #include "intel_display.h" +#include "intel_sseu.h" struct drm_printer; struct drm_i915_private; @@ -140,33 +141,6 @@ enum intel_ppgtt_type { func(overlay_needs_physical); \ func(supports_tv); -#define GEN_MAX_SLICES (6) /* CNL upper bound */ -#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ - -struct sseu_dev_info { - u8 slice_mask; - u8 subslice_mask[GEN_MAX_SLICES]; - u16 eu_total; - u8 eu_per_subslice; - u8 min_eu_in_pool; - /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ - u8 subslice_7eu[3]; - u8 has_slice_pg:1; - u8 has_subslice_pg:1; - u8 has_eu_pg:1; - - /* Topology fields */ - u8 max_slices; - u8 max_subslices; - u8 max_eus_per_subslice; - - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. - */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; -}; - struct intel_device_info { u16 gen_mask; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index eea9bec04f1ba0898d0e7cf700159dc8f1265c75..202b4b7a24f1c6816925c96a23923118a9463156 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -588,6 +588,10 @@ int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); + /* Use the whole device by default */ + engine->sseu = + intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); + return 0; err_hwsp: diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h index 1f970c76b6a65e5a3935d993e4f2f3cced4132da..d07a01b3ed0ba4cfed428f960948164c82596400 100644 --- a/drivers/gpu/drm/i915/intel_engine_types.h +++ b/drivers/gpu/drm/i915/intel_engine_types.h @@ -17,6 +17,7 @@ #include "i915_priolist_types.h" #include "i915_selftest.h" #include "i915_timeline_types.h" +#include "intel_sseu.h" #include "intel_workarounds_types.h" #include "i915_gem_batch_pool.h" @@ -278,6 +279,8 @@ struct intel_engine_cs { u32 context_size; u32 mmio_base; + struct intel_sseu sseu; + struct intel_ring *buffer; struct i915_timeline timeline; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 4e0a351bfbcadb7a1e0d13a591dcbafaac5ef80d..18a9dc6ca87747b254ed73ee619572f6a4d19e57 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1232,7 +1232,7 @@ __execlists_update_reg_state(struct intel_context *ce, /* RPCS */ if (engine->class == RENDER_CLASS) regs[CTX_R_PWR_CLK_STATE + 1] = - gen8_make_rpcs(engine->i915, &ce->sseu); + intel_sseu_make_rpcs(engine->i915, &ce->sseu); } static int @@ -2551,138 +2551,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine) return logical_ring_init(engine); } -u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - bool subslice_pg = sseu->has_subslice_pg; - struct intel_sseu ctx_sseu; - u8 slices, subslices; - u32 rpcs = 0; - - /* - * No explicit RPCS request is needed to ensure full - * slice/subslice/EU enablement prior to Gen9. - */ - if (INTEL_GEN(i915) < 9) - return 0; - - /* - * If i915/perf is active, we want a stable powergating configuration - * on the system. - * - * We could choose full enablement, but on ICL we know there are use - * cases which disable slices for functional, apart for performance - * reasons. So in this case we select a known stable subset. - */ - if (!i915->perf.oa.exclusive_stream) { - ctx_sseu = *req_sseu; - } else { - ctx_sseu = intel_device_default_sseu(i915); - - if (IS_GEN(i915, 11)) { - /* - * We only need subslice count so it doesn't matter - * which ones we select - just turn off low bits in the - * amount of half of all available subslices per slice. - */ - ctx_sseu.subslice_mask = - ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); - ctx_sseu.slice_mask = 0x1; - } - } - - slices = hweight8(ctx_sseu.slice_mask); - subslices = hweight8(ctx_sseu.subslice_mask); - - /* - * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits - * wide and Icelake has up to eight subslices, specfial programming is - * needed in order to correctly enable all subslices. - * - * According to documentation software must consider the configuration - * as 2x4x8 and hardware will translate this to 1x8x8. - * - * Furthemore, even though SScount is three bits, maximum documented - * value for it is four. From this some rules/restrictions follow: - * - * 1. - * If enabled subslice count is greater than four, two whole slices must - * be enabled instead. - * - * 2. - * When more than one slice is enabled, hardware ignores the subslice - * count altogether. - * - * From these restrictions it follows that it is not possible to enable - * a count of subslices between the SScount maximum of four restriction, - * and the maximum available number on a particular SKU. Either all - * subslices are enabled, or a count between one and four on the first - * slice. - */ - if (IS_GEN(i915, 11) && - slices == 1 && - subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { - GEM_BUG_ON(subslices & 1); - - subslice_pg = false; - slices *= 2; - } - - /* - * Starting in Gen9, render power gating can leave - * slice/subslice/EU in a partially enabled state. We - * must make an explicit request through RPCS for full - * enablement. - */ - if (sseu->has_slice_pg) { - u32 mask, val = slices; - - if (INTEL_GEN(i915) >= 11) { - mask = GEN11_RPCS_S_CNT_MASK; - val <<= GEN11_RPCS_S_CNT_SHIFT; - } else { - mask = GEN8_RPCS_S_CNT_MASK; - val <<= GEN8_RPCS_S_CNT_SHIFT; - } - - GEM_BUG_ON(val & ~mask); - val &= mask; - - rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val; - } - - if (subslice_pg) { - u32 val = subslices; - - val <<= GEN8_RPCS_SS_CNT_SHIFT; - - GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK); - val &= GEN8_RPCS_SS_CNT_MASK; - - rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; - } - - if (sseu->has_eu_pg) { - u32 val; - - val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; - GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); - val &= GEN8_RPCS_EU_MIN_MASK; - - rpcs |= val; - - val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; - GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); - val &= GEN8_RPCS_EU_MAX_MASK; - - rpcs |= val; - - rpcs |= GEN8_RPCS_ENABLE; - } - - return rpcs; -} - static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) { u32 indirect_ctx_offset; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 84aa230ea27be10acdee4ccf35dabf21a31fc3c2..99f75ee9d087ccb1ea3b0819111408262e977227 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -115,6 +115,4 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, const char *prefix), unsigned int max); -u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu); - #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_sseu.c b/drivers/gpu/drm/i915/intel_sseu.c new file mode 100644 index 0000000000000000000000000000000000000000..7f448f3bea0b9feb01e99908b32310dac77d003a --- /dev/null +++ b/drivers/gpu/drm/i915/intel_sseu.c @@ -0,0 +1,142 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_lrc_reg.h" +#include "intel_sseu.h" + +u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, + const struct intel_sseu *req_sseu) +{ + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + bool subslice_pg = sseu->has_subslice_pg; + struct intel_sseu ctx_sseu; + u8 slices, subslices; + u32 rpcs = 0; + + /* + * No explicit RPCS request is needed to ensure full + * slice/subslice/EU enablement prior to Gen9. + */ + if (INTEL_GEN(i915) < 9) + return 0; + + /* + * If i915/perf is active, we want a stable powergating configuration + * on the system. + * + * We could choose full enablement, but on ICL we know there are use + * cases which disable slices for functional, apart for performance + * reasons. So in this case we select a known stable subset. + */ + if (!i915->perf.oa.exclusive_stream) { + ctx_sseu = *req_sseu; + } else { + ctx_sseu = intel_sseu_from_device_info(sseu); + + if (IS_GEN(i915, 11)) { + /* + * We only need subslice count so it doesn't matter + * which ones we select - just turn off low bits in the + * amount of half of all available subslices per slice. + */ + ctx_sseu.subslice_mask = + ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); + ctx_sseu.slice_mask = 0x1; + } + } + + slices = hweight8(ctx_sseu.slice_mask); + subslices = hweight8(ctx_sseu.subslice_mask); + + /* + * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits + * wide and Icelake has up to eight subslices, specfial programming is + * needed in order to correctly enable all subslices. + * + * According to documentation software must consider the configuration + * as 2x4x8 and hardware will translate this to 1x8x8. + * + * Furthemore, even though SScount is three bits, maximum documented + * value for it is four. From this some rules/restrictions follow: + * + * 1. + * If enabled subslice count is greater than four, two whole slices must + * be enabled instead. + * + * 2. + * When more than one slice is enabled, hardware ignores the subslice + * count altogether. + * + * From these restrictions it follows that it is not possible to enable + * a count of subslices between the SScount maximum of four restriction, + * and the maximum available number on a particular SKU. Either all + * subslices are enabled, or a count between one and four on the first + * slice. + */ + if (IS_GEN(i915, 11) && + slices == 1 && + subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { + GEM_BUG_ON(subslices & 1); + + subslice_pg = false; + slices *= 2; + } + + /* + * Starting in Gen9, render power gating can leave + * slice/subslice/EU in a partially enabled state. We + * must make an explicit request through RPCS for full + * enablement. + */ + if (sseu->has_slice_pg) { + u32 mask, val = slices; + + if (INTEL_GEN(i915) >= 11) { + mask = GEN11_RPCS_S_CNT_MASK; + val <<= GEN11_RPCS_S_CNT_SHIFT; + } else { + mask = GEN8_RPCS_S_CNT_MASK; + val <<= GEN8_RPCS_S_CNT_SHIFT; + } + + GEM_BUG_ON(val & ~mask); + val &= mask; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val; + } + + if (subslice_pg) { + u32 val = subslices; + + val <<= GEN8_RPCS_SS_CNT_SHIFT; + + GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK); + val &= GEN8_RPCS_SS_CNT_MASK; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; + } + + if (sseu->has_eu_pg) { + u32 val; + + val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); + val &= GEN8_RPCS_EU_MIN_MASK; + + rpcs |= val; + + val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); + val &= GEN8_RPCS_EU_MAX_MASK; + + rpcs |= val; + + rpcs |= GEN8_RPCS_ENABLE; + } + + return rpcs; +} diff --git a/drivers/gpu/drm/i915/intel_sseu.h b/drivers/gpu/drm/i915/intel_sseu.h new file mode 100644 index 0000000000000000000000000000000000000000..73bc824094e84e71f0763de3adf960bb3344fa78 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_sseu.h @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_SSEU_H__ +#define __INTEL_SSEU_H__ + +#include + +struct drm_i915_private; + +#define GEN_MAX_SLICES (6) /* CNL upper bound */ +#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ + +struct sseu_dev_info { + u8 slice_mask; + u8 subslice_mask[GEN_MAX_SLICES]; + u16 eu_total; + u8 eu_per_subslice; + u8 min_eu_in_pool; + /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ + u8 subslice_7eu[3]; + u8 has_slice_pg:1; + u8 has_subslice_pg:1; + u8 has_eu_pg:1; + + /* Topology fields */ + u8 max_slices; + u8 max_subslices; + u8 max_eus_per_subslice; + + /* We don't have more than 8 eus per subslice at the moment and as we + * store eus enabled using bits, no need to multiply by eus per + * subslice. + */ + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; +}; + +/* + * Powergating configuration for a particular (context,engine). + */ +struct intel_sseu { + u8 slice_mask; + u8 subslice_mask; + u8 min_eus_per_subslice; + u8 max_eus_per_subslice; +}; + +static inline struct intel_sseu +intel_sseu_from_device_info(const struct sseu_dev_info *sseu) +{ + struct intel_sseu value = { + .slice_mask = sseu->slice_mask, + .subslice_mask = sseu->subslice_mask[0], + .min_eus_per_subslice = sseu->max_eus_per_subslice, + .max_eus_per_subslice = sseu->max_eus_per_subslice, + }; + + return value; +} + +u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, + const struct intel_sseu *req_sseu); + +#endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 4e1b6efc6b22e9156cec81109689e851ee6b7af1..e1cb22f03e8e28ed2b24e150d1a49df4fddfffba 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -962,8 +962,7 @@ __sseu_finish(struct drm_i915_private *i915, unsigned int expected, struct igt_spinner *spin) { - unsigned int slices = - hweight32(intel_device_default_sseu(i915).slice_mask); + unsigned int slices = hweight32(engine->sseu.slice_mask); u32 rpcs = 0; int ret = 0; @@ -1047,8 +1046,8 @@ __igt_ctx_sseu(struct drm_i915_private *i915, const char *name, unsigned int flags) { - struct intel_sseu default_sseu = intel_device_default_sseu(i915); struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_sseu default_sseu = engine->sseu; struct drm_i915_gem_object *obj; struct i915_gem_context *ctx; struct intel_sseu pg_sseu;