diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index cd786ad12be704f596f51f67ba84f61ab2ae50e8..97bb4aaa52360ea5de8a46ab437ac3c93b9350c9 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -27,8 +27,6 @@ BIT(I915_SAMPLE_WAIT) | \ BIT(I915_SAMPLE_SEMA)) -#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS) - static cpumask_t i915_pmu_cpumask; static unsigned int i915_pmu_target_cpu = -1; @@ -57,17 +55,42 @@ static bool is_engine_config(u64 config) return config < __I915_PMU_OTHER(0); } -static unsigned int config_enabled_bit(u64 config) +static unsigned int other_bit(const u64 config) +{ + unsigned int val; + + switch (config) { + case I915_PMU_ACTUAL_FREQUENCY: + val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED; + break; + case I915_PMU_REQUESTED_FREQUENCY: + val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED; + break; + case I915_PMU_RC6_RESIDENCY: + val = __I915_PMU_RC6_RESIDENCY_ENABLED; + break; + default: + /* + * Events that do not require sampling, or tracking state + * transitions between enabled and disabled can be ignored. + */ + return -1; + } + + return I915_ENGINE_SAMPLE_COUNT + val; +} + +static unsigned int config_bit(const u64 config) { if (is_engine_config(config)) return engine_config_sample(config); else - return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0)); + return other_bit(config); } -static u64 config_enabled_mask(u64 config) +static u64 config_mask(u64 config) { - return BIT_ULL(config_enabled_bit(config)); + return BIT_ULL(config_bit(config)); } static bool is_engine_event(struct perf_event *event) @@ -75,15 +98,20 @@ static bool is_engine_event(struct perf_event *event) return is_engine_config(event->attr.config); } -static unsigned int event_enabled_bit(struct perf_event *event) +static unsigned int event_bit(struct perf_event *event) +{ + return config_bit(event->attr.config); +} + +static bool event_read_needs_wakeref(const struct perf_event *event) { - return config_enabled_bit(event->attr.config); + return event->attr.config == I915_PMU_RC6_RESIDENCY; } static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) { struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); - u64 enable; + u32 enable; /* * Only some counters need the sampling timer. @@ -96,8 +124,8 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) * Mask out all the ones which do not need the timer, or in * other words keep all the ones that could need the timer. */ - enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | - config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) | + enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) | + config_mask(I915_PMU_REQUESTED_FREQUENCY) | ENGINE_SAMPLE_MASK; /* @@ -189,7 +217,7 @@ static void park_rc6(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; - if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) + if (pmu->enable & config_mask(I915_PMU_RC6_RESIDENCY)) pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); pmu->sleep_last = ktime_get(); @@ -344,8 +372,8 @@ add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul) static bool frequency_sampling_enabled(struct i915_pmu *pmu) { return pmu->enable & - (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | - config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)); + (config_mask(I915_PMU_ACTUAL_FREQUENCY) | + config_mask(I915_PMU_REQUESTED_FREQUENCY)); } static void @@ -363,7 +391,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) if (!intel_gt_pm_get_if_awake(gt)) return; - if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { + if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) { u32 val; /* @@ -385,7 +413,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) intel_gpu_freq(rps, val), period_ns / 1000); } - if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { + if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) { add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], intel_gpu_freq(rps, rps->cur_freq), period_ns / 1000); @@ -627,12 +655,19 @@ static void i915_pmu_enable(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); - unsigned int bit = event_enabled_bit(event); + bool need_wakeref = event_read_needs_wakeref(event); struct i915_pmu *pmu = &i915->pmu; - intel_wakeref_t wakeref; + intel_wakeref_t wakeref = 0; unsigned long flags; + unsigned int bit; + + if (need_wakeref) + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + bit = event_bit(event); + if (bit == -1) + goto update; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); spin_lock_irqsave(&pmu->lock, flags); /* @@ -644,7 +679,7 @@ static void i915_pmu_enable(struct perf_event *event) GEM_BUG_ON(pmu->enable_count[bit] == ~0); if (pmu->enable_count[bit] == 0 && - config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) { + config_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) { pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0; pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); pmu->sleep_last = ktime_get(); @@ -684,6 +719,7 @@ static void i915_pmu_enable(struct perf_event *event) spin_unlock_irqrestore(&pmu->lock, flags); +update: /* * Store the current counter value so we can report the correct delta * for all listeners. Even when the event was already enabled and has @@ -691,17 +727,21 @@ static void i915_pmu_enable(struct perf_event *event) */ local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + if (wakeref) + intel_runtime_pm_put(&i915->runtime_pm, wakeref); } static void i915_pmu_disable(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); - unsigned int bit = event_enabled_bit(event); + unsigned int bit = event_bit(event); struct i915_pmu *pmu = &i915->pmu; unsigned long flags; + if (bit == -1) + return; + spin_lock_irqsave(&pmu->lock, flags); if (is_engine_event(event)) { diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index a24885ab415c288267a45562c7fa11be4ba6bacd..e33be99e6454e9fddaeab3748dbd5338f9096b24 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -14,6 +14,21 @@ struct drm_i915_private; +/** + * Non-engine events that we need to track enabled-disabled transition and + * current state. + */ +enum i915_pmu_tracked_events { + __I915_PMU_ACTUAL_FREQUENCY_ENABLED = 0, + __I915_PMU_REQUESTED_FREQUENCY_ENABLED, + __I915_PMU_RC6_RESIDENCY_ENABLED, + __I915_PMU_TRACKED_EVENT_COUNT, /* count marker */ +}; + +/** + * Slots used from the sampling timer (non-engine events) with some extras for + * convenience. + */ enum { __I915_SAMPLE_FREQ_ACT = 0, __I915_SAMPLE_FREQ_REQ, @@ -28,8 +43,7 @@ enum { * It is also used to know to needed number of event reference counters. */ #define I915_PMU_MASK_BITS \ - ((1 << I915_PMU_SAMPLE_BITS) + \ - (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0))) + (I915_ENGINE_SAMPLE_COUNT + __I915_PMU_TRACKED_EVENT_COUNT) #define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1) @@ -66,18 +80,17 @@ struct i915_pmu { */ struct hrtimer timer; /** - * @enable: Bitmask of all currently enabled events. + * @enable: Bitmask of specific enabled events. + * + * For some events we need to track their state and do some internal + * house keeping. * - * Bits are derived from uAPI event numbers in a way that low 16 bits - * correspond to engine event _sample_ _type_ (I915_SAMPLE_QUEUED is - * bit 0), and higher bits correspond to other events (for instance - * I915_PMU_ACTUAL_FREQUENCY is bit 16 etc). + * Each engine event sampler type and event listed in enum + * i915_pmu_tracked_events gets a bit in this field. * - * In other words, low 16 bits are not per engine but per engine - * sampler type, while the upper bits are directly mapped to other - * event types. + * Low bits are engine samplers and other events continue from there. */ - u64 enable; + u32 enable; /** * @timer_last: diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index fa1f3d62f9a6cebb300c63bf82955af91ab2a6ed..6edcb2b6c708875f18f04627cd435cfd5be654a0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -178,7 +178,7 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY /* Each region is a minimum of 16k, and there are at most 255 of them. */