提交 64552396 编写于 作者: J Jacob Pan 提交者: Ingo Molnar

perf/x86/intel/rapl: Fix energy counter measurements but supporing per domain energy units

RAPL energy hardware unit can vary within a single CPU package, e.g.
HSW server DRAM has a fixed energy unit of 15.3 uJ (2^-16) whereas
the unit on other domains can be enumerated from power unit MSR.

There might be other variations in the future, this patch adds
per cpu model quirk to allow special handling of certain cpus.

hw_unit is also removed from per cpu data since it is not per cpu
and the sampling rate for energy counter is typically not high.

Without this patch, DRAM domain on HSW servers will be counted
4x higher than the real energy counter.
Signed-off-by: NJacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: NPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: NStephane Eranian <eranian@google.com>
Cc: Andi Kleen <andi.kleen@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1427405325-780-1-git-send-email-jacob.jun.pan@linux.intel.comSigned-off-by: NIngo Molnar <mingo@kernel.org>
上级 517e6341
...@@ -62,6 +62,14 @@ ...@@ -62,6 +62,14 @@
#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ #define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
#define NR_RAPL_DOMAINS 0x4
static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"pp0-core",
"package",
"dram",
"pp1-gpu",
};
/* Clients have PP0, PKG */ /* Clients have PP0, PKG */
#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ #define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
1<<RAPL_IDX_PKG_NRG_STAT|\ 1<<RAPL_IDX_PKG_NRG_STAT|\
...@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = { \ ...@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = { \
struct rapl_pmu { struct rapl_pmu {
spinlock_t lock; spinlock_t lock;
int hw_unit; /* 1/2^hw_unit Joule */
int n_active; /* number of active events */ int n_active; /* number of active events */
struct list_head active_list; struct list_head active_list;
struct pmu *pmu; /* pointer to rapl_pmu_class */ struct pmu *pmu; /* pointer to rapl_pmu_class */
...@@ -120,6 +127,7 @@ struct rapl_pmu { ...@@ -120,6 +127,7 @@ struct rapl_pmu {
struct hrtimer hrtimer; struct hrtimer hrtimer;
}; };
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */
static struct pmu rapl_pmu_class; static struct pmu rapl_pmu_class;
static cpumask_t rapl_cpu_mask; static cpumask_t rapl_cpu_mask;
static int rapl_cntr_mask; static int rapl_cntr_mask;
...@@ -127,6 +135,7 @@ static int rapl_cntr_mask; ...@@ -127,6 +135,7 @@ static int rapl_cntr_mask;
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
static struct x86_pmu_quirk *rapl_quirks;
static inline u64 rapl_read_counter(struct perf_event *event) static inline u64 rapl_read_counter(struct perf_event *event)
{ {
u64 raw; u64 raw;
...@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event) ...@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event)
return raw; return raw;
} }
static inline u64 rapl_scale(u64 v) #define rapl_add_quirk(func_) \
do { \
static struct x86_pmu_quirk __quirk __initdata = { \
.func = func_, \
}; \
__quirk.next = rapl_quirks; \
rapl_quirks = &__quirk; \
} while (0)
static inline u64 rapl_scale(u64 v, int cfg)
{ {
if (cfg > NR_RAPL_DOMAINS) {
pr_warn("invalid domain %d, failed to scale data\n", cfg);
return v;
}
/* /*
* scale delta to smallest unit (1/2^32) * scale delta to smallest unit (1/2^32)
* users must then scale back: count * 1/(1e9*2^32) to get Joules * users must then scale back: count * 1/(1e9*2^32) to get Joules
* or use ldexp(count, -32). * or use ldexp(count, -32).
* Watts = Joules/Time delta * Watts = Joules/Time delta
*/ */
return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit); return v << (32 - rapl_hw_unit[cfg - 1]);
} }
static u64 rapl_event_update(struct perf_event *event) static u64 rapl_event_update(struct perf_event *event)
...@@ -173,7 +195,7 @@ static u64 rapl_event_update(struct perf_event *event) ...@@ -173,7 +195,7 @@ static u64 rapl_event_update(struct perf_event *event)
delta = (new_raw_count << shift) - (prev_raw_count << shift); delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift; delta >>= shift;
sdelta = rapl_scale(delta); sdelta = rapl_scale(delta, event->hw.config);
local64_add(sdelta, &event->count); local64_add(sdelta, &event->count);
...@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu) ...@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
cpumask_set_cpu(cpu, &rapl_cpu_mask); cpumask_set_cpu(cpu, &rapl_cpu_mask);
} }
static __init void rapl_hsw_server_quirk(void)
{
/*
* DRAM domain on HSW server has fixed energy unit which can be
* different than the unit from power unit MSR.
* "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
*/
rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
}
static int rapl_cpu_prepare(int cpu) static int rapl_cpu_prepare(int cpu)
{ {
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
int phys_id = topology_physical_package_id(cpu); int phys_id = topology_physical_package_id(cpu);
u64 ms; u64 ms;
u64 msr_rapl_power_unit_bits;
if (pmu) if (pmu)
return 0; return 0;
...@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu) ...@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
if (phys_id < 0) if (phys_id < 0)
return -1; return -1;
/* protect rdmsrl() to handle virtualization */
if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
return -1;
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
if (!pmu) if (!pmu)
return -1; return -1;
spin_lock_init(&pmu->lock); spin_lock_init(&pmu->lock);
INIT_LIST_HEAD(&pmu->active_list); INIT_LIST_HEAD(&pmu->active_list);
/*
* grab power unit as: 1/2^unit Joules
*
* we cache in local PMU instance
*/
pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
pmu->pmu = &rapl_pmu_class; pmu->pmu = &rapl_pmu_class;
/* /*
...@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu) ...@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
* divide interval by 2 to avoid lockstep (2 * 100) * divide interval by 2 to avoid lockstep (2 * 100)
* if hw unit is 32, then we use 2 ms 1/200/2 * if hw unit is 32, then we use 2 ms 1/200/2
*/ */
if (pmu->hw_unit < 32) if (rapl_hw_unit[0] < 32)
ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1)); ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
else else
ms = 2; ms = 2;
...@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self, ...@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self,
return NOTIFY_OK; return NOTIFY_OK;
} }
static int rapl_check_hw_unit(void)
{
u64 msr_rapl_power_unit_bits;
int i;
/* protect rdmsrl() to handle virtualization */
if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
return -1;
for (i = 0; i < NR_RAPL_DOMAINS; i++)
rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
return 0;
}
static const struct x86_cpu_id rapl_cpu_match[] = { static const struct x86_cpu_id rapl_cpu_match[] = {
[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
[1] = {}, [1] = {},
...@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void) ...@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
{ {
struct rapl_pmu *pmu; struct rapl_pmu *pmu;
int cpu, ret; int cpu, ret;
struct x86_pmu_quirk *quirk;
int i;
/* /*
* check for Intel processor family 6 * check for Intel processor family 6
...@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void) ...@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
rapl_cntr_mask = RAPL_IDX_CLN; rapl_cntr_mask = RAPL_IDX_CLN;
rapl_pmu_events_group.attrs = rapl_events_cln_attr; rapl_pmu_events_group.attrs = rapl_events_cln_attr;
break; break;
case 63: /* Haswell-Server */
rapl_add_quirk(rapl_hsw_server_quirk);
rapl_cntr_mask = RAPL_IDX_SRV;
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
break;
case 60: /* Haswell */ case 60: /* Haswell */
case 69: /* Haswell-Celeron */ case 69: /* Haswell-Celeron */
rapl_cntr_mask = RAPL_IDX_HSW; rapl_cntr_mask = RAPL_IDX_HSW;
...@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void) ...@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
/* unsupported */ /* unsupported */
return 0; return 0;
} }
ret = rapl_check_hw_unit();
if (ret)
return ret;
/* run cpu model quirks */
for (quirk = rapl_quirks; quirk; quirk = quirk->next)
quirk->func();
cpu_notifier_register_begin(); cpu_notifier_register_begin();
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
...@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void) ...@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)
pmu = __this_cpu_read(rapl_pmu); pmu = __this_cpu_read(rapl_pmu);
pr_info("RAPL PMU detected, hw unit 2^-%d Joules," pr_info("RAPL PMU detected,"
" API unit is 2^-32 Joules," " API unit is 2^-32 Joules,"
" %d fixed counters" " %d fixed counters"
" %llu ms ovfl timer\n", " %llu ms ovfl timer\n",
pmu->hw_unit,
hweight32(rapl_cntr_mask), hweight32(rapl_cntr_mask),
ktime_to_ms(pmu->timer_interval)); ktime_to_ms(pmu->timer_interval));
for (i = 0; i < NR_RAPL_DOMAINS; i++) {
if (rapl_cntr_mask & (1 << i)) {
pr_info("hw unit of domain %s 2^-%d Joules\n",
rapl_domain_names[i], rapl_hw_unit[i]);
}
}
out: out:
cpu_notifier_register_done(); cpu_notifier_register_done();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册