Commit fa731587, authored by Peter Zijlstra, committed by Ingo Molnar

perf: Fix capabilities bitfield compatibility in 'struct perf_event_mmap_page'

Solve the problems around the broken definition of perf_event_mmap_page::
cap_usr_time and cap_usr_rdpmc fields which used to overlap, partially
fixed by:

  860f085b ("perf: Fix broken union in 'struct perf_event_mmap_page'")

The problem with the fix (merged in v3.12-rc1 and not yet officially
released), noticed by Vince Weaver, is that the new behavior is not
detectable by new user-space, and that, due to the reuse of the field names,
it's easy to mis-compile a binary if old headers are used on a new kernel or
new headers are used on an old kernel.

To solve all that, make this change explicit, detectable and self-contained,
by iterating the ABI the following way:

 - Always clear bit 0, and rename it to usrpage->cap_bit0, to at least not
   confuse old user-space binaries. RDPMC will be marked as unavailable
   to old binaries, but that's within the ABI: this is a capability bit.

 - Rename bit 1 to ->cap_bit0_is_deprecated and always set it to 1, so new
   libraries can reliably detect that bit 0 is deprecated and perma-zero
   without having to check the kernel version (see the sketch after this list).

 - Use bits 2, 3, 4 for the newly defined, correct functionality:

	cap_user_rdpmc		: 1, /* The RDPMC instruction can be used to read counts */
	cap_user_time		: 1, /* The time_* fields are used */
	cap_user_time_zero	: 1, /* The time_zero field is used */

 - Rename all the bitfield names in perf_event.h to be different from the
   old names, to make sure it's not possible to mis-compile it
   accidentally with old assumptions.
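
For illustration only, here is a minimal user-space sketch of the bit-1
detection described above; the helper name rdpmc_available() is hypothetical
and not part of this patch, and it assumes the new uapi header:

	#include <stdbool.h>
	#include <linux/perf_event.h>

	/* Hypothetical helper: decide whether RDPMC may be trusted. */
	static bool rdpmc_available(const struct perf_event_mmap_page *pc)
	{
		/* New kernels always set bit 1; bits 2-4 then carry the real capabilities. */
		if (pc->cap_bit0_is_deprecated)
			return pc->cap_user_rdpmc;

		/*
		 * Old kernel: bits 0/1 still have the ambiguous pre-iteration
		 * meaning, so conservatively report RDPMC as unavailable.
		 */
		return false;
	}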

The 'size' field can then be used in the future to add new fields and it
will act as a natural ABI version indicator as well.

Also adjust tools/perf/ userspace for the new definitions, noticed by
Adrian Hunter.
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Also-Fixed-by: Adrian Hunter <adrian.hunter@intel.com>
Link: http://lkml.kernel.org/n/tip-zr03yxjrpXesOzzupszqglbv@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Parent: 73c4427c
@@ -1883,9 +1883,9 @@ static struct pmu pmu = {
 void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
-	userpg->cap_usr_time = 0;
-	userpg->cap_usr_time_zero = 0;
-	userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
+	userpg->cap_user_time = 0;
+	userpg->cap_user_time_zero = 0;
+	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
@@ -1894,13 +1894,13 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 		return;
 
-	userpg->cap_usr_time = 1;
+	userpg->cap_user_time = 1;
 	userpg->time_mult = this_cpu_read(cyc2ns);
 	userpg->time_shift = CYC2NS_SCALE_FACTOR;
 	userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
 
 	if (sched_clock_stable && !check_tsc_disabled()) {
-		userpg->cap_usr_time_zero = 1;
+		userpg->cap_user_time_zero = 1;
 		userpg->time_zero = this_cpu_read(cyc2ns_offset);
 	}
 }
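
With cap_user_rdpmc set in the user page, counts can be read without entering
the kernel. Below is a minimal x86 sketch of the seqlock-style read loop the
mmap page is meant for, assuming a mapped perf_event_mmap_page 'pc'; the
helper names are illustrative, not part of this patch:

	#include <stdint.h>
	#include <linux/perf_event.h>

	#define barrier() asm volatile("" ::: "memory")

	/* Read hardware counter 'counter' with the RDPMC instruction (x86). */
	static inline uint64_t rdpmc(uint32_t counter)
	{
		uint32_t lo, hi;

		asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
		return (uint64_t)hi << 32 | lo;
	}

	/* Illustrative: read an event's count from user space when permitted. */
	static uint64_t read_count_rdpmc(const struct perf_event_mmap_page *pc)
	{
		uint32_t seq, idx;
		uint64_t count;

		do {
			seq = pc->lock;
			barrier();

			idx = pc->index;	/* hw counter index + 1, 0 if unavailable */
			count = pc->offset;
			if (pc->cap_user_rdpmc && idx)
				count += rdpmc(idx - 1);

			barrier();
		} while (pc->lock != seq);

		return count;
	}
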
@@ -380,10 +380,13 @@ struct perf_event_mmap_page {
 	union {
 		__u64	capabilities;
 		struct {
-			__u64	cap_usr_time		: 1,
-				cap_usr_rdpmc		: 1,
-				cap_usr_time_zero	: 1,
-				cap_____res		: 61;
+			__u64	cap_bit0		: 1, /* Always 0, deprecated, see commit 860f085b74e9 */
+				cap_bit0_is_deprecated	: 1, /* Always 1, signals that bit 0 is zero */
+
+				cap_user_rdpmc		: 1, /* The RDPMC instruction can be used to read counts */
+				cap_user_time		: 1, /* The time_* fields are used */
+				cap_user_time_zero	: 1, /* The time_zero field is used */
+				cap_____res		: 59;
 		};
 	};
@@ -442,12 +445,13 @@ struct perf_event_mmap_page {
 	 *               ((rem * time_mult) >> time_shift);
 	 */
 	__u64	time_zero;
+	__u32	size;			/* Header size up to __reserved[] fields. */
 
 	/*
 	 * Hole for extension of the self monitor capabilities
 	 */
 
-	__u64	__reserved[119];	/* align to 1k */
+	__u8	__reserved[118*8+4];	/* align to 1k. */
 
 	/*
 	 * Control data for the mmap() data buffer.
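
The '((rem * time_mult) >> time_shift)' comment in the second hunk is the tail
of the user-space time conversion. A sketch of the full formula, assuming
cap_user_time_zero is set and the fields were sampled consistently under
pc->lock; this mirrors what tools/perf's tsc_to_perf_time() computes, and the
helper name here is illustrative:

	#include <stdint.h>
	#include <linux/perf_event.h>

	/* Illustrative conversion of a raw TSC value to perf time. */
	static uint64_t tsc_to_perf_time_sketch(uint64_t cyc,
						const struct perf_event_mmap_page *pc)
	{
		uint64_t quot = cyc >> pc->time_shift;
		uint64_t rem  = cyc & (((uint64_t)1 << pc->time_shift) - 1);

		return pc->time_zero + quot * pc->time_mult +
		       ((rem * pc->time_mult) >> pc->time_shift);
	}
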
@@ -3660,6 +3660,26 @@ static void calc_timer_values(struct perf_event *event,
 	*running = ctx_time - event->tstamp_running;
 }
 
+static void perf_event_init_userpage(struct perf_event *event)
+{
+	struct perf_event_mmap_page *userpg;
+	struct ring_buffer *rb;
+
+	rcu_read_lock();
+	rb = rcu_dereference(event->rb);
+	if (!rb)
+		goto unlock;
+
+	userpg = rb->user_page;
+
+	/* Allow new userspace to detect that bit 0 is deprecated */
+	userpg->cap_bit0_is_deprecated = 1;
+	userpg->size = offsetof(struct perf_event_mmap_page, __reserved);
+
+unlock:
+	rcu_read_unlock();
+}
+
 void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
 }
@@ -4044,6 +4064,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 	ring_buffer_attach(event, rb);
 	rcu_assign_pointer(event->rb, rb);
 
+	perf_event_init_userpage(event);
 	perf_event_update_userpage(event);
 
 unlock:
@@ -32,7 +32,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
 			     struct perf_tsc_conversion *tc)
 {
-	bool cap_usr_time_zero;
+	bool cap_user_time_zero;
 	u32 seq;
 	int i = 0;
 
@@ -42,7 +42,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
 		tc->time_mult = pc->time_mult;
 		tc->time_shift = pc->time_shift;
 		tc->time_zero = pc->time_zero;
-		cap_usr_time_zero = pc->cap_usr_time_zero;
+		cap_user_time_zero = pc->cap_user_time_zero;
 		rmb();
 		if (pc->lock == seq && !(seq & 1))
 			break;
@@ -52,7 +52,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
 		}
 	}
 
-	if (!cap_usr_time_zero)
+	if (!cap_user_time_zero)
 		return -EOPNOTSUPP;
 
 	return 0;
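
Below is a hypothetical caller of the updated tools/perf helpers, showing how
the -EOPNOTSUPP case (a kernel that does not advertise cap_user_time_zero)
could be handled; convert_now() and the rdtsc() wrapper are illustrative and
not part of this patch:

	#include <errno.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <linux/perf_event.h>
	/* plus the tools/perf tsc header that declares
	 * perf_read_tsc_conversion() and tsc_to_perf_time() */

	static inline uint64_t rdtsc(void)
	{
		uint32_t lo, hi;

		asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
		return (uint64_t)hi << 32 | lo;
	}

	static int convert_now(const struct perf_event_mmap_page *pc, uint64_t *time)
	{
		struct perf_tsc_conversion tc;
		int err;

		err = perf_read_tsc_conversion(pc, &tc);
		if (err == -EOPNOTSUPP) {
			fprintf(stderr, "kernel does not set cap_user_time_zero\n");
			return err;
		}
		if (err)
			return err;

		*time = tsc_to_perf_time(rdtsc(), &tc);
		return 0;
	}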