提交 c08c8205 编写于 作者: V Vojtech Pavlik 提交者: Andi Kleen

[PATCH] Add the vgetcpu vsyscall

This patch adds a vgetcpu vsyscall, which depending on the CPU RDTSCP
capability uses either the RDTSCP or CPUID to obtain a CPU and node
numbers and pass them to the program.

AK: Lots of changes over Vojtech's original code:
Better prototype for vgetcpu()
It's better to pass the cpu / node numbers as separate arguments
to avoid mistakes when going from SMP to NUMA.
Also add a fast time stamp based cache using a user supplied
argument to speed things more up.
Use fast method from Chuck Ebbert to retrieve node/cpu from
GDT limit instead of CPUID
Made sure RDTSCP init is always executed after node is known.
Drop printk
Signed-off-by: NVojtech Pavlik <vojtech@suse.cz>
Signed-off-by: NAndi Kleen <ak@suse.de>
上级 a670fad0
......@@ -371,7 +371,7 @@ ENTRY(cpu_gdt_table)
.quad 0,0 /* TSS */
.quad 0,0 /* LDT */
.quad 0,0,0 /* three TLS descriptors */
.quad 0 /* unused */
.quad 0x0000f40000000000 /* node/CPU stored in limit */
gdt_end:
/* asm/segment.h:GDT_ENTRIES must match this */
/* This should be a multiple of the cache line size */
......
......@@ -899,12 +899,8 @@ static int __cpuinit
time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
{
unsigned cpu = (unsigned long) hcpu;
if (action == CPU_ONLINE &&
cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
unsigned p;
p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12);
write_rdtscp_aux(p);
}
if (action == CPU_ONLINE)
vsyscall_set_cpu(cpu);
return NOTIFY_DONE;
}
......@@ -993,6 +989,11 @@ void time_init_gtod(void)
if (unsynchronized_tsc())
notsc = 1;
if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
vgetcpu_mode = VGETCPU_RDTSCP;
else
vgetcpu_mode = VGETCPU_LSL;
if (vxtime.hpet_address && notsc) {
timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
if (hpet_use_timer)
......
......@@ -99,6 +99,9 @@ SECTIONS
.vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
vxtime = VVIRT(.vxtime);
.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
vgetcpu_mode = VVIRT(.vgetcpu_mode);
.wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
wall_jiffies = VVIRT(.wall_jiffies);
......
......@@ -26,6 +26,7 @@
#include <linux/seqlock.h>
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/getcpu.h>
#include <asm/vsyscall.h>
#include <asm/pgtable.h>
......@@ -33,11 +34,15 @@
#include <asm/fixmap.h>
#include <asm/errno.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/desc.h>
#include <asm/topology.h>
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
int __vgetcpu_mode __section_vgetcpu_mode;
#include <asm/unistd.h>
......@@ -127,9 +132,46 @@ time_t __vsyscall(1) vtime(time_t *t)
return __xtime.tv_sec;
}
long __vsyscall(2) venosys_0(void)
/* Fast way to get current CPU and node.
This helps to do per node and per CPU caches in user space.
The result is not guaranteed without CPU affinity, but usually
works out because the scheduler tries to keep a thread on the same
CPU.
tcache must point to a two element sized long array.
All arguments can be NULL. */
long __vsyscall(2)
vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
{
return -ENOSYS;
unsigned int dummy, p;
unsigned long j = 0;
/* Fast cache - only recompute value once per jiffies and avoid
relatively costly rdtscp/cpuid otherwise.
This works because the scheduler usually keeps the process
on the same CPU and this syscall doesn't guarantee its
results anyways.
We do this here because otherwise user space would do it on
its own in a likely inferior way (no access to jiffies).
If you don't like it pass NULL. */
if (tcache && tcache->t0 == (j = __jiffies)) {
p = tcache->t1;
} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
rdtscp(dummy, dummy, p);
} else {
/* Load per CPU data from GDT */
asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
}
if (tcache) {
tcache->t0 = j;
tcache->t1 = p;
}
if (cpu)
*cpu = p & 0xfff;
if (node)
*node = p >> 12;
return 0;
}
long __vsyscall(3) venosys_1(void)
......@@ -200,6 +242,43 @@ static ctl_table kernel_root_table2[] = {
#endif
static void __cpuinit write_rdtscp_cb(void *info)
{
write_rdtscp_aux((unsigned long)info);
}
void __cpuinit vsyscall_set_cpu(int cpu)
{
unsigned long *d;
unsigned long node = 0;
#ifdef CONFIG_NUMA
node = cpu_to_node[cpu];
#endif
if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
void *info = (void *)((node << 12) | cpu);
/* Can happen on preemptive kernel */
if (get_cpu() == cpu)
write_rdtscp_cb(info);
#ifdef CONFIG_SMP
else {
/* the notifier is unfortunately not executed on the
target CPU */
smp_call_function_single(cpu,write_rdtscp_cb,info,0,1);
}
#endif
put_cpu();
}
/* Store cpu number in limit so that it can be loaded quickly
in user space in vgetcpu.
12 bits for the CPU and 8 bits for the node. */
d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU);
*d = 0x0f40000000000ULL;
*d |= cpu;
*d |= (node & 0xf) << 12;
*d |= (node >> 4) << 48;
}
static void __init map_vsyscall(void)
{
extern char __vsyscall_0;
......@@ -214,6 +293,7 @@ static int __init vsyscall_init(void)
VSYSCALL_ADDR(__NR_vgettimeofday)));
BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
map_vsyscall();
#ifdef CONFIG_SYSCTL
register_sysctl_table(kernel_root_table2, 0);
......
......@@ -20,15 +20,16 @@
#define __USER_CS 0x33 /* 6*8+3 */
#define __USER32_DS __USER_DS
#define GDT_ENTRY_TLS 1
#define GDT_ENTRY_TSS 8 /* needs two entries */
#define GDT_ENTRY_LDT 10 /* needs two entries */
#define GDT_ENTRY_TLS_MIN 12
#define GDT_ENTRY_TLS_MAX 14
/* 15 free */
#define GDT_ENTRY_TLS_ENTRIES 3
#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
/* TLS indexes for 64bit - hardcoded in arch_prctl */
#define FS_TLS 0
#define GS_TLS 1
......
......@@ -133,13 +133,19 @@ static __inline int logical_smp_processor_id(void)
/* we don't want to mark this access volatile - bad code generation */
return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
}
#endif
#ifdef CONFIG_SMP
#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
#else
#define cpu_physical_id(cpu) boot_cpu_id
#endif
static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
void *info, int retry, int wait)
{
/* Disable interrupts here? */
func(info);
return 0;
}
#endif /* !CONFIG_SMP */
#endif /* !__ASSEMBLY */
#endif
......@@ -4,6 +4,7 @@
enum vsyscall_num {
__NR_vgettimeofday,
__NR_vtime,
__NR_vgetcpu,
};
#define VSYSCALL_START (-10UL << 20)
......@@ -15,6 +16,7 @@ enum vsyscall_num {
#include <linux/seqlock.h>
#define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16)))
#define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
#define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
......@@ -26,6 +28,9 @@ enum vsyscall_num {
#define VXTIME_HPET 2
#define VXTIME_PMTMR 3
#define VGETCPU_RDTSCP 1
#define VGETCPU_LSL 2
struct vxtime_data {
long hpet_address; /* HPET base address */
int last;
......@@ -40,6 +45,7 @@ struct vxtime_data {
/* vsyscall space (readonly) */
extern struct vxtime_data __vxtime;
extern int __vgetcpu_mode;
extern struct timespec __xtime;
extern volatile unsigned long __jiffies;
extern unsigned long __wall_jiffies;
......@@ -48,6 +54,7 @@ extern seqlock_t __xtime_lock;
/* kernel space (writeable) */
extern struct vxtime_data vxtime;
extern int vgetcpu_mode;
extern unsigned long wall_jiffies;
extern struct timezone sys_tz;
extern int sysctl_vsyscall;
......@@ -55,6 +62,8 @@ extern seqlock_t xtime_lock;
extern int sysctl_vsyscall;
extern void vsyscall_set_cpu(int cpu);
#define ARCH_HAVE_XTIME_LOCK 1
#endif /* __KERNEL__ */
......
#ifndef _LINUX_GETCPU_H
#define _LINUX_GETCPU_H 1
/* Cache for getcpu() to speed it up. Results might be upto a jiffie
out of date, but will be faster.
User programs should not refer to the contents of this structure.
It is only a cache for vgetcpu(). It might change in future kernels.
The user program must store this information per thread (__thread)
If you want 100% accurate information pass NULL instead. */
struct getcpu_cache {
unsigned long t0;
unsigned long t1;
unsigned long res[4];
};
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册