提交 a7f4df4e 编写于 作者: A Alex Smith 提交者: Ralf Baechle

MIPS: VDSO: Add implementations of gettimeofday() and clock_gettime()

Add user-mode implementations of gettimeofday() and clock_gettime() to
the VDSO. This is currently usable with 2 clocksources: the CP0 count
register, which is accessible to user-mode via RDHWR on R2 and later
cores, or the MIPS Global Interrupt Controller (GIC) timer, which
provides a "user-mode visible" section containing a mirror of its
counter registers. This section must be mapped into user memory, which
is done below the VDSO data page.

When a supported clocksource is not in use, the VDSO functions will
return -ENOSYS, which causes libc to fall back on the standard syscall
path.

When support for neither of these clocksources is compiled into the
kernel at all, the VDSO still provides clock_gettime(), as the coarse
realtime/monotonic clocks can still be implemented. However,
gettimeofday() is not provided in this case as nothing can be done
without a suitable clocksource. This causes the symbol lookup to fail
in libc and it will then always use the standard syscall path.

This patch includes a workaround for a bug in QEMU which results in
RDHWR on the CP0 count register always returning a constant (incorrect)
value. A fix for this has been submitted, and the workaround can be
removed after the fix has been in stable releases for a reasonable
amount of time.

A simple performance test which calls gettimeofday() 1000 times in a
loop and calculates the average execution time gives the following
results on a Malta + I6400 (running at 20MHz):

 - Syscall:    ~31000 ns
 - VDSO (GIC): ~15000 ns
 - VDSO (CP0): ~9500 ns

[markos.chandras@imgtec.com:
- Minor code re-arrangements in order for mappings to be made
in the order they appear to the process' address space.
- Move do_{monotonic, realtime} outside of the MIPS_CLOCK_VSYSCALL ifdef
- Use gic_get_usm_range so we can do the GIC mapping in the
arch/mips/kernel/vdso instead of the GIC irqchip driver]
Signed-off-by: NAlex Smith <alex.smith@imgtec.com>
Signed-off-by: NMarkos Chandras <markos.chandras@imgtec.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/11338/Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
上级 c0a9f72c
......@@ -61,6 +61,8 @@ config MIPS
select SYSCTL_EXCEPTION_TRACE
select HAVE_VIRT_CPU_ACCOUNTING_GEN
select HAVE_IRQ_TIME_ACCOUNTING
select GENERIC_TIME_VSYSCALL
select ARCH_CLOCKSOURCE_DATA
menu "Machine selection"
......@@ -1040,6 +1042,9 @@ config CSRC_R4K
config CSRC_SB1250
bool
config MIPS_CLOCK_VSYSCALL
def_bool CSRC_R4K || CLKSRC_MIPS_GIC
config GPIO_TXX9
select ARCH_REQUIRE_GPIOLIB
bool
......
/*
* Copyright (C) 2015 Imagination Technologies
* Author: Alex Smith <alex.smith@imgtec.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*/
#ifndef __ASM_CLOCKSOURCE_H
#define __ASM_CLOCKSOURCE_H
#include <linux/types.h>
/* VDSO clocksources. */
#define VDSO_CLOCK_NONE 0 /* No suitable clocksource. */
#define VDSO_CLOCK_R4K 1 /* Use the coprocessor 0 count. */
#define VDSO_CLOCK_GIC 2 /* Use the GIC. */
/**
* struct arch_clocksource_data - Architecture-specific clocksource information.
* @vdso_clock_mode: Method the VDSO should use to access the clocksource.
*/
struct arch_clocksource_data {
u8 vdso_clock_mode;
};
#endif /* __ASM_CLOCKSOURCE_H */
......@@ -13,6 +13,8 @@
#include <linux/mm_types.h>
#include <asm/barrier.h>
/**
* struct mips_vdso_image - Details of a VDSO image.
* @data: Pointer to VDSO image data (page-aligned).
......@@ -53,18 +55,82 @@ extern struct mips_vdso_image vdso_image_n32;
/**
* union mips_vdso_data - Data provided by the kernel for the VDSO.
* @xtime_sec: Current real time (seconds part).
* @xtime_nsec: Current real time (nanoseconds part, shifted).
* @wall_to_mono_sec: Wall-to-monotonic offset (seconds part).
* @wall_to_mono_nsec: Wall-to-monotonic offset (nanoseconds part).
* @seq_count: Counter to synchronise updates (odd = updating).
* @cs_shift: Clocksource shift value.
* @clock_mode: Clocksource to use for time functions.
* @cs_mult: Clocksource multiplier value.
* @cs_cycle_last: Clock cycle value at last update.
* @cs_mask: Clocksource mask value.
* @tz_minuteswest: Minutes west of Greenwich (from timezone).
* @tz_dsttime: Type of DST correction (from timezone).
*
* This structure contains data needed by functions within the VDSO. It is
* populated by the kernel and mapped read-only into user memory.
* populated by the kernel and mapped read-only into user memory. The time
* fields are mirrors of internal data from the timekeeping infrastructure.
*
* Note: Care should be taken when modifying as the layout must remain the same
* for both 64- and 32-bit (for 32-bit userland on 64-bit kernel).
*/
union mips_vdso_data {
struct {
u64 xtime_sec;
u64 xtime_nsec;
u32 wall_to_mono_sec;
u32 wall_to_mono_nsec;
u32 seq_count;
u32 cs_shift;
u8 clock_mode;
u32 cs_mult;
u64 cs_cycle_last;
u64 cs_mask;
s32 tz_minuteswest;
s32 tz_dsttime;
};
u8 page[PAGE_SIZE];
};
static inline u32 vdso_data_read_begin(const union mips_vdso_data *data)
{
u32 seq;
while (true) {
seq = ACCESS_ONCE(data->seq_count);
if (likely(!(seq & 1))) {
/* Paired with smp_wmb() in vdso_data_write_*(). */
smp_rmb();
return seq;
}
cpu_relax();
}
}
static inline bool vdso_data_read_retry(const union mips_vdso_data *data,
u32 start_seq)
{
/* Paired with smp_wmb() in vdso_data_write_*(). */
smp_rmb();
return unlikely(data->seq_count != start_seq);
}
static inline void vdso_data_write_begin(union mips_vdso_data *data)
{
++data->seq_count;
/* Ensure sequence update is written before other data page values. */
smp_wmb();
}
static inline void vdso_data_write_end(union mips_vdso_data *data)
{
/* Ensure data values are written before updating sequence again. */
smp_wmb();
++data->seq_count;
}
#endif /* __ASM_VDSO_H */
......@@ -28,6 +28,43 @@ static u64 notrace r4k_read_sched_clock(void)
return read_c0_count();
}
static inline unsigned int rdhwr_count(void)
{
unsigned int count;
__asm__ __volatile__(
" .set push\n"
" .set mips32r2\n"
" rdhwr %0, $2\n"
" .set pop\n"
: "=r" (count));
return count;
}
static bool rdhwr_count_usable(void)
{
unsigned int prev, curr, i;
/*
* Older QEMUs have a broken implementation of RDHWR for the CP0 count
* which always returns a constant value. Try to identify this and don't
* use it in the VDSO if it is broken. This workaround can be removed
* once the fix has been in QEMU stable for a reasonable amount of time.
*/
for (i = 0, prev = rdhwr_count(); i < 100; i++) {
curr = rdhwr_count();
if (curr != prev)
return true;
prev = curr;
}
pr_warn("Not using R4K clocksource in VDSO due to broken RDHWR\n");
return false;
}
int __init init_r4k_clocksource(void)
{
if (!cpu_has_counter || !mips_hpt_frequency)
......@@ -36,6 +73,13 @@ int __init init_r4k_clocksource(void)
/* Calculate a somewhat reasonable rating value */
clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;
/*
* R2 onwards makes the count accessible to user mode so it can be used
* by the VDSO (HWREna is configured by configure_hwrena()).
*/
if (cpu_has_mips_r2_r6 && rdhwr_count_usable())
clocksource_mips.archdata.vdso_clock_mode = VDSO_CLOCK_R4K;
clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);
sched_clock_register(r4k_read_sched_clock, 32, mips_hpt_frequency);
......
......@@ -12,9 +12,12 @@
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/irqchip/mips-gic.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>
#include <asm/abi.h>
#include <asm/vdso.h>
......@@ -23,7 +26,7 @@
static union mips_vdso_data vdso_data __page_aligned_data;
/*
* Mapping for the VDSO data pages. The real pages are mapped manually, as
* Mapping for the VDSO data/GIC pages. The real pages are mapped manually, as
* what we map and where within the area they are mapped is determined at
* runtime.
*/
......@@ -64,25 +67,67 @@ static int __init init_vdso(void)
}
subsys_initcall(init_vdso);
void update_vsyscall(struct timekeeper *tk)
{
vdso_data_write_begin(&vdso_data);
vdso_data.xtime_sec = tk->xtime_sec;
vdso_data.xtime_nsec = tk->tkr_mono.xtime_nsec;
vdso_data.wall_to_mono_sec = tk->wall_to_monotonic.tv_sec;
vdso_data.wall_to_mono_nsec = tk->wall_to_monotonic.tv_nsec;
vdso_data.cs_shift = tk->tkr_mono.shift;
vdso_data.clock_mode = tk->tkr_mono.clock->archdata.vdso_clock_mode;
if (vdso_data.clock_mode != VDSO_CLOCK_NONE) {
vdso_data.cs_mult = tk->tkr_mono.mult;
vdso_data.cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data.cs_mask = tk->tkr_mono.mask;
}
vdso_data_write_end(&vdso_data);
}
void update_vsyscall_tz(void)
{
if (vdso_data.clock_mode != VDSO_CLOCK_NONE) {
vdso_data.tz_minuteswest = sys_tz.tz_minuteswest;
vdso_data.tz_dsttime = sys_tz.tz_dsttime;
}
}
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mips_vdso_image *image = current->thread.abi->vdso;
struct mm_struct *mm = current->mm;
unsigned long base, vdso_addr;
unsigned long gic_size, vvar_size, size, base, data_addr, vdso_addr;
struct vm_area_struct *vma;
struct resource gic_res;
int ret;
down_write(&mm->mmap_sem);
base = get_unmapped_area(NULL, 0, PAGE_SIZE + image->size, 0, 0);
/*
* Determine total area size. This includes the VDSO data itself, the
* data page, and the GIC user page if present. Always create a mapping
* for the GIC user area if the GIC is present regardless of whether it
* is the current clocksource, in case it comes into use later on. We
* only map a page even though the total area is 64K, as we only need
* the counter registers at the start.
*/
gic_size = gic_present ? PAGE_SIZE : 0;
vvar_size = gic_size + PAGE_SIZE;
size = vvar_size + image->size;
base = get_unmapped_area(NULL, 0, size, 0, 0);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
}
vdso_addr = base + PAGE_SIZE;
data_addr = base + gic_size;
vdso_addr = data_addr + PAGE_SIZE;
vma = _install_special_mapping(mm, base, PAGE_SIZE,
vma = _install_special_mapping(mm, base, vvar_size,
VM_READ | VM_MAYREAD,
&vdso_vvar_mapping);
if (IS_ERR(vma)) {
......@@ -90,8 +135,22 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
goto out;
}
/* Map GIC user page. */
if (gic_size) {
ret = gic_get_usm_range(&gic_res);
if (ret)
goto out;
ret = io_remap_pfn_range(vma, base,
gic_res.start >> PAGE_SHIFT,
gic_size,
pgprot_noncached(PAGE_READONLY));
if (ret)
goto out;
}
/* Map data page. */
ret = remap_pfn_range(vma, base,
ret = remap_pfn_range(vma, data_addr,
virt_to_phys(&vdso_data) >> PAGE_SHIFT,
PAGE_SIZE, PAGE_READONLY);
if (ret)
......
/*
* Copyright (C) 2015 Imagination Technologies
* Author: Alex Smith <alex.smith@imgtec.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*/
#include "vdso.h"
#include <linux/compiler.h>
#include <linux/irqchip/mips-gic.h>
#include <linux/time.h>
#include <asm/clocksource.h>
#include <asm/io.h>
#include <asm/mips-cm.h>
#include <asm/unistd.h>
#include <asm/vdso.h>
static __always_inline int do_realtime_coarse(struct timespec *ts,
const union mips_vdso_data *data)
{
u32 start_seq;
do {
start_seq = vdso_data_read_begin(data);
ts->tv_sec = data->xtime_sec;
ts->tv_nsec = data->xtime_nsec >> data->cs_shift;
} while (vdso_data_read_retry(data, start_seq));
return 0;
}
static __always_inline int do_monotonic_coarse(struct timespec *ts,
const union mips_vdso_data *data)
{
u32 start_seq;
u32 to_mono_sec;
u32 to_mono_nsec;
do {
start_seq = vdso_data_read_begin(data);
ts->tv_sec = data->xtime_sec;
ts->tv_nsec = data->xtime_nsec >> data->cs_shift;
to_mono_sec = data->wall_to_mono_sec;
to_mono_nsec = data->wall_to_mono_nsec;
} while (vdso_data_read_retry(data, start_seq));
ts->tv_sec += to_mono_sec;
timespec_add_ns(ts, to_mono_nsec);
return 0;
}
#ifdef CONFIG_CSRC_R4K
static __always_inline u64 read_r4k_count(void)
{
unsigned int count;
__asm__ __volatile__(
" .set push\n"
" .set mips32r2\n"
" rdhwr %0, $2\n"
" .set pop\n"
: "=r" (count));
return count;
}
#endif
#ifdef CONFIG_CLKSRC_MIPS_GIC
static __always_inline u64 read_gic_count(const union mips_vdso_data *data)
{
void __iomem *gic = get_gic(data);
u32 hi, hi2, lo;
do {
hi = __raw_readl(gic + GIC_UMV_SH_COUNTER_63_32_OFS);
lo = __raw_readl(gic + GIC_UMV_SH_COUNTER_31_00_OFS);
hi2 = __raw_readl(gic + GIC_UMV_SH_COUNTER_63_32_OFS);
} while (hi2 != hi);
return (((u64)hi) << 32) + lo;
}
#endif
static __always_inline u64 get_ns(const union mips_vdso_data *data)
{
u64 cycle_now, delta, nsec;
switch (data->clock_mode) {
#ifdef CONFIG_CSRC_R4K
case VDSO_CLOCK_R4K:
cycle_now = read_r4k_count();
break;
#endif
#ifdef CONFIG_CLKSRC_MIPS_GIC
case VDSO_CLOCK_GIC:
cycle_now = read_gic_count(data);
break;
#endif
default:
return 0;
}
delta = (cycle_now - data->cs_cycle_last) & data->cs_mask;
nsec = (delta * data->cs_mult) + data->xtime_nsec;
nsec >>= data->cs_shift;
return nsec;
}
static __always_inline int do_realtime(struct timespec *ts,
const union mips_vdso_data *data)
{
u32 start_seq;
u64 ns;
do {
start_seq = vdso_data_read_begin(data);
if (data->clock_mode == VDSO_CLOCK_NONE)
return -ENOSYS;
ts->tv_sec = data->xtime_sec;
ns = get_ns(data);
} while (vdso_data_read_retry(data, start_seq));
ts->tv_nsec = 0;
timespec_add_ns(ts, ns);
return 0;
}
static __always_inline int do_monotonic(struct timespec *ts,
const union mips_vdso_data *data)
{
u32 start_seq;
u64 ns;
u32 to_mono_sec;
u32 to_mono_nsec;
do {
start_seq = vdso_data_read_begin(data);
if (data->clock_mode == VDSO_CLOCK_NONE)
return -ENOSYS;
ts->tv_sec = data->xtime_sec;
ns = get_ns(data);
to_mono_sec = data->wall_to_mono_sec;
to_mono_nsec = data->wall_to_mono_nsec;
} while (vdso_data_read_retry(data, start_seq));
ts->tv_sec += to_mono_sec;
ts->tv_nsec = 0;
timespec_add_ns(ts, ns + to_mono_nsec);
return 0;
}
#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
/*
* This is behind the ifdef so that we don't provide the symbol when there's no
* possibility of there being a usable clocksource, because there's nothing we
* can do without it. When libc fails the symbol lookup it should fall back on
* the standard syscall path.
*/
int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
const union mips_vdso_data *data = get_vdso_data();
struct timespec ts;
int ret;
ret = do_realtime(&ts, data);
if (ret)
return ret;
if (tv) {
tv->tv_sec = ts.tv_sec;
tv->tv_usec = ts.tv_nsec / 1000;
}
if (tz) {
tz->tz_minuteswest = data->tz_minuteswest;
tz->tz_dsttime = data->tz_dsttime;
}
return 0;
}
#endif /* CONFIG_CLKSRC_MIPS_GIC */
int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts)
{
const union mips_vdso_data *data = get_vdso_data();
int ret;
switch (clkid) {
case CLOCK_REALTIME_COARSE:
ret = do_realtime_coarse(ts, data);
break;
case CLOCK_MONOTONIC_COARSE:
ret = do_monotonic_coarse(ts, data);
break;
case CLOCK_REALTIME:
ret = do_realtime(ts, data);
break;
case CLOCK_MONOTONIC:
ret = do_monotonic(ts, data);
break;
default:
ret = -ENOSYS;
break;
}
/* If we return -ENOSYS libc should fall back to a syscall. */
return ret;
}
......@@ -77,4 +77,13 @@ static inline const union mips_vdso_data *get_vdso_data(void)
return (const union mips_vdso_data *)(get_vdso_base() - PAGE_SIZE);
}
#ifdef CONFIG_CLKSRC_MIPS_GIC
static inline void __iomem *get_gic(const union mips_vdso_data *data)
{
return (void __iomem *)data - PAGE_SIZE;
}
#endif /* CONFIG_CLKSRC_MIPS_GIC */
#endif /* __ASSEMBLY__ */
......@@ -95,6 +95,11 @@ PHDRS
VERSION
{
LINUX_2.6 {
#ifndef DISABLE_MIPS_VDSO
global:
__vdso_clock_gettime;
__vdso_gettimeofday;
#endif
local: *;
};
}
......@@ -140,9 +140,10 @@ static cycle_t gic_hpt_read(struct clocksource *cs)
}
static struct clocksource gic_clocksource = {
.name = "GIC",
.read = gic_hpt_read,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.name = "GIC",
.read = gic_hpt_read,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.archdata = { .vdso_clock_mode = VDSO_CLOCK_GIC },
};
static void __init __gic_clocksource_init(void)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册