/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

/* Disable profiling for userspace code: */
#define DISABLE_BRANCH_PROFILING

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/hpet.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <linux/math64.h>
#include <linux/time.h>

#define gtod (&VVAR(vsyscall_gtod_data))

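/*
 * These are the entry points exported from the vDSO image; the C library
 * typically resolves clock_gettime()/gettimeofday()/time() to them so that
 * no kernel entry is needed on the fast path.  Weak aliases for the plain
 * symbol names are provided further down in this file.
 */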
extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);

#ifdef CONFIG_HPET_TIMER
static inline u32 read_hpet_counter(const volatile void *addr)
{
	return *(const volatile u32 *) (addr + HPET_COUNTER);
}
#endif

#ifndef BUILD_VDSO32

#include <linux/kernel.h>
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/pvclock.h>

static notrace cycle_t vread_hpet(void)
{
	return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET));
}

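/*
 * Fallback paths for when the fast path cannot be used: issue the real
 * system call with the SYSCALL instruction and return its result
 * unchanged.
 */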
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}

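/*
 * Paravirt clock support: per-CPU pvclock_vsyscall_time_info structures
 * are packed PAGE_SIZE/PVTI_SIZE to a page in the PVCLOCK fixmap range;
 * get_pvti() returns the entry for a given CPU number.
 */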
#ifdef CONFIG_PARAVIRT_CLOCK

static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
	const struct pvclock_vsyscall_time_info *pvti_base;
	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);

	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);

	pvti_base = (struct pvclock_vsyscall_time_info *)
		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);

	return &pvti_base[offset];
}

static notrace cycle_t vread_pvclock(int *mode)
{
	const struct pvclock_vsyscall_time_info *pvti;
	cycle_t ret;
	u64 last;
	u32 version;
	u8 flags;
	unsigned cpu, cpu1;


	/*
	 * Note: hypervisor must guarantee that:
	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
	 * 2. that per-CPU pvclock time info is updated if the
	 *    underlying CPU changes.
	 * 3. that version is increased whenever underlying CPU
	 *    changes.
	 *
	 */
	do {
		cpu = __getcpu() & VGETCPU_CPU_MASK;
		/* TODO: We can put vcpu id into higher bits of pvti.version.
		 * This will save a couple of cycles by getting rid of
		 * __getcpu() calls (Gleb).
		 */

		pvti = get_pvti(cpu);

		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);

		/*
		 * Test we're still on the cpu as well as the version.
		 * We could have been migrated just after the first
		 * vgetcpu but before fetching the version, so we
		 * wouldn't notice a version change.
		 */
		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
	} while (unlikely(cpu != cpu1 ||
			  (pvti->pvti.version & 1) ||
			  pvti->pvti.version != version));

	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
		*mode = VCLOCK_NONE;

	/* refer to tsc.c read_tsc() comment for rationale */
	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif

#else

extern u8 hpet_page
	__attribute__((visibility("hidden")));

#ifdef CONFIG_HPET_TIMER
static notrace cycle_t vread_hpet(void)
{
	return read_hpet_counter((const void *)(&hpet_page));
}
#endif
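/*
 * The two 32-bit fallbacks below enter the kernel through the vDSO's own
 * VDSO32_vsyscall trampoline.  %ebx cannot be named as an operand or
 * clobber here because it is reserved for the GOT pointer in PIC code,
 * so it is saved in %edx by hand around the call.
 */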

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call VDSO32_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
		: "memory", "edx");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call VDSO32_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
		: "memory", "edx");
	return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK
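/*
 * The 32-bit vDSO has no pvclock mapping.  Reporting VCLOCK_NONE here
 * makes the callers take the system call fallback instead.
 */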

static notrace cycle_t vread_pvclock(int *mode)
{
	*mode = VCLOCK_NONE;
	return 0;
}
#endif

#endif

notrace static cycle_t vread_tsc(void)
{
	cycle_t ret;
	u64 last;

	/*
	 * Empirically, a fence (of type that depends on the CPU)
	 * before rdtsc is enough to ensure that rdtsc is ordered
	 * with respect to loads.  The various CPU manuals are unclear
	 * as to whether rdtsc can be reordered with later loads,
	 * but no one has ever seen it happen.
	 */
	rdtsc_barrier();
	ret = (cycle_t)__native_read_tsc();

	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

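/*
 * Returns the number of shifted nanoseconds elapsed since the last gtod
 * update: (cycles - cycle_last) * mult, still scaled by 2^shift.  The
 * callers add this to the cached *_snsec value and shift down.
 */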
notrace static inline u64 vgetsns(int *mode)
{
	u64 v;
	cycles_t cycles;

	if (gtod->vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
#ifdef CONFIG_HPET_TIMER
	else if (gtod->vclock_mode == VCLOCK_HPET)
		cycles = vread_hpet();
#endif
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
	else
		return 0;
	v = (cycles - gtod->cycle_last) & gtod->mask;
	return v * gtod->mult;
}

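/*
 * do_realtime() and do_monotonic() read the timekeeping data under the
 * gtod seqcount, retrying if an update raced with the read, then split
 * the accumulated shifted nanoseconds into tv_sec/tv_nsec.  They return
 * the vclock mode so callers can detect VCLOCK_NONE and fall back.
 */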
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static int __always_inline do_monotonic(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

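/*
 * The coarse clocks only copy the values cached at the last timekeeping
 * update; no clocksource is read, so there is no fallback path.
 */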
notrace static void do_realtime_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->wall_time_coarse_sec;
		ts->tv_nsec = gtod->wall_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace static void do_monotonic_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->monotonic_time_coarse_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	switch (clock) {
	case CLOCK_REALTIME:
		if (do_realtime(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_MONOTONIC:
		if (do_monotonic(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_REALTIME_COARSE:
		do_realtime_coarse(ts);
		break;
	case CLOCK_MONOTONIC_COARSE:
		do_monotonic_coarse(ts);
		break;
	default:
		goto fallback;
	}

	return 0;
fallback:
	return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

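/*
 * gettimeofday() reuses do_realtime(): struct timeval lines up with
 * struct timespec, so the nanoseconds land in tv_usec and only need a
 * divide by 1000 afterwards.
 */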
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
			return vdso_fallback_gtod(tv, tz);
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		tz->tz_minuteswest = gtod->tz_minuteswest;
		tz->tz_dsttime = gtod->tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = ACCESS_ONCE(gtod->wall_time_sec);

	if (t)
		*t = result;
	return result;
}
int time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));