/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

/* Disable profiling for userspace code: */
#define DISABLE_BRANCH_PROFILING

#include <linux/kernel.h>
#include <uapi/linux/time.h>
#include <linux/string.h>
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/vgtod.h>
#include <asm/hpet.h>
#include <asm/unistd.h>
#include <asm/io.h>
#include <asm/pvclock.h>

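/*
 * vsyscall_gtod_data lives in the vvar area that the kernel maps
 * read-only into userspace; every read below goes through this copy.
 */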
#define gtod (&VVAR(vsyscall_gtod_data))

extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);

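/*
 * This file is also built with BUILD_VDSO32 defined for the 32-bit
 * compat vDSO; the #ifndef branch below is the native 64-bit side.
 */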
#ifndef BUILD_VDSO32

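/* Read the HPET main counter through its fixmap mapping. */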
static notrace cycle_t vread_hpet(void)
{
	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
}

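/*
 * Fall back to a real syscall when no usable vclock source is
 * available.
 */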
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK

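/*
 * Per-CPU pvclock_vsyscall_time_info entries are packed PVTI_SIZE apart
 * in the PVCLOCK fixmap pages; map a CPU number to its page and offset.
 */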
static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
	const struct pvclock_vsyscall_time_info *pvti_base;
	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);

	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);

	pvti_base = (struct pvclock_vsyscall_time_info *)
		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);

	return &pvti_base[offset];
}

static notrace cycle_t vread_pvclock(int *mode)
{
	const struct pvclock_vsyscall_time_info *pvti;
	cycle_t ret;
	u64 last;
	u32 version;
	u8 flags;
	unsigned cpu, cpu1;


	/*
	 * Note: hypervisor must guarantee that:
	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
	 * 2. that per-CPU pvclock time info is updated if the
	 *    underlying CPU changes.
	 * 3. that version is increased whenever underlying CPU
	 *    changes.
	 *
	 */
	do {
		cpu = __getcpu() & VGETCPU_CPU_MASK;
		/* TODO: We can put vcpu id into higher bits of pvti.version.
		 * This will save a couple of cycles by getting rid of
		 * __getcpu() calls (Gleb).
		 */

		pvti = get_pvti(cpu);

		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);

		/*
		 * Test we're still on the cpu as well as the version.
		 * We could have been migrated just after the first
		 * vgetcpu but before fetching the version, so we
		 * wouldn't notice a version change.
		 */
		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
	} while (unlikely(cpu != cpu1 ||
			  (pvti->pvti.version & 1) ||
			  pvti->pvti.version != version));

	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
		*mode = VCLOCK_NONE;

	/* refer to tsc.c read_tsc() comment for rationale */
	last = gtod->clock.cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif

#else

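/*
 * 32-bit compat vDSO: the HPET is reached through the hidden hpet_page
 * symbol rather than a fixmap, and syscall fallbacks go through the
 * VDSO32_vsyscall entry point.
 */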
extern u8 hpet_page
	__attribute__((visibility("hidden")));

#ifdef CONFIG_HPET_TIMER
static notrace cycle_t vread_hpet(void)
{
	return readl((const void __iomem *)(&hpet_page + HPET_COUNTER));
}
#endif

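/*
 * %ebx is both the first syscall argument register and the PIC GOT
 * pointer, so it is saved in %edx around the call instead of being
 * clobbered directly.
 */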
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call VDSO32_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
		: "memory", "edx");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call VDSO32_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
		: "memory", "edx");
	return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK

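/*
 * No pvclock fast path in the 32-bit vDSO; report VCLOCK_NONE so the
 * caller takes the syscall fallback.
 */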
static notrace cycle_t vread_pvclock(int *mode)
{
	*mode = VCLOCK_NONE;
	return 0;
}
#endif

#endif

notrace static cycle_t vread_tsc(void)
{
	cycle_t ret;
	u64 last;

	/*
	 * Empirically, a fence (of type that depends on the CPU)
	 * before rdtsc is enough to ensure that rdtsc is ordered
	 * with respect to loads.  The various CPU manuals are unclear
	 * as to whether rdtsc can be reordered with later loads,
	 * but no one has ever seen it happen.
	 */
	rdtsc_barrier();
	ret = (cycle_t)__native_read_tsc();

	last = gtod->clock.cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

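/*
 * Read the selected clocksource and convert the delta since cycle_last
 * into mult-scaled nanoseconds; the caller applies the final shift.
 */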
notrace static inline u64 vgetsns(int *mode)
{
	u64 v;
	cycles_t cycles;
	if (gtod->clock.vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
#ifdef CONFIG_HPET_TIMER
	else if (gtod->clock.vclock_mode == VCLOCK_HPET)
		cycles = vread_hpet();
#endif
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
	else
		return 0;
	v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
	return v * gtod->clock.mult;
}

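/*
 * gtod->seq is bumped by the kernel whenever the timekeeping data is
 * updated; the loops below retry if an update raced with the reads.
 */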
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	ts->tv_nsec = 0;
	do {
		seq = raw_read_seqcount_begin(&gtod->seq);
		mode = gtod->clock.vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));

	timespec_add_ns(ts, ns);
	return mode;
}

notrace static int __always_inline do_monotonic(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	ts->tv_nsec = 0;
	do {
		seq = raw_read_seqcount_begin(&gtod->seq);
		mode = gtod->clock.vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	timespec_add_ns(ts, ns);

	return mode;
}

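/*
 * The coarse clocks just copy the timestamp of the last tick; no
 * clocksource is read, so they never need the syscall fallback.
 */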
notrace static void do_realtime_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = raw_read_seqcount_begin(&gtod->seq);
		ts->tv_sec = gtod->wall_time_coarse.tv_sec;
		ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
}

notrace static void do_monotonic_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = raw_read_seqcount_begin(&gtod->seq);
		ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
}

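/*
 * Entry points.  The weak aliases provide the traditional names
 * (clock_gettime, gettimeofday, time) alongside the __vdso_ variants.
 */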
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	switch (clock) {
	case CLOCK_REALTIME:
		if (do_realtime(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_MONOTONIC:
		if (do_monotonic(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_REALTIME_COARSE:
		do_realtime_coarse(ts);
		break;
	case CLOCK_MONOTONIC_COARSE:
		do_monotonic_coarse(ts);
		break;
	default:
		goto fallback;
	}

	return 0;
fallback:
	return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

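/*
 * The timeval is filled in as a timespec (the BUILD_BUG_ON checks that
 * the layouts match), then tv_usec is scaled down from nanoseconds.
 */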
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
			     offsetof(struct timespec, tv_nsec) ||
			     sizeof(*tv) != sizeof(struct timespec));
		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
			return vdso_fallback_gtod(tv, tz);
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		/* Avoid memcpy. Some old compilers fail to inline it */
		tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
		tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = ACCESS_ONCE(gtod->wall_time_sec);

	if (t)
		*t = result;
	return result;
}
int time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));