/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>

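/*
 * gtod points at the vsyscall_gtod_data copy in the vvar page, which the
 * kernel maps read-only into every process next to the vDSO image.
 */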
#define gtod (&VVAR(vsyscall_gtod_data))

extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);

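/*
 * The pvclock page is placed at a fixed offset relative to the vDSO image
 * by the vDSO linker script; the hidden visibility keeps references to it
 * free of dynamic relocations (see the note at the top of this file).
 */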
#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
	__attribute__((visibility("hidden")));
#endif

#ifndef BUILD_VDSO32

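/*
 * Fallback paths: issue the real system call whenever the fast vDSO path
 * cannot be used (unknown clock id, or vclock_mode == VCLOCK_NONE).
 * On x86-64 the syscall number goes in rax ("a") and the first two
 * arguments in rdi ("D") and rsi ("S").
 */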
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}


#else
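/*
 * 32-bit fallback paths.  %ebx carries the first syscall argument, but it
 * is also the GOT pointer in PIC code, so the compiler will not let us use
 * it directly as an asm operand; stash it in %edx around the call to the
 * kernel's __kernel_vsyscall entry instead.
 */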

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
		: "memory", "edx");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
		: "memory", "edx");
	return ret;
}

#endif

#ifdef CONFIG_PARAVIRT_CLOCK
static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
	return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}

static notrace cycle_t vread_pvclock(int *mode)
{
	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
	cycle_t ret;
	u64 last;
	u32 version;

	/*
	 * Note: The kernel and hypervisor must guarantee that cpu ID
	 * number maps 1:1 to per-CPU pvclock time info.
	 *
	 * Because the hypervisor is entirely unaware of guest userspace
	 * preemption, it cannot guarantee that per-CPU pvclock time
	 * info is updated if the underlying CPU changes or that its
	 * version is increased whenever the underlying CPU changes.
	 *
	 * On KVM, we are guaranteed that pvti updates for any vCPU are
	 * atomic as seen by *all* vCPUs.  This is an even stronger
	 * guarantee than we get with a normal seqlock.
	 *
	 * On Xen, we don't appear to have that guarantee, but Xen still
	 * supplies a valid seqlock using the version field.
	 *
	 * We only do pvclock vdso timing at all if
	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
	 * mean that all vCPUs have matching pvti and that the TSC is
	 * synced, so we can just look at vCPU 0's pvti.
	 */

	do {
		version = pvclock_read_begin(pvti);

		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
			*mode = VCLOCK_NONE;
			return 0;
		}

		ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
	} while (pvclock_read_retry(pvti, version));

	/* refer to vread_tsc() comment for rationale */
	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif

notrace static cycle_t vread_tsc(void)
{
	cycle_t ret = (cycle_t)rdtsc_ordered();
	u64 last = gtod->cycle_last;

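	/*
	 * If the TSC we just read is slightly behind cycle_last (for
	 * instance because of small inter-CPU TSC skew), clamp to
	 * cycle_last so the clock never appears to go backwards.
	 */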
	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

notrace static inline u64 vgetsns(int *mode)
{
	u64 v;
	cycles_t cycles;

	if (gtod->vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
	else
		return 0;
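	/* The masked cycle delta times gtod->mult gives shifted ns; the caller does >> gtod->shift. */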
	v = (cycles - gtod->cycle_last) & gtod->mask;
	return v * gtod->mult;
}

/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

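	/*
	 * Seqcount-style read loop: retry if the kernel updated the vvar
	 * copy of the timekeeping data while we were reading it.
	 */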
	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static int __always_inline do_monotonic(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

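/*
 * The *_COARSE clocks return the values cached at the last timer tick, so
 * no clocksource read (and therefore no fallback) is needed.
 */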
notrace static void do_realtime_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->wall_time_coarse_sec;
		ts->tv_nsec = gtod->wall_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace static void do_monotonic_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->monotonic_time_coarse_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	switch (clock) {
	case CLOCK_REALTIME:
		if (do_realtime(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_MONOTONIC:
		if (do_monotonic(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_REALTIME_COARSE:
		do_realtime_coarse(ts);
		break;
	case CLOCK_MONOTONIC_COARSE:
		do_monotonic_coarse(ts);
		break;
	default:
		goto fallback;
	}

	return 0;
fallback:
	return vdso_fallback_gettime(clock, ts);
}
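/* Also export the unprefixed name as a weak alias for callers that expect it. */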
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
			return vdso_fallback_gtod(tv, tz);
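		/*
		 * do_realtime() filled *tv as a struct timespec, so the second
		 * field currently holds nanoseconds; scale it to microseconds.
		 */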
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		tz->tz_minuteswest = gtod->tz_minuteswest;
		tz->tz_dsttime = gtod->tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = ACCESS_ONCE(gtod->wall_time_sec);

	if (t)
		*t = result;
	return result;
}
int time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));