vclock_gettime.c 7.4 KB
Newer Older
1 2 3 4
/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

11
/* Disable profiling for userspace code: */
12
#define DISABLE_BRANCH_PROFILING
13

14 15 16 17 18
#include <linux/kernel.h>
#include <linux/posix-timers.h>
#include <linux/time.h>
#include <linux/string.h>
#include <asm/vsyscall.h>
19
#include <asm/fixmap.h>
20 21 22 23 24
#include <asm/vgtod.h>
#include <asm/timex.h>
#include <asm/hpet.h>
#include <asm/unistd.h>
#include <asm/io.h>
25
#include <asm/pvclock.h>
26

27
#define gtod (&VVAR(vsyscall_gtod_data))
28

29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
/*
 * Read the TSC-based cycle counter for the vDSO clock paths.
 *
 * Returns the value produced by vget_cycles(), unless that value is
 * smaller than the cycle_last snapshot published in vsyscall_gtod_data,
 * in which case cycle_last is returned instead.  The result is therefore
 * never below cycle_last.
 */
notrace static cycle_t vread_tsc(void)
{
	cycle_t ret;
	u64 last;

	/*
	 * Empirically, a fence (of type that depends on the CPU)
	 * before rdtsc is enough to ensure that rdtsc is ordered
	 * with respect to loads.  The various CPU manuals are unclear
	 * as to whether rdtsc can be reordered with later loads,
	 * but no one has ever seen it happen.
	 */
	rdtsc_barrier();
	ret = (cycle_t)vget_cycles();

	/* Snapshot of the last cycle value recorded by the timekeeper. */
	last = VVAR(vsyscall_gtod_data).clock.cycle_last;

	/* Common case: the counter has not gone behind cycle_last. */
	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

/*
 * Read the HPET-based cycle counter.
 *
 * Performs a 32-bit readl() at byte offset 0xf0 from the virtual address
 * of the VSYSCALL_HPET fixmap slot.
 * NOTE(review): 0xf0 is presumably the HPET main counter register --
 * confirm against the HPET register definitions.
 */
static notrace cycle_t vread_hpet(void)
{
	const void __iomem *hpet_base;

	hpet_base = (const void __iomem *)fix_to_virt(VSYSCALL_HPET);

	return readl(hpet_base + 0xf0);
}

66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
#ifdef CONFIG_PARAVIRT_CLOCK

/*
 * Locate the pvclock time-info record for @cpu.
 *
 * The records are laid out back to back, PAGE_SIZE/PVTI_SIZE per page,
 * in the fixmap slots starting at PVCLOCK_FIXMAP_BEGIN.  The page is
 * selected by the quotient and the record within it by the remainder.
 * BUG()s if the computed fixmap slot would lie past PVCLOCK_FIXMAP_END.
 */
static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
	const struct pvclock_vsyscall_time_info *page;
	int page_idx = cpu / (PAGE_SIZE/PVTI_SIZE);
	int slot = cpu % (PAGE_SIZE/PVTI_SIZE);

	BUG_ON(PVCLOCK_FIXMAP_BEGIN + page_idx > PVCLOCK_FIXMAP_END);

	page = (const struct pvclock_vsyscall_time_info *)
	       __fix_to_virt(PVCLOCK_FIXMAP_BEGIN + page_idx);

	return page + slot;
}

/*
 * Read the paravirtual clock for the vCPU we are currently running on.
 *
 * @mode: in/out vclock mode of the caller; set to VCLOCK_NONE when the
 *        time-info flags read here lack PVCLOCK_TSC_STABLE_BIT.
 *
 * Returns the cycle value obtained from __pvclock_read_cycles(), clamped
 * so that it is never below the published clock.cycle_last.
 */
static notrace cycle_t vread_pvclock(int *mode)
{
	const struct pvclock_vsyscall_time_info *pvti;
	cycle_t ret;
	u64 last;
	u32 version;
	u32 migrate_count;
	u8 flags;
	unsigned cpu, cpu1;


	/*
	 * When looping to get a consistent (time-info, tsc) pair, we
	 * also need to deal with the possibility we can switch vcpus,
	 * so make sure we always re-fetch time-info for the current vcpu.
	 */
	do {
		cpu = __getcpu() & VGETCPU_CPU_MASK;
		/* TODO: We can put vcpu id into higher bits of pvti.version.
		 * This will save a couple of cycles by getting rid of
		 * __getcpu() calls (Gleb).
		 */

		pvti = get_pvti(cpu);

		/* Sampled before the read so a change can be detected below. */
		migrate_count = pvti->migrate_count;

		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);

		/*
		 * Test we're still on the cpu as well as the version.
		 * We could have been migrated just after the first
		 * vgetcpu but before fetching the version, so we
		 * wouldn't notice a version change.
		 */
		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
		/*
		 * Retry if the cpu changed, the version is odd, the version
		 * differs from the one returned by the read, or the
		 * migrate_count moved while we were reading.
		 */
	} while (unlikely(cpu != cpu1 ||
			  (pvti->pvti.version & 1) ||
			  pvti->pvti.version != version ||
			  pvti->migrate_count != migrate_count));

	/* Without the stable bit, report VCLOCK_NONE back to the caller. */
	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
		*mode = VCLOCK_NONE;

	/* refer to tsc.c read_tsc() comment for rationale */
	last = VVAR(vsyscall_gtod_data).clock.cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif

136
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
137 138 139 140 141 142 143
{
	long ret;
	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
	return ret;
}

144 145 146 147 148 149 150 151 152 153
notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}


154
notrace static inline u64 vgetsns(int *mode)
155
{
156
	long v;
157 158 159
	cycles_t cycles;
	if (gtod->clock.vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
160
	else if (gtod->clock.vclock_mode == VCLOCK_HPET)
161
		cycles = vread_hpet();
162 163 164 165
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
166 167
	else
		return 0;
168
	v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
169
	return v * gtod->clock.mult;
170 171
}

172 173
/*
 * Fill *ts with the current wall-clock time.
 *
 * Returns the vclock mode that was in effect for the read.  Callers in
 * this file treat VCLOCK_NONE as "result unusable, use the syscall".
 *
 * Code size doesn't matter (vdso is 4k anyway) and this is faster.
 */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	/* Start from zero; the nanoseconds are added in after the loop. */
	ts->tv_nsec = 0;
	do {
		/* Retry until the gtod data was stable for the whole read. */
		seq = read_seqcount_begin(&gtod->seq);
		mode = gtod->clock.vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		/* Both terms are scaled by 2^shift; convert to nanoseconds. */
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));

	timespec_add_ns(ts, ns);
	return mode;
}

193
notrace static int do_monotonic(struct timespec *ts)
194
{
195 196
	unsigned long seq;
	u64 ns;
197 198
	int mode;

199
	ts->tv_nsec = 0;
200
	do {
201
		seq = read_seqcount_begin(&gtod->seq);
202
		mode = gtod->clock.vclock_mode;
203
		ts->tv_sec = gtod->monotonic_time_sec;
204
		ns = gtod->monotonic_time_snsec;
205
		ns += vgetsns(&mode);
206
		ns >>= gtod->clock.shift;
207
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
208
	timespec_add_ns(ts, ns);
209

210
	return mode;
211 212
}

213
notrace static int do_realtime_coarse(struct timespec *ts)
214 215 216
{
	unsigned long seq;
	do {
217
		seq = read_seqcount_begin(&gtod->seq);
218 219
		ts->tv_sec = gtod->wall_time_coarse.tv_sec;
		ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
220
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
221 222 223
	return 0;
}

224
notrace static int do_monotonic_coarse(struct timespec *ts)
225
{
226
	unsigned long seq;
227
	do {
228
		seq = read_seqcount_begin(&gtod->seq);
229 230
		ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
231
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
232

233 234 235
	return 0;
}

236
/*
 * vDSO entry point for clock_gettime().
 *
 * @clock: clock id requested by the caller.
 * @ts:    destination for the result.
 *
 * CLOCK_REALTIME and CLOCK_MONOTONIC are served from the vDSO unless the
 * helper reports VCLOCK_NONE.  The two *_COARSE clocks are always served
 * from the vDSO.  Every other clock id, and the VCLOCK_NONE case, is
 * forwarded to the real system call.
 *
 * Returns 0 on the vDSO path, otherwise the system call's return value.
 */
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	int ret = VCLOCK_NONE;

	switch (clock) {
	case CLOCK_REALTIME:
		ret = do_realtime(ts);
		break;
	case CLOCK_MONOTONIC:
		ret = do_monotonic(ts);
		break;
	case CLOCK_REALTIME_COARSE:
		return do_realtime_coarse(ts);
	case CLOCK_MONOTONIC_COARSE:
		return do_monotonic_coarse(ts);
	default:
		/* Not handled here: ret stays VCLOCK_NONE, so we fall back. */
		break;
	}

	if (ret == VCLOCK_NONE)
		return vdso_fallback_gettime(clock, ts);
	return 0;
}
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

260
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
261
{
262 263 264 265 266 267 268 269
	long ret = VCLOCK_NONE;

	if (likely(tv != NULL)) {
		BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
			     offsetof(struct timespec, tv_nsec) ||
			     sizeof(*tv) != sizeof(struct timespec));
		ret = do_realtime((struct timespec *)tv);
		tv->tv_usec /= 1000;
270
	}
271 272 273 274 275 276 277 278 279
	if (unlikely(tz != NULL)) {
		/* Avoid memcpy. Some old compilers fail to inline it */
		tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
		tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
	}

	if (ret == VCLOCK_NONE)
		return vdso_fallback_gtod(tv, tz);
	return 0;
280 281 282
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));
A
Andy Lutomirski 已提交
283

284 285 286 287
/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
A
Andy Lutomirski 已提交
288 289
notrace time_t __vdso_time(time_t *t)
{
A
Andy Lutomirski 已提交
290
	/* This is atomic on x86_64 so we don't need any locks. */
291
	time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
A
Andy Lutomirski 已提交
292 293 294 295 296 297 298

	if (t)
		*t = result;
	return result;
}
int time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));