/*
 * sched_clock for unstable cpu clocks
 *
 *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 *  Updates and enhancements:
 *    Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
 *
 * Based on code by:
 *   Ingo Molnar <mingo@redhat.com>
 *   Guillaume Chazarain <guichaz@gmail.com>
 *
 * Create a semi stable clock from a mixture of other events, including:
 *  - gtod
 *  - jiffies
 *  - sched_clock()
 *  - explicit idle events
 *
 * We use gtod as base and the unstable clock deltas. The deltas are filtered,
 * making it monotonic and keeping it within an expected window.  This window
 * is set up using jiffies.
 *
 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
 * that is otherwise invisible (TSC gets stopped).
 *
 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
 * consistent between cpus (never more than 1 jiffy difference).
 */
#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/ktime.h>
#include <linux/module.h>

35 36 37 38 39 40 41 42 43
/*
 * Default scheduler clock: the current jiffies count converted to
 * nanoseconds.  The symbol is weak so that architectures and
 * sub-architectures can supply their own implementation.
 */
unsigned long long __attribute__((weak)) sched_clock(void)
{
	/* length of one jiffy in nanoseconds */
	const unsigned long long nsec_per_jiffy = NSEC_PER_SEC / HZ;

	return nsec_per_jiffy * (unsigned long long)jiffies;
}
44

45 46
/*
 * Set to 1 by sched_clock_init().  While it is 0, sched_clock_cpu()
 * returns 0 and sched_clock_tick() returns without doing anything.
 */
static __read_mostly int sched_clock_running;

47 48 49 50 51 52 53 54 55 56
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK

struct sched_clock_data {
	/*
	 * Raw spinlock - this is a special case: this might be called
	 * from within instrumentation code so we dont want to do any
	 * instrumentation ourselves.
	 */
	raw_spinlock_t		lock;

57
	unsigned long		tick_jiffies;
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
	u64			tick_raw;
	u64			tick_gtod;
	u64			clock;
};

/* One sched_clock_data instance per cpu; accessed via this_scd()/cpu_sdc(). */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);

/* Return the clock data belonging to the cpu we are currently running on. */
static inline struct sched_clock_data *this_scd(void)
{
	struct sched_clock_data *local = &__get_cpu_var(sched_clock_data);

	return local;
}

/* Return the clock data belonging to the given @cpu. */
static inline struct sched_clock_data *cpu_sdc(int cpu)
{
	struct sched_clock_data *remote = &per_cpu(sched_clock_data, cpu);

	return remote;
}

void sched_clock_init(void)
{
	u64 ktime_now = ktime_to_ns(ktime_get());
P
Peter Zijlstra 已提交
78
	unsigned long now_jiffies = jiffies;
79 80 81 82 83 84
	int cpu;

	for_each_possible_cpu(cpu) {
		struct sched_clock_data *scd = cpu_sdc(cpu);

		scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
85
		scd->tick_jiffies = now_jiffies;
P
Peter Zijlstra 已提交
86
		scd->tick_raw = 0;
87 88 89
		scd->tick_gtod = ktime_now;
		scd->clock = ktime_now;
	}
P
Peter Zijlstra 已提交
90 91

	sched_clock_running = 1;
92 93 94 95 96 97 98 99
}

/*
 * update the percpu scd from the raw @now value
 *
 *  - filter out backward motion
 *  - use jiffies to generate a min,max window to clip the raw values
 */
100
static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
101 102
{
	unsigned long now_jiffies = jiffies;
103
	long delta_jiffies = now_jiffies - scd->tick_jiffies;
104 105
	u64 clock = scd->clock;
	u64 min_clock, max_clock;
I
Ingo Molnar 已提交
106
	s64 delta = now - scd->tick_raw;
107 108

	WARN_ON_ONCE(!irqs_disabled());
109
	min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
110 111 112 113 114 115

	if (unlikely(delta < 0)) {
		clock++;
		goto out;
	}

116
	max_clock = min_clock + TICK_NSEC;
117

118
	if (unlikely(clock + delta > max_clock)) {
119 120 121 122 123 124 125 126 127 128 129 130
		if (clock < max_clock)
			clock = max_clock;
		else
			clock++;
	} else {
		clock += delta;
	}

 out:
	if (unlikely(clock < min_clock))
		clock = min_clock;

131 132
	scd->tick_jiffies = now_jiffies;
	scd->clock = clock;
133 134

	return clock;
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
}

/*
 * Take the locks of two clock data structures.  The structure at the
 * lower address is always locked first, so two cpus locking the same
 * pair agree on the order.
 */
static void lock_double_clock(struct sched_clock_data *data1,
				struct sched_clock_data *data2)
{
	struct sched_clock_data *first = data1;
	struct sched_clock_data *second = data2;

	if (!(data1 < data2)) {
		first = data2;
		second = data1;
	}

	__raw_spin_lock(&first->lock);
	__raw_spin_lock(&second->lock);
}

u64 sched_clock_cpu(int cpu)
{
	struct sched_clock_data *scd = cpu_sdc(cpu);
152
	u64 now, clock, this_clock, remote_clock;
153

P
Peter Zijlstra 已提交
154 155 156
	if (unlikely(!sched_clock_running))
		return 0ull;

157 158 159 160 161 162 163 164
	WARN_ON_ONCE(!irqs_disabled());
	now = sched_clock();

	if (cpu != raw_smp_processor_id()) {
		struct sched_clock_data *my_scd = this_scd();

		lock_double_clock(scd, my_scd);

165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
		this_clock = __update_sched_clock(my_scd, now);
		remote_clock = scd->clock;

		/*
		 * Use the opportunity that we have both locks
		 * taken to couple the two clocks: we take the
		 * larger time as the latest time for both
		 * runqueues. (this creates monotonic movement)
		 */
		if (likely(remote_clock < this_clock)) {
			clock = this_clock;
			scd->clock = clock;
		} else {
			/*
			 * Should be rare, but possible:
			 */
			clock = remote_clock;
			my_scd->clock = remote_clock;
		}
184 185 186 187

		__raw_spin_unlock(&my_scd->lock);
	} else {
		__raw_spin_lock(&scd->lock);
188
		clock = __update_sched_clock(scd, now);
189 190
	}

191 192
	__raw_spin_unlock(&scd->lock);

193 194 195 196 197 198 199 200
	return clock;
}

void sched_clock_tick(void)
{
	struct sched_clock_data *scd = this_scd();
	u64 now, now_gtod;

P
Peter Zijlstra 已提交
201 202 203
	if (unlikely(!sched_clock_running))
		return;

204 205 206
	WARN_ON_ONCE(!irqs_disabled());

	now_gtod = ktime_to_ns(ktime_get());
207
	now = sched_clock();
208 209

	__raw_spin_lock(&scd->lock);
210
	__update_sched_clock(scd, now);
211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
	/*
	 * update tick_gtod after __update_sched_clock() because that will
	 * already observe 1 new jiffy; adding a new tick_gtod to that would
	 * increase the clock 2 jiffies.
	 */
	scd->tick_raw = now;
	scd->tick_gtod = now_gtod;
	__raw_spin_unlock(&scd->lock);
}

/*
 * Called when going deep-idle, with irqs disabled: bring the local
 * cpu's clock up to date before sleeping.
 */
void sched_clock_idle_sleep_event(void)
{
	int this_cpu = smp_processor_id();

	sched_clock_cpu(this_cpu);
}
EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);

/*
 * Called with irqs disabled after the cpu idled for @delta_ns
 * nanoseconds.
 */
void sched_clock_idle_wakeup_event(u64 delta_ns)
{
	struct sched_clock_data *my_scd = this_scd();

	/*
	 * Any sched_clock() deltas that occurred while we idled are
	 * ignored; the clock is advanced by the PM-provided delta_ns
	 * instead.
	 */
	__raw_spin_lock(&my_scd->lock);
	my_scd->clock = my_scd->clock + delta_ns;
	__raw_spin_unlock(&my_scd->lock);

	touch_softlockup_watchdog();
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);

251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */

/*
 * Without CONFIG_HAVE_UNSTABLE_SCHED_CLOCK there is no per-cpu state to
 * set up; just mark the clock as running so that sched_clock_cpu() starts
 * returning sched_clock() values.
 */
void sched_clock_init(void)
{
	sched_clock_running = 1;
}

/*
 * Without CONFIG_HAVE_UNSTABLE_SCHED_CLOCK the raw sched_clock() value is
 * used directly and @cpu is ignored.  Returns 0 until sched_clock_init()
 * has run.
 */
u64 sched_clock_cpu(int cpu)
{
	if (likely(sched_clock_running))
		return sched_clock();

	return 0;
}

266 267
#endif

268 269 270 271 272
/*
 * Wrapper around sched_clock_cpu() that disables local interrupts for the
 * duration of the call and restores the previous interrupt state
 * afterwards.  Returns the value sched_clock_cpu() produced for @cpu.
 */
unsigned long long cpu_clock(int cpu)
{
	unsigned long long clock;
	unsigned long flags;

	local_irq_save(flags);
	clock = sched_clock_cpu(cpu);
	local_irq_restore(flags);

	return clock;
}
EXPORT_SYMBOL_GPL(cpu_clock);