time.c 6.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
/******************************************************************************
 * arch/ia64/xen/time.c
 *
 * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include <linux/delay.h>
#include <linux/kernel_stat.h>
#include <linux/posix-timers.h>
#include <linux/irq.h>
#include <linux/clocksource.h>

29 30
#include <asm/timex.h>

31 32 33 34 35 36
#include <asm/xen/hypervisor.h>

#include <xen/interface/vcpu.h>

#include "../kernel/fsyscall_gtod_data.h"

37 38 39
/*
 * Per-cpu runstate area; its address is handed to the hypervisor in
 * xen_init_missing_ticks_accounting().
 */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
/*
 * Per-cpu baseline of runnable + offline runstate time; seeded at init and
 * advanced by consider_steal_time() as stolen ticks are accounted.
 */
static DEFINE_PER_CPU(unsigned long, xen_stolen_time);
/*
 * Per-cpu baseline of blocked runstate time; seeded at init and advanced
 * by consider_steal_time() as blocked ticks are accounted.
 */
static DEFINE_PER_CPU(unsigned long, xen_blocked_time);
40 41 42 43 44

/* taken from i386/kernel/time-xen.c */
static void xen_init_missing_ticks_accounting(int cpu)
{
	struct vcpu_register_runstate_memory_area area;
45
	struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu);
46 47 48 49 50 51 52 53 54
	int rc;

	memset(runstate, 0, sizeof(*runstate));

	area.addr.v = runstate;
	rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
				&area);
	WARN_ON(rc && rc != -ENOSYS);

55 56
	per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
	per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
57 58 59 60 61 62 63 64 65 66 67 68 69 70
					    + runstate->time[RUNSTATE_offline];
}

/*
 * Runstate accounting
 */
/* stolen from arch/x86/xen/time.c */
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
	u64 state_time;
	struct vcpu_runstate_info *state;

	BUG_ON(preemptible());

71
	state = &__get_cpu_var(xen_runstate);
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105

	/*
	 * The runstate info is always updated by the hypervisor on
	 * the current CPU, so there's no need to use anything
	 * stronger than a compiler barrier when fetching it.
	 */
	do {
		state_time = state->state_entry_time;
		rmb();
		*res = *state;
		rmb();
	} while (state->state_entry_time != state_time);
}

/* Nanoseconds per timer tick, derived from HZ. */
#define NS_PER_TICK (1000000000LL/HZ)

static unsigned long
consider_steal_time(unsigned long new_itm)
{
	unsigned long stolen, blocked;
	unsigned long delta_itm = 0, stolentick = 0;
	int cpu = smp_processor_id();
	struct vcpu_runstate_info runstate;
	struct task_struct *p = current;

	get_runstate_snapshot(&runstate);

	/*
	 * Check for vcpu migration effect
	 * In this case, itc value is reversed.
	 * This causes huge stolen value.
	 * This function just checks and reject this effect.
	 */
	if (!time_after_eq(runstate.time[RUNSTATE_blocked],
106
			   per_cpu(xen_blocked_time, cpu)))
107 108 109 110
		blocked = 0;

	if (!time_after_eq(runstate.time[RUNSTATE_runnable] +
			   runstate.time[RUNSTATE_offline],
111
			   per_cpu(xen_stolen_time, cpu)))
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
		stolen = 0;

	if (!time_after(delta_itm + new_itm, ia64_get_itc()))
		stolentick = ia64_get_itc() - new_itm;

	do_div(stolentick, NS_PER_TICK);
	stolentick++;

	do_div(stolen, NS_PER_TICK);

	if (stolen > stolentick)
		stolen = stolentick;

	stolentick -= stolen;
	do_div(blocked, NS_PER_TICK);

	if (blocked > stolentick)
		blocked = stolentick;

	if (stolen > 0 || blocked > 0) {
132 133
		account_steal_ticks(stolen);
		account_idle_ticks(blocked);
134 135
		run_local_timers();

136
		rcu_check_callbacks(cpu, user_mode(get_irq_regs()));
137 138 139 140 141

		scheduler_tick();
		run_posix_cpu_timers(p);
		delta_itm += local_cpu_data->itm_delta * (stolen + blocked);

142 143 144 145 146
		if (cpu == time_keeper_id)
			xtime_update(stolen + blocked);

		local_cpu_data->itm_next = delta_itm + new_itm;

147 148
		per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
		per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
	}
	return delta_itm;
}

/*
 * Steal-accounting hook: advance *new_itm by whatever
 * consider_steal_time() reports.
 *
 * Returns 1 when a non-zero advance was applied and the adjusted *new_itm
 * lies after the current ITC value, 0 otherwise.
 */
static int xen_do_steal_accounting(unsigned long *new_itm)
{
	unsigned long advance = consider_steal_time(*new_itm);
	int in_future;

	*new_itm += advance;

	in_future = time_after(*new_itm, ia64_get_itc());
	return (in_future && advance) ? 1 : 0;
}

/*
 * Reset itc_jitter_data.itc_lastcycle to 0 with cmpxchg(), retrying until
 * the exchange succeeds against the value that was just read.
 */
static void xen_itc_jitter_data_reset(void)
{
	u64 seen, prev;

	for (;;) {
		seen = itc_jitter_data.itc_lastcycle;
		prev = cmpxchg(&itc_jitter_data.itc_lastcycle, seen, 0);
		if (likely(prev == seen))
			break;
	}
}

174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
/* based on xen_sched_clock() in arch/x86/xen/time.c. */
/*
 * This relies on HAVE_UNSTABLE_SCHED_CLOCK. If it can't be defined,
 * something similar logic should be implemented here.
 */
/*
 * Xen sched_clock implementation.  Returns the number of unstolen
 * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
 * states.
 */
static unsigned long long xen_sched_clock(void)
{
	struct vcpu_runstate_info runstate;

	unsigned long long now;
	unsigned long long offset;
	unsigned long long ret;

	/*
	 * Ideally sched_clock should be called on a per-cpu basis
	 * anyway, so preempt should already be disabled, but that's
	 * not current practice at the moment.
	 */
	preempt_disable();

	/*
	 * both ia64_native_sched_clock() and xen's runstate are
	 * based on mAR.ITC. So difference of them makes sense.
	 */
	now = ia64_native_sched_clock();

	get_runstate_snapshot(&runstate);

	WARN_ON(runstate.state != RUNSTATE_running);

	offset = 0;
	if (now > runstate.state_entry_time)
		offset = now - runstate.state_entry_time;
	ret = runstate.time[RUNSTATE_blocked] +
		runstate.time[RUNSTATE_running] +
		offset;

	preempt_enable();

	return ret;
}

221 222 223 224
/* Xen implementations of the pv_time_ops hooks defined in this file. */
struct pv_time_ops xen_time_ops __initdata = {
	.init_missing_ticks_accounting = xen_init_missing_ticks_accounting,
	.do_steal_accounting = xen_do_steal_accounting,
	.clocksource_resume = xen_itc_jitter_data_reset,
	.sched_clock = xen_sched_clock,
};
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257

/*
 * Called after suspend, to resume time on the calling cpu.
 * Triggers a local tick and then touches the softlockup watchdog.
 */
static void xen_local_tick_resume(void)
{
	/* Just trigger a tick.  */
	ia64_cpu_local_tick();
	touch_softlockup_watchdog();
}

void
xen_timer_resume(void)
{
	unsigned int cpu;

	xen_local_tick_resume();

	for_each_online_cpu(cpu)
		xen_init_missing_ticks_accounting(cpu);
}

/*
 * Callback passed to smp_call_function() by xen_timer_resume_on_aps():
 * resumes the local tick and re-runs the missing-ticks accounting setup
 * for the cpu it executes on.  The argument is unused.
 */
static void ia64_cpu_local_tick_fn(void *unused)
{
	xen_local_tick_resume();
	xen_init_missing_ticks_accounting(smp_processor_id());
}

/*
 * Run ia64_cpu_local_tick_fn() via smp_call_function() so that the timer
 * resume work is performed on the cpus that call targets.
 */
void
xen_timer_resume_on_aps(void)
{
	/* Last argument 1: presumably "wait for completion" -- confirm. */
	smp_call_function(&ia64_cpu_local_tick_fn, NULL, 1);
}