/*P:500 Just as userspace programs request kernel operations through a system
 * call, the Guest requests Host operations through a "hypercall".  You might
 * notice this nomenclature doesn't really follow any logic, but the name has
 * been around for long enough that we're stuck with it.  As you'd expect, this
 * code is basically one big switch statement. :*/

/*  Copyright (C) 2006 Rusty Russell IBM Corporation

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
*/
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <irq_vectors.h>
#include "lg.h"

/*H:120 This is the core hypercall routine: where the Guest gets what it
 * wants.  Or gets killed.  Or, in the case of LHCALL_CRASH, both.
 *
 * Remember from the Guest: %eax == which call to make, and the arguments are
 * packed into %edx, %ebx and %ecx if needed. */
static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
{
	switch (regs->eax) {
	case LHCALL_FLUSH_ASYNC:
R
Rusty Russell 已提交
40 41
		/* This call does nothing, except by breaking out of the Guest
		 * it makes us process all the asynchronous hypercalls. */
R
Rusty Russell 已提交
42 43
		break;
	case LHCALL_LGUEST_INIT:
R
Rusty Russell 已提交
44 45
		/* You can't get here unless you're already initialized.  Don't
		 * do that. */
R
Rusty Russell 已提交
46 47 48
		kill_guest(lg, "already have lguest_data");
		break;
	case LHCALL_CRASH: {
R
Rusty Russell 已提交
49 50
		/* Crash is such a trivial hypercall that we do it in four
		 * lines right here. */
R
Rusty Russell 已提交
51
		char msg[128];
R
Rusty Russell 已提交
52 53
		/* If the lgread fails, it will call kill_guest() itself; the
		 * kill_guest() with the message will be ignored. */
R
Rusty Russell 已提交
54 55 56 57 58 59
		lgread(lg, msg, regs->edx, sizeof(msg));
		msg[sizeof(msg)-1] = '\0';
		kill_guest(lg, "CRASH: %s", msg);
		break;
	}
	case LHCALL_FLUSH_TLB:
R
Rusty Russell 已提交
60 61
		/* FLUSH_TLB comes in two flavors, depending on the
		 * argument: */
R
Rusty Russell 已提交
62 63 64 65 66 67
		if (regs->edx)
			guest_pagetable_clear_all(lg);
		else
			guest_pagetable_flush_user(lg);
		break;
	case LHCALL_BIND_DMA:
R
Rusty Russell 已提交
68 69 70 71 72 73
		/* BIND_DMA really wants four arguments, but it's the only call
		 * which does.  So the Guest packs the number of buffers and
		 * the interrupt number into the final argument, and we decode
		 * it here.  This can legitimately fail, since we currently
		 * place a limit on the number of DMA pools a Guest can have.
		 * So we return true or false from this call. */
R
Rusty Russell 已提交
74 75 76
		regs->eax = bind_dma(lg, regs->edx, regs->ebx,
				     regs->ecx >> 8, regs->ecx & 0xFF);
		break;
R
Rusty Russell 已提交
77 78 79

	/* All these calls simply pass the arguments through to the right
	 * routines. */
R
Rusty Russell 已提交
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
	case LHCALL_SEND_DMA:
		send_dma(lg, regs->edx, regs->ebx);
		break;
	case LHCALL_LOAD_GDT:
		load_guest_gdt(lg, regs->edx, regs->ebx);
		break;
	case LHCALL_LOAD_IDT_ENTRY:
		load_guest_idt_entry(lg, regs->edx, regs->ebx, regs->ecx);
		break;
	case LHCALL_NEW_PGTABLE:
		guest_new_pagetable(lg, regs->edx);
		break;
	case LHCALL_SET_STACK:
		guest_set_stack(lg, regs->edx, regs->ebx, regs->ecx);
		break;
	case LHCALL_SET_PTE:
		guest_set_pte(lg, regs->edx, regs->ebx, mkgpte(regs->ecx));
		break;
	case LHCALL_SET_PMD:
		guest_set_pmd(lg, regs->edx, regs->ebx);
		break;
	case LHCALL_LOAD_TLS:
		guest_load_tls(lg, regs->edx);
		break;
	case LHCALL_SET_CLOCKEVENT:
		guest_set_clockevent(lg, regs->edx);
		break;
R
Rusty Russell 已提交
107

R
Rusty Russell 已提交
108
	case LHCALL_TS:
R
Rusty Russell 已提交
109
		/* This sets the TS flag, as we saw used in run_guest(). */
R
Rusty Russell 已提交
110 111 112
		lg->ts = regs->edx;
		break;
	case LHCALL_HALT:
R
Rusty Russell 已提交
113
		/* Similarly, this sets the halted flag for run_guest(). */
R
Rusty Russell 已提交
114 115 116 117 118 119 120
		lg->halted = 1;
		break;
	default:
		kill_guest(lg, "Bad hypercall %li\n", regs->eax);
	}
}

/* Asynchronous hypercalls are easy: we just look in the array in the Guest's
 * "struct lguest_data" and see if there are any new ones marked "ready".
 *
 * We are careful to do these in order: obviously we respect the order the
 * Guest put them in the ring, but we also promise the Guest that they will
 * happen before any normal hypercall (which is why we check this before
 * checking for a normal hcall). */
static void do_async_hcalls(struct lguest *lg)
{
	unsigned int i;
	u8 st[LHCALL_RING_SIZE];

R
Rusty Russell 已提交
133
	/* For simplicity, we copy the entire call status array in at once. */
R
Rusty Russell 已提交
134 135 136
	if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st)))
		return;

R
Rusty Russell 已提交
137 138

	/* We process "struct lguest_data"s hcalls[] ring once. */
R
Rusty Russell 已提交
139 140
	for (i = 0; i < ARRAY_SIZE(st); i++) {
		struct lguest_regs regs;
R
Rusty Russell 已提交
141 142 143
		/* We remember where we were up to from last time.  This makes
		 * sure that the hypercalls are done in the order the Guest
		 * places them in the ring. */
R
Rusty Russell 已提交
144 145
		unsigned int n = lg->next_hcall;

R
Rusty Russell 已提交
146
		/* 0xFF means there's no call here (yet). */
R
Rusty Russell 已提交
147 148 149
		if (st[n] == 0xFF)
			break;

R
Rusty Russell 已提交
150 151
		/* OK, we have hypercall.  Increment the "next_hcall" cursor,
		 * and wrap back to 0 if we reach the end. */
R
Rusty Russell 已提交
152 153 154
		if (++lg->next_hcall == LHCALL_RING_SIZE)
			lg->next_hcall = 0;

R
Rusty Russell 已提交
155 156
		/* We copy the hypercall arguments into a fake register
		 * structure.  This makes life simple for do_hcall(). */
R
Rusty Russell 已提交
157 158 159 160 161 162 163 164
		if (get_user(regs.eax, &lg->lguest_data->hcalls[n].eax)
		    || get_user(regs.edx, &lg->lguest_data->hcalls[n].edx)
		    || get_user(regs.ecx, &lg->lguest_data->hcalls[n].ecx)
		    || get_user(regs.ebx, &lg->lguest_data->hcalls[n].ebx)) {
			kill_guest(lg, "Fetching async hypercalls");
			break;
		}

R
Rusty Russell 已提交
165
		/* Do the hypercall, same as a normal one. */
R
Rusty Russell 已提交
166
		do_hcall(lg, &regs);
R
Rusty Russell 已提交
167 168

		/* Mark the hypercall done. */
R
Rusty Russell 已提交
169 170 171 172 173
		if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) {
			kill_guest(lg, "Writing result for async hypercall");
			break;
		}

R
Rusty Russell 已提交
174 175
 		/* Stop doing hypercalls if we've just done a DMA to the
		 * Launcher: it needs to service this first. */
R
Rusty Russell 已提交
176 177 178 179 180
		if (lg->dma_is_pending)
			break;
	}
}

/* Last of all, we look at what happens first of all.  The very first time the
 * Guest makes a hypercall, we end up here to set things up: */
static void initialize(struct lguest *lg)
{
	u32 tsc_speed;

R
Rusty Russell 已提交
187 188
	/* You can't do anything until you're initialized.  The Guest knows the
	 * rules, so we're unforgiving here. */
R
Rusty Russell 已提交
189 190 191 192 193 194
	if (lg->regs->eax != LHCALL_LGUEST_INIT) {
		kill_guest(lg, "hypercall %li before LGUEST_INIT",
			   lg->regs->eax);
		return;
	}

R
Rusty Russell 已提交
195 196 197 198 199 200 201
	/* We insist that the Time Stamp Counter exist and doesn't change with
	 * cpu frequency.  Some devious chip manufacturers decided that TSC
	 * changes could be handled in software.  I decided that time going
	 * backwards might be good for benchmarks, but it's bad for users.
	 *
	 * We also insist that the TSC be stable: the kernel detects unreliable
	 * TSCs for its own purposes, and we use that here. */
R
Rusty Russell 已提交
202 203 204 205 206
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
		tsc_speed = tsc_khz;
	else
		tsc_speed = 0;

R
Rusty Russell 已提交
207
	/* The pointer to the Guest's "struct lguest_data" is the only
208
	 * argument.  We check that address now. */
R
Rusty Russell 已提交
209 210 211 212
	if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) {
		kill_guest(lg, "bad guest page %p", lg->lguest_data);
		return;
	}
213 214 215 216 217 218 219 220

	/* Having checked it, we simply set lg->lguest_data to point straight
	 * into the Launcher's memory at the right place and then use
	 * copy_to_user/from_user from now on, instead of lgread/write.  I put
	 * this in to show that I'm not immune to writing stupid
	 * optimizations. */
	lg->lguest_data = lg->mem_base + lg->regs->edx;

R
Rusty Russell 已提交
221 222
	/* The Guest tells us where we're not to deliver interrupts by putting
	 * the range of addresses into "struct lguest_data". */
R
Rusty Russell 已提交
223 224
	if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
	    || get_user(lg->noirq_end, &lg->lguest_data->noirq_end)
R
Rusty Russell 已提交
225 226
	    /* We tell the Guest that it can't use the top 4MB of virtual
	     * addresses used by the Switcher. */
R
Rusty Russell 已提交
227 228
	    || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
	    || put_user(tsc_speed, &lg->lguest_data->tsc_khz)
R
Rusty Russell 已提交
229
	    /* We also give the Guest a unique id, as used in lguest_net.c. */
R
Rusty Russell 已提交
230 231 232
	    || put_user(lg->guestid, &lg->lguest_data->guestid))
		kill_guest(lg, "bad guest page %p", lg->lguest_data);

233 234 235
	/* We write the current time into the Guest's data page once now. */
	write_timestamp(lg);

R
Rusty Russell 已提交
236 237 238 239
	/* This is the one case where the above accesses might have been the
	 * first write to a Guest page.  This may have caused a copy-on-write
	 * fault, but the Guest might be referring to the old (read-only)
	 * page. */
R
Rusty Russell 已提交
240 241
	guest_pagetable_clear_all(lg);
}
/* Now we've examined the hypercall code; our Guest can make requests.  There
 * is one other way we can do things for the Guest, as we see in
 * emulate_insn(). */

/*H:110 Tricky point: we mark the hypercall as "done" once we've done it.
 * Normally we don't need to do this: the Guest will run again and update the
 * trap number before we come back around the run_guest() loop to
 * do_hypercalls().
 *
 * However, if we are signalled or the Guest sends DMA to the Launcher, that
 * loop will exit without running the Guest.  When it comes back it would try
 * to re-run the hypercall. */
static void clear_hcall(struct lguest *lg)
{
	lg->regs->trapnum = 255;
}

/*H:100
 * Hypercalls
 *
 * Remember from the Guest, hypercalls come in two flavors: normal and
 * asynchronous.  This file handles both types.
 */
void do_hypercalls(struct lguest *lg)
{
R
Rusty Russell 已提交
267
	/* Not initialized yet? */
R
Rusty Russell 已提交
268
	if (unlikely(!lg->lguest_data)) {
R
Rusty Russell 已提交
269 270
		/* Did the Guest make a hypercall?  We might have come back for
		 * some other reason (an interrupt, a different trap). */
R
Rusty Russell 已提交
271
		if (lg->regs->trapnum == LGUEST_TRAP_ENTRY) {
R
Rusty Russell 已提交
272
			/* Set up the "struct lguest_data" */
R
Rusty Russell 已提交
273
			initialize(lg);
R
Rusty Russell 已提交
274
			/* The hypercall is done. */
R
Rusty Russell 已提交
275 276 277 278 279
			clear_hcall(lg);
		}
		return;
	}

R
Rusty Russell 已提交
280 281 282
	/* The Guest has initialized.
	 *
	 * Look in the hypercall ring for the async hypercalls: */
R
Rusty Russell 已提交
283
	do_async_hcalls(lg);
R
Rusty Russell 已提交
284 285 286 287

	/* If we stopped reading the hypercall ring because the Guest did a
	 * SEND_DMA to the Launcher, we want to return now.  Otherwise if the
	 * Guest asked us to do a hypercall, we do it. */
R
Rusty Russell 已提交
288 289
	if (!lg->dma_is_pending && lg->regs->trapnum == LGUEST_TRAP_ENTRY) {
		do_hcall(lg, lg->regs);
R
Rusty Russell 已提交
290
		/* The hypercall is done. */
R
Rusty Russell 已提交
291 292 293
		clear_hcall(lg);
	}
}

/* This routine supplies the Guest with time: it's used for wallclock time at
 * initial boot and as a rough time source if the TSC isn't available. */
void write_timestamp(struct lguest *lg)
{
	struct timespec now;
	ktime_get_real_ts(&now);
301
	if (copy_to_user(&lg->lguest_data->time, &now, sizeof(struct timespec)))
302 303
		kill_guest(lg, "Writing timestamp");
}