diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 0f23d67f958ff5b6ee96a0248fdd2c9b9ab65586..bec5a32e4095d705ee16d6bd9a538b4fdaadfe52 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -486,9 +486,12 @@ static void concat(char *dst, char *args[]) unsigned int i, len = 0; for (i = 0; args[i]; i++) { + if (i) { + strcat(dst+len, " "); + len++; + } strcpy(dst+len, args[i]); - strcat(dst+len, " "); - len += strlen(args[i]) + 1; + len += strlen(args[i]); } /* In case it's empty. */ dst[len] = '\0'; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index cccb38a59653bff74271593004f995a457645b54..a104c532ff706cb9845140421371fc208a90605b 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -84,7 +84,6 @@ struct lguest_data lguest_data = { .blocked_interrupts = { 1 }, /* Block timer interrupts */ .syscall_vec = SYSCALL_VECTOR, }; -static cycle_t clock_base; /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a * ring buffer of stored hypercalls which the Host will run though next time we @@ -327,8 +326,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, case 1: /* Basic feature request. */ /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ *cx &= 0x00002201; - /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ - *dx &= 0x07808101; + /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */ + *dx &= 0x07808111; /* The Host can do a nice optimization if it knows that the * kernel mappings (addresses above 0xC0000000 or whatever * PAGE_OFFSET is set to) haven't changed. But Linux calls @@ -481,7 +480,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) { *pmdp = pmdval; lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, - (__pa(pmdp)&(PAGE_SIZE-1)), 0); + (__pa(pmdp)&(PAGE_SIZE-1))/4, 0); } /* There are a couple of legacy places where the kernel sets a PTE, but we @@ -595,19 +594,25 @@ static unsigned long lguest_get_wallclock(void) return lguest_data.time.tv_sec; } +/* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at, + * or 0 if it's unusable as a reliable clock source. This matches what we want + * here: if we return 0 from this function, the x86 TSC clock will not register + * itself. */ +static unsigned long lguest_cpu_khz(void) +{ + return lguest_data.tsc_khz; +} + +/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where + * we read the time value given to us by the Host. */ static cycle_t lguest_clock_read(void) { unsigned long sec, nsec; - /* If the Host tells the TSC speed, we can trust that. */ - if (lguest_data.tsc_khz) - return native_read_tsc(); - - /* If we can't use the TSC, we read the time value written by the Host. - * Since it's in two parts (seconds and nanoseconds), we risk reading - * it just as it's changing from 99 & 0.999999999 to 100 and 0, and - * getting 99 and 0. As Linux tends to come apart under the stress of - * time travel, we must be careful: */ + /* Since the time is in two parts (seconds and nanoseconds), we risk + * reading it just as it's changing from 99 & 0.999999999 to 100 and 0, + * and getting 99 and 0. As Linux tends to come apart under the stress + * of time travel, we must be careful: */ do { /* First we read the seconds part. */ sec = lguest_data.time.tv_sec; @@ -622,14 +627,14 @@ static cycle_t lguest_clock_read(void) /* Now if the seconds part has changed, try again. */ } while (unlikely(lguest_data.time.tv_sec != sec)); - /* Our non-TSC clock is in real nanoseconds. */ + /* Our lguest clock is in real nanoseconds. */ return sec*1000000000ULL + nsec; } -/* This is what we tell the kernel is our clocksource. */ +/* This is the fallback clocksource: lower priority than the TSC clocksource. */ static struct clocksource lguest_clock = { .name = "lguest", - .rating = 400, + .rating = 200, .read = lguest_clock_read, .mask = CLOCKSOURCE_MASK(64), .mult = 1 << 22, @@ -637,12 +642,6 @@ static struct clocksource lguest_clock = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -/* The "scheduler clock" is just our real clock, adjusted to start at zero */ -static unsigned long long lguest_sched_clock(void) -{ - return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); -} - /* We also need a "struct clock_event_device": Linux asks us to set it to go * off some time in the future. Actually, James Morris figured all this out, I * just applied the patch. */ @@ -712,19 +711,8 @@ static void lguest_time_init(void) /* Set up the timer interrupt (0) to go to our simple timer routine */ set_irq_handler(0, lguest_time_irq); - /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can - * use the TSC, otherwise it's a dumb nanosecond-resolution clock. - * Either way, the "rating" is set so high that it's always chosen over - * any other clocksource. */ - if (lguest_data.tsc_khz) - lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, - lguest_clock.shift); - clock_base = lguest_clock_read(); clocksource_register(&lguest_clock); - /* Now we've set up our clock, we can use it as the scheduler clock */ - pv_time_ops.sched_clock = lguest_sched_clock; - /* We can't set cpumask in the initializer: damn C limitations! Set it * here and register our timer device. */ lguest_clockevent.cpumask = cpumask_of_cpu(0); @@ -995,6 +983,7 @@ __init void lguest_init(void) /* time operations */ pv_time_ops.get_wallclock = lguest_get_wallclock; pv_time_ops.time_init = lguest_time_init; + pv_time_ops.get_cpu_khz = lguest_cpu_khz; /* Now is a good time to look at the implementations of these functions * before returning to the rest of lguest_init(). */ diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 7743d73768df273c008b700f8165ec5dafd668ec..c632c08cbbdc949793f322b75186aa77beddf1a4 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -69,11 +69,22 @@ static __init int map_switcher(void) switcher_page[i] = virt_to_page(addr); } + /* First we check that the Switcher won't overlap the fixmap area at + * the top of memory. It's currently nowhere near, but it could have + * very strange effects if it ever happened. */ + if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ + err = -ENOMEM; + printk("lguest: mapping switcher would thwack fixmap\n"); + goto free_pages; + } + /* Now we reserve the "virtual memory area" we want: 0xFFC00000 * (SWITCHER_ADDR). We might not get it in theory, but in practice - * it's worked so far. */ + * it's worked so far. The end address needs +1 because __get_vm_area + * allocates an extra guard page, so we need space for that. */ switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, - VM_ALLOC, SWITCHER_ADDR, VMALLOC_END); + VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR + + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); if (!switcher_vma) { err = -ENOMEM; printk("lguest: could not map switcher pages high\n"); diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 85d42d3d01a9a9bbeb739ce422271848cd4be3ff..2221485b07739a48114f44dc38b4f4249a93360c 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -241,15 +241,16 @@ static ssize_t write(struct file *file, const char __user *in, cpu = &lg->cpus[cpu_id]; if (!cpu) return -EINVAL; - } - /* Once the Guest is dead, all you can do is read() why it died. */ - if (lg && lg->dead) - return -ENOENT; + /* Once the Guest is dead, you can only read() why it died. */ + if (lg->dead) + return -ENOENT; - /* If you're not the task which owns the Guest, you can only break */ - if (lg && current != cpu->tsk && req != LHREQ_BREAK) - return -EPERM; + /* If you're not the task which owns the Guest, all you can do + * is break the Launcher out of running the Guest. */ + if (current != cpu->tsk && req != LHREQ_BREAK) + return -EPERM; + } switch (req) { case LHREQ_INITIALIZE: diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 275f23c2deb49ff302e836abfb4eb14e26395a06..a7f64a9d67e009437b30e5007a208c25863be392 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -391,7 +391,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) { unsigned int i; for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) - if (lg->pgdirs[i].gpgdir == pgtable) + if (lg->pgdirs[i].pgdir && lg->pgdirs[i].gpgdir == pgtable) break; return i; }