setup.c 4.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>

#include <asm/elf.h>
R
Roland McGrath 已提交
13
#include <asm/vdso.h>
14 15
#include <asm/e820.h>
#include <asm/setup.h>
16
#include <asm/acpi.h>
17 18 19
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

20
#include <xen/page.h>
21
#include <xen/interface/callback.h>
22 23 24 25
#include <xen/interface/physdev.h>
#include <xen/features.h>

#include "xen-ops.h"
26
#include "vdso.h"
27 28 29 30

/*
 * These are code, but not functions.  Defined in entry.S
 *
 * They are declared as char arrays so that their addresses can be handed
 * to register_callback(); xen_arch_setup() registers them as the
 * CALLBACKTYPE_event and CALLBACKTYPE_failsafe handlers respectively.
 */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
T
Tej 已提交
31 32 33
/*
 * Entry targets handed to register_callback() by xen_enable_sysenter()
 * (CALLBACKTYPE_sysenter) and xen_enable_syscall() (CALLBACKTYPE_syscall
 * and CALLBACKTYPE_syscall32).
 * NOTE(review): presumably defined in assembly alongside the callbacks
 * above -- confirm.
 */
extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);
34 35 36 37 38 39 40 41 42 43


/**
 * xen_memory_setup - Build the e820 memory map for a Xen domain.
 *
 * Discards whatever is currently in the e820 table and describes the
 * domain's memory as one RAM region starting at physical address 0.  The
 * page count comes from xen_start_info->nr_pages, capped at
 * MAX_DOMAIN_PAGES.  The ISA range and the Xen-provided start-of-day data
 * are then marked reserved and the resulting map is sanitized.
 *
 * Returns the string "Xen".
 **/
char * __init xen_memory_setup(void)
{
	unsigned long domain_pfns = xen_start_info->nr_pages;
	const u64 isa_len = ISA_END_ADDRESS - ISA_START_ADDRESS;
	const u64 xen_len = xen_start_info->pt_base - xen_start_info->mfn_list;

	/* Never describe more pages than MAX_DOMAIN_PAGES. */
	if (domain_pfns > MAX_DOMAIN_PAGES)
		domain_pfns = MAX_DOMAIN_PAGES;

	/* Start from an empty map: a single RAM region from address 0. */
	e820.nr_map = 0;
	e820_add_region(0, PFN_PHYS((u64)domain_pfns), E820_RAM);

	/*
	 * Under Xen the ISA range is ordinary, usable memory, but it is
	 * reserved anyway because too many things assume they can poke
	 * about in there.
	 */
	e820_add_region(ISA_START_ADDRESS, isa_len, E820_RESERVED);

	/*
	 * Reserve the Xen-provided data:
	 *  - mfn_list
	 *  - xen_start_info
	 * The region runs from mfn_list up to pt_base.
	 * See the comment above "struct start_info" in <xen/interface/xen.h>
	 */
	e820_add_region(__pa(xen_start_info->mfn_list), xen_len,
			E820_RESERVED);

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	return "Xen";
}

/*
 * Idle routine; xen_arch_setup() installs it as pm_idle.
 *
 * With interrupts disabled, check whether a reschedule is pending.  If so,
 * simply re-enable interrupts and return.  Otherwise clear TS_POLLING in
 * the current thread's status, halt via safe_halt(), and set TS_POLLING
 * again once the halt returns.
 */
static void xen_idle(void)
{
	local_irq_disable();

	if (need_resched()) {
		/* Work is already waiting: do not halt. */
		local_irq_enable();
		return;
	}

	current_thread_info()->status &= ~TS_POLLING;
	/* Barrier after clearing the flag, before halting. */
	smp_mb__after_clear_bit();
	/* NOTE(review): presumably re-enables interrupts as it halts -- confirm. */
	safe_halt();
	current_thread_info()->status |= TS_POLLING;
}

87 88
/*
 * Set the bit indicating "nosegneg" library variants should be used.
89 90
 * We only need to bother in pure 32-bit mode; compat 32-bit processes
 * can have un-truncated segments, so wrapping around is allowed.
91
 */
92
static void __init fiddle_vdso(void)
93
{
94 95 96 97 98
#ifdef CONFIG_X86_32
	u32 *mask;
	mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
	mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
99
	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
100
#endif
101 102
}

103
static __cpuinit int register_callback(unsigned type, const void *func)
104
{
105 106 107
	struct callback_register callback = {
		.type = type,
		.address = XEN_CALLBACK(__KERNEL_CS, func),
108 109 110
		.flags = CALLBACKF_mask_events,
	};

111 112 113 114 115
	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}

/*
 * Register the sysenter callback with the hypervisor.
 *
 * The CPU feature that gates this is X86_FEATURE_SEP on 32-bit builds and
 * X86_FEATURE_SYSENTER32 otherwise.  Nothing is done when the boot CPU
 * lacks that feature.  If registration fails, the feature is cleared with
 * setup_clear_cpu_cap().
 */
void __cpuinit xen_enable_sysenter(void)
{
#ifdef CONFIG_X86_32
	unsigned feature = X86_FEATURE_SEP;
#else
	unsigned feature = X86_FEATURE_SYSENTER32;
#endif

	if (!boot_cpu_has(feature))
		return;

	if (register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target) != 0)
		setup_clear_cpu_cap(feature);
}

133 134 135 136 137 138 139
/*
 * Register the syscall callbacks with the hypervisor (64-bit builds only;
 * the function is empty without CONFIG_X86_64).
 *
 * A failure to register the CALLBACKTYPE_syscall handler is only logged.
 * The CALLBACKTYPE_syscall32 handler is registered only when the boot CPU
 * has X86_FEATURE_SYSCALL32, and that feature is cleared if registration
 * fails.
 */
void __cpuinit xen_enable_syscall(void)
{
#ifdef CONFIG_X86_64
	int err = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);

	if (err != 0) {
		/*
		 * Pretty fatal; 64-bit userspace has no other
		 * mechanism for syscalls.
		 */
		printk(KERN_ERR "Failed to set syscall callback: %d\n", err);
	}

	if (!boot_cpu_has(X86_FEATURE_SYSCALL32))
		return;

	err = register_callback(CALLBACKTYPE_syscall32, xen_syscall32_target);
	if (err != 0)
		setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
#endif /* CONFIG_X86_64 */
}

154 155 156 157 158 159 160 161 162
/*
 * Xen-specific architecture setup.
 *
 * In order: enables VM assists, registers the event and failsafe
 * callbacks (BUG() if either fails), enables the sysenter/syscall
 * callbacks, requests IOPL 1, disables ACPI unless this is the initial
 * domain, copies the Xen-supplied command line into boot_command_line,
 * installs xen_idle as pm_idle, disables I/O space through paravirt and
 * finally adjusts the vDSO note mask.
 */
void __init xen_arch_setup(void)
{
	/* Copy no more than the smaller of the two command-line buffers. */
	const size_t cmdline_len = COMMAND_LINE_SIZE < MAX_GUEST_CMDLINE ?
				   COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE;
	struct physdev_set_iopl iopl_req;
	int callbacks_failed;
	int err;

	/* The results of the vm_assist calls are not checked. */
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

	if (!xen_feature(XENFEAT_auto_translated_physmap))
		HYPERVISOR_vm_assist(VMASST_CMD_enable,
				     VMASST_TYPE_pae_extended_cr3);

	/*
	 * Both callbacks are mandatory.  The failsafe registration is only
	 * attempted when the event registration succeeded.
	 */
	callbacks_failed =
		register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
		register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback);
	if (callbacks_failed)
		BUG();

	xen_enable_sysenter();
	xen_enable_syscall();

	/* A failed IOPL request is reported but is not fatal. */
	iopl_req.iopl = 1;
	err = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &iopl_req);
	if (err != 0)
		printk(KERN_INFO "physdev_op failed %d\n", err);

#ifdef CONFIG_ACPI
	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
		disable_acpi();
	}
#endif

	/*
	 * NOTE(review): a raw memcpy does not by itself guarantee that
	 * boot_command_line ends up NUL-terminated -- confirm the source
	 * string is always terminated within cmdline_len bytes.
	 */
	memcpy(boot_command_line, xen_start_info->cmd_line, cmdline_len);

	pm_idle = xen_idle;

	paravirt_disable_iospace();

	fiddle_vdso();
}