/*  Paravirtualization interfaces
    Copyright (C) 2006 Rusty Russell IBM Corporation

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/efi.h>
#include <linux/bcd.h>
#include <linux/start_kernel.h>

#include <asm/bug.h>
#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/arch_hooks.h>
#include <asm/time.h>
#include <asm/irq.h>
#include <asm/delay.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>

/*
 * Do-nothing stub.  It is plugged into several paravirt_ops slots in
 * this file, some of them through a (void *) cast.
 */
static void native_nop(void)
{
	return;
}

/*
 * Default banner hook: logs, at KERN_INFO level, the name stored in
 * paravirt_ops.name.
 */
static void __init default_banner(void)
{
	const char *platform = paravirt_ops.name;

	printk(KERN_INFO "Booting paravirtualized kernel on %s\n", platform);
}

/*
 * Forward to the memory_setup hook in paravirt_ops and hand its result
 * back to the caller unchanged.  The default table in this file installs
 * machine_specific_memory_setup for that hook.
 */
char *memory_setup(void)
{
	char *result;

	result = paravirt_ops.memory_setup();
	return result;
}

/*
 * Simple instruction patching code.
 *
 * DEF_NATIVE(name, code) declares start_<name> and end_<name> as extern
 * const char arrays and emits a file-scope asm statement that places the
 * instruction string "code" between two labels with those same names.
 * The native_insns[] table below records each start/end pair, and
 * native_patch() copies the bytes found between them.
 */
#define DEF_NATIVE(name, code)					\
	extern const char start_##name[], end_##name[];		\
	asm("start_" #name ": " code "; end_" #name ":")
/* One instantiation per replacement sequence referenced by native_insns[]. */
DEF_NATIVE(cli, "cli");
DEF_NATIVE(sti, "sti");
DEF_NATIVE(popf, "push %eax; popf");
DEF_NATIVE(pushf, "pushf; pop %eax");
DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli");
DEF_NATIVE(iret, "iret");
DEF_NATIVE(sti_sysexit, "sti; sysexit");

/* One replacement instruction sequence: the bytes in [start, end). */
struct native_insns {
	const char *start, *end;
};

/*
 * Replacement sequences indexed by patch type.  Slots that are not
 * listed are zero-initialized, so their .start is NULL and
 * native_patch() leaves those patch sites alone.
 */
static const struct native_insns native_insns[] = {
	[PARAVIRT_IRQ_DISABLE] = {
		.start = start_cli,
		.end = end_cli,
	},
	[PARAVIRT_IRQ_ENABLE] = {
		.start = start_sti,
		.end = end_sti,
	},
	[PARAVIRT_RESTORE_FLAGS] = {
		.start = start_popf,
		.end = end_popf,
	},
	[PARAVIRT_SAVE_FLAGS] = {
		.start = start_pushf,
		.end = end_pushf,
	},
	[PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = {
		.start = start_pushf_cli,
		.end = end_pushf_cli,
	},
	[PARAVIRT_INTERRUPT_RETURN] = {
		.start = start_iret,
		.end = end_iret,
	},
	[PARAVIRT_STI_SYSEXIT] = {
		.start = start_sti_sysexit,
		.end = end_sti_sysexit,
	},
};

/*
 * native_patch - overwrite a patch site with the native instruction sequence
 * @type:     index into native_insns[] selecting the replacement
 * @clobbers: not used by this implementation
 * @insns:    destination buffer (the patch site)
 * @len:      number of bytes available at @insns
 *
 * Returns the number of bytes copied into @insns, or @len unchanged when
 * @type has no replacement or the replacement does not fit.
 */
static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
{
	const struct native_insns *repl;
	unsigned int repl_len;

	/* Out-of-table types have no replacement: leave the site alone. */
	if (type >= ARRAY_SIZE(native_insns))
		return len;

	repl = &native_insns[type];

	/* Same for table slots that were never filled in. */
	if (!repl->start)
		return len;

	repl_len = repl->end - repl->start;

	/* A replacement longer than the site cannot be used either. */
	if (repl_len > len)
		return len;

	memcpy(insns, repl->start, repl_len);
	return repl_len;
}

/*
 * Forward to the init_IRQ hook in paravirt_ops.  The default table in
 * this file installs native_init_IRQ for that hook.
 */
void init_IRQ(void)
{
	paravirt_ops.init_IRQ();
}

/*
 * Thin wrapper around __native_flush_tlb(); installed below as
 * paravirt_ops.flush_tlb_user.
 */
static void native_flush_tlb(void)
{
	__native_flush_tlb();
}

/*
 * Global pages have to be flushed a bit differently. Not a real
 * performance problem because this does not happen often.
 *
 * Thin wrapper around __native_flush_tlb_global(); installed below as
 * paravirt_ops.flush_tlb_kernel.
 */
static void native_flush_tlb_global(void)
{
	__native_flush_tlb_global();
}

/*
 * Thin wrapper that passes @addr to __native_flush_tlb_single();
 * installed below as paravirt_ops.flush_tlb_single.
 */
static void native_flush_tlb_single(u32 addr)
{
	__native_flush_tlb_single(addr);
}

#ifndef CONFIG_X86_PAE
/* Non-PAE set_pte: store @pteval into *@ptep with a plain assignment. */
static void native_set_pte(pte_t *ptep, pte_t pteval)
{
	*ptep = pteval;
}

/*
 * Non-PAE set_pte_at: store @pteval into *@ptep with a plain assignment.
 * @mm and @addr are accepted but not used here.
 */
static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
{
	*ptep = pteval;
}

/* Non-PAE set_pmd: store @pmdval into *@pmdp with a plain assignment. */
static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	*pmdp = pmdval;
}

#else /* CONFIG_X86_PAE */

/*
 * PAE set_pte: the entry is written as two halves.  pte_high is stored
 * first, then a write barrier, then pte_low.
 * NOTE(review): presumably pte_low carries the present bit, so writing
 * it last keeps a half-written entry from looking valid - confirm.
 */
static void native_set_pte(pte_t *ptep, pte_t pte)
{
	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;
}

/*
 * PAE set_pte_at: same store sequence as the PAE native_set_pte()
 * (pte_high, write barrier, pte_low).  @mm and @addr are accepted but
 * not used here.
 */
static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
{
	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;
}

/*
 * PAE set_pte_present: replace an entry in three ordered steps, with a
 * write barrier between each: zero pte_low, store the new pte_high, then
 * store the new pte_low.  @mm and @addr are accepted but not used here.
 * NOTE(review): presumably zeroing pte_low first marks the entry
 * not-present while the halves are inconsistent - confirm.
 */
static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
	ptep->pte_low = 0;
	smp_wmb();
	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;
}

/*
 * PAE set_pte_atomic: hand the whole entry value, pte_val(@pteval), to
 * set_64bit() in a single call instead of writing the halves separately.
 */
static void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
{
	set_64bit((unsigned long long *)ptep, pte_val(pteval));
}

/*
 * PAE set_pmd: hand the whole entry value, pmd_val(@pmdval), to
 * set_64bit() in a single call.
 */
static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	set_64bit((unsigned long long *)pmdp, pmd_val(pmdval));
}

/* PAE set_pud: store @pudval into *@pudp with a plain assignment. */
static void native_set_pud(pud_t *pudp, pud_t pudval)
{
	*pudp = pudval;
}

/*
 * PAE pte_clear: zero pte_low first, then a write barrier, then zero
 * pte_high.  @mm and @addr are accepted but not used here.
 */
static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	ptep->pte_low = 0;
	smp_wmb();
	ptep->pte_high = 0;
}

/*
 * PAE pmd_clear: treat the entry as two u32 words and zero them in
 * order, first word, write barrier, second word.
 */
static void native_pmd_clear(pmd_t *pmd)
{
	u32 *tmp = (u32 *)pmd;

	*tmp = 0;
	smp_wmb();
	*(tmp + 1) = 0;
}
#endif /* CONFIG_X86_PAE */

/*
 * These are in entry.S.  They are installed below as paravirt_ops.iret
 * and paravirt_ops.irq_enable_sysexit.
 */
extern void native_iret(void);
extern void native_irq_enable_sysexit(void);

/*
 * Call the banner hook in paravirt_ops (default_banner in the default
 * table in this file).  Always returns 0.  Registered through
 * core_initcall() on the line following the function.
 */
static int __init print_banner(void)
{
	paravirt_ops.banner();
	return 0;
}
core_initcall(print_banner);

struct paravirt_ops paravirt_ops = {
	.name = "bare hardware",
	.paravirt_enabled = 0,
	.kernel_rpl = 0,

208
 	.patch = native_patch,
209 210 211 212 213
	.banner = default_banner,
	.arch_setup = native_nop,
	.memory_setup = machine_specific_memory_setup,
	.get_wallclock = native_get_wallclock,
	.set_wallclock = native_set_wallclock,
Z
Zachary Amsden 已提交
214
	.time_init = hpet_time_init,
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
	.init_IRQ = native_init_IRQ,

	.cpuid = native_cpuid,
	.get_debugreg = native_get_debugreg,
	.set_debugreg = native_set_debugreg,
	.clts = native_clts,
	.read_cr0 = native_read_cr0,
	.write_cr0 = native_write_cr0,
	.read_cr2 = native_read_cr2,
	.write_cr2 = native_write_cr2,
	.read_cr3 = native_read_cr3,
	.write_cr3 = native_write_cr3,
	.read_cr4 = native_read_cr4,
	.read_cr4_safe = native_read_cr4_safe,
	.write_cr4 = native_write_cr4,
	.save_fl = native_save_fl,
	.restore_fl = native_restore_fl,
	.irq_disable = native_irq_disable,
	.irq_enable = native_irq_enable,
	.safe_halt = native_safe_halt,
	.halt = native_halt,
	.wbinvd = native_wbinvd,
237 238
	.read_msr = native_read_msr_safe,
	.write_msr = native_write_msr_safe,
239 240
	.read_tsc = native_read_tsc,
	.read_pmc = native_read_pmc,
241
	.get_scheduled_cycles = native_read_tsc,
Z
Zachary Amsden 已提交
242
	.get_cpu_khz = native_calculate_cpu_khz,
243 244 245 246 247 248 249 250
	.load_tr_desc = native_load_tr_desc,
	.set_ldt = native_set_ldt,
	.load_gdt = native_load_gdt,
	.load_idt = native_load_idt,
	.store_gdt = native_store_gdt,
	.store_idt = native_store_idt,
	.store_tr = native_store_tr,
	.load_tls = native_load_tls,
251 252 253
	.write_ldt_entry = write_dt_entry,
	.write_gdt_entry = write_dt_entry,
	.write_idt_entry = write_dt_entry,
254 255 256 257 258
	.load_esp0 = native_load_esp0,

	.set_iopl_mask = native_set_iopl_mask,
	.io_delay = native_io_delay,

259 260 261 262
#ifdef CONFIG_X86_LOCAL_APIC
	.apic_write = native_apic_write,
	.apic_write_atomic = native_apic_write_atomic,
	.apic_read = native_apic_read,
Z
Zachary Amsden 已提交
263 264
	.setup_boot_clock = setup_boot_APIC_clock,
	.setup_secondary_clock = setup_secondary_APIC_clock,
265
#endif
266
	.set_lazy_mode = (void *)native_nop,
267

268 269 270 271
	.flush_tlb_user = native_flush_tlb,
	.flush_tlb_kernel = native_flush_tlb_global,
	.flush_tlb_single = native_flush_tlb_single,

Z
Zachary Amsden 已提交
272 273
	.map_pt_hook = (void *)native_nop,

274 275 276 277 278 279
	.alloc_pt = (void *)native_nop,
	.alloc_pd = (void *)native_nop,
	.alloc_pd_clone = (void *)native_nop,
	.release_pt = (void *)native_nop,
	.release_pd = (void *)native_nop,

280 281 282 283 284 285 286 287 288 289 290 291 292
	.set_pte = native_set_pte,
	.set_pte_at = native_set_pte_at,
	.set_pmd = native_set_pmd,
	.pte_update = (void *)native_nop,
	.pte_update_defer = (void *)native_nop,
#ifdef CONFIG_X86_PAE
	.set_pte_atomic = native_set_pte_atomic,
	.set_pte_present = native_set_pte_present,
	.set_pud = native_set_pud,
	.pte_clear = native_pte_clear,
	.pmd_clear = native_pmd_clear,
#endif

293 294
	.irq_enable_sysexit = native_irq_enable_sysexit,
	.iret = native_iret,
295 296

	.startup_ipi_hook = (void *)native_nop,
297
};

/*
 * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops
 * semantics are subject to change. Hence paravirt_ops is exported
 * GPL-only, for internal use, until this gets sorted out and all
 * low-level CPU ops used by modules are exported separately.
 */
EXPORT_SYMBOL_GPL(paravirt_ops);