setup64.c 7.2 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/* 
 * X86-64 specific CPU setup.
 * Copyright (C) 1995  Linus Torvalds
 * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
 * See setup.c for older changelog.
 * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $
 */ 
#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/bootmem.h>
#include <linux/bitops.h>
15
#include <linux/module.h>
16
#include <asm/bootsetup.h>
L
Linus Torvalds 已提交
17 18 19 20 21 22 23 24 25 26
#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/atomic.h>
#include <asm/mmu_context.h>
#include <asm/smp.h>
#include <asm/i387.h>
#include <asm/percpu.h>
#include <asm/proto.h>
27
#include <asm/sections.h>
L
Linus Torvalds 已提交
28

29
char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
L
Linus Torvalds 已提交
30

31
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
L
Linus Torvalds 已提交
32

33 34
struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
L
Linus Torvalds 已提交
35 36 37 38 39

struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; 

char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));

40
unsigned long __supported_pte_mask __read_mostly = ~0UL;
41
static int do_not_nx __cpuinitdata = 0;
L
Linus Torvalds 已提交
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90

/* noexec=on|off
Control non executable mappings for 64bit processes.

on	Enable(default)
off	Disable
*/ 
int __init nonx_setup(char *str)
{
	if (!strncmp(str, "on", 2)) {
                __supported_pte_mask |= _PAGE_NX; 
 		do_not_nx = 0; 
	} else if (!strncmp(str, "off", 3)) {
		do_not_nx = 1;
		__supported_pte_mask &= ~_PAGE_NX;
        }
	return 0;
} 
__setup("noexec=", nonx_setup);	/* parsed early actually */

int force_personality32 = READ_IMPLIES_EXEC;

/* noexec32=on|off
Control non executable heap for 32bit processes.
To control the stack too use noexec=off

on	PROT_READ does not imply PROT_EXEC for 32bit processes
off	PROT_READ implies PROT_EXEC (default)
*/
static int __init nonx32_setup(char *str)
{
	if (!strcmp(str, "on"))
		force_personality32 &= ~READ_IMPLIES_EXEC;
	else if (!strcmp(str, "off"))
		force_personality32 |= READ_IMPLIES_EXEC;
	return 0;
}
__setup("noexec32=", nonx32_setup);

/*
 * Great future plan:
 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
 * Always point %gs to its beginning
 */
void __init setup_per_cpu_areas(void)
{ 
	int i;
	unsigned long size;

91 92 93 94
#ifdef CONFIG_HOTPLUG_CPU
	prefill_possible_map();
#endif

L
Linus Torvalds 已提交
95 96 97 98 99 100 101
	/* Copy section for each CPU (we discard the original) */
	size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
#ifdef CONFIG_MODULES
	if (size < PERCPU_ENOUGH_ROOM)
		size = PERCPU_ENOUGH_ROOM;
#endif

102
	for_each_cpu_mask (i, cpu_possible_map) {
103
		char *ptr;
L
Linus Torvalds 已提交
104 105 106 107 108 109 110 111 112 113

		if (!NODE_DATA(cpu_to_node(i))) {
			printk("cpu with no node %d, num_online_nodes %d\n",
			       i, num_online_nodes());
			ptr = alloc_bootmem(size);
		} else { 
			ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
		}
		if (!ptr)
			panic("Cannot allocate cpu data for CPU %d\n", i);
114
		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
L
Linus Torvalds 已提交
115 116 117 118 119 120
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
	}
} 

void pda_init(int cpu)
{ 
121
	struct x8664_pda *pda = cpu_pda(cpu);
L
Linus Torvalds 已提交
122 123 124

	/* Setup up data that may be needed in __get_free_pages early */
	asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); 
125
	wrmsrl(MSR_GS_BASE, pda);
L
Linus Torvalds 已提交
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148

	pda->cpunumber = cpu; 
	pda->irqcount = -1;
	pda->kernelstack = 
		(unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; 
	pda->active_mm = &init_mm;
	pda->mmu_state = 0;

	if (cpu == 0) {
		/* others are initialized in smpboot.c */
		pda->pcurrent = &init_task;
		pda->irqstackptr = boot_cpu_stack; 
	} else {
		pda->irqstackptr = (char *)
			__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
		if (!pda->irqstackptr)
			panic("cannot allocate irqstack for cpu %d", cpu); 
	}


	pda->irqstackptr += IRQSTACKSIZE-64;
} 

149
char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]
L
Linus Torvalds 已提交
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
__attribute__((section(".bss.page_aligned")));

/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
	/* 
	 * LSTAR and STAR live in a bit strange symbiosis.
	 * They both write to the same internal register. STAR allows to set CS/DS
	 * but only a 32bit target. LSTAR sets the 64bit rip. 	 
	 */ 
	wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32); 
	wrmsrl(MSR_LSTAR, system_call); 

#ifdef CONFIG_IA32_EMULATION   		
	syscall32_cpu_init ();
#endif

	/* Flags to clear on syscall */
	wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); 
}

171
void __cpuinit check_efer(void)
L
Linus Torvalds 已提交
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
{
	unsigned long efer;

	rdmsrl(MSR_EFER, efer); 
        if (!(efer & EFER_NX) || do_not_nx) { 
                __supported_pte_mask &= ~_PAGE_NX; 
        }       
}

/*
 * cpu_init() initializes state that is per-CPU. Some data is already
 * initialized (naturally) in the bootstrap process, such as the GDT
 * and IDT. We reload them nevertheless, this function acts as a
 * 'CPU state barrier', nothing should get across.
 * A lot of state is already set up in PDA init.
 */
188
void __cpuinit cpu_init (void)
L
Linus Torvalds 已提交
189 190 191 192 193 194 195 196 197 198 199
{
	int cpu = stack_smp_processor_id();
	struct tss_struct *t = &per_cpu(init_tss, cpu);
	unsigned long v; 
	char *estacks = NULL; 
	struct task_struct *me;
	int i;

	/* CPU 0 is initialised in head64.c */
	if (cpu != 0) {
		pda_init(cpu);
200
		zap_low_mappings(cpu);
L
Linus Torvalds 已提交
201 202 203 204 205 206 207 208 209 210
	} else 
		estacks = boot_exception_stacks; 

	me = current;

	if (cpu_test_and_set(cpu, cpu_initialized))
		panic("CPU#%d already initialized!\n", cpu);

	printk("Initializing CPU#%d\n", cpu);

211
	clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
L
Linus Torvalds 已提交
212 213 214 215 216

	/*
	 * Initialize the per-CPU GDT with the boot GDT,
	 * and set up the GDT descriptor:
	 */
217 218
	if (cpu)
 		memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);
L
Linus Torvalds 已提交
219 220 221 222 223

	cpu_gdt_descr[cpu].size = GDT_SIZE;
	asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
	asm volatile("lidt %0" :: "m" (idt_descr));

224
	memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
L
Linus Torvalds 已提交
225 226 227 228 229 230 231 232 233 234 235 236 237
	syscall_init();

	wrmsrl(MSR_FS_BASE, 0);
	wrmsrl(MSR_KERNEL_GS_BASE, 0);
	barrier(); 

	check_efer();

	/*
	 * set up and load the per-CPU TSS
	 */
	for (v = 0; v < N_EXCEPTION_STACKS; v++) {
		if (cpu) {
238 239 240 241 242 243
			static const unsigned int order[N_EXCEPTION_STACKS] = {
				[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
				[DEBUG_STACK - 1] = DEBUG_STACK_ORDER
			};

			estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
L
Linus Torvalds 已提交
244 245 246 247
			if (!estacks)
				panic("Cannot allocate exception stack %ld %d\n",
				      v, cpu); 
		}
248 249 250 251 252 253 254 255 256 257 258
		switch (v + 1) {
#if DEBUG_STKSZ > EXCEPTION_STKSZ
		case DEBUG_STACK:
			cpu_pda[cpu].debugstack = (unsigned long)estacks;
			estacks += DEBUG_STKSZ;
			break;
#endif
		default:
			estacks += EXCEPTION_STKSZ;
			break;
		}
L
Linus Torvalds 已提交
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
		t->ist[v] = (unsigned long)estacks;
	}

	t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
	/*
	 * <= is required because the CPU will access up to
	 * 8 bits beyond the end of the IO permission bitmap.
	 */
	for (i = 0; i <= IO_BITMAP_LONGS; i++)
		t->io_bitmap[i] = ~0UL;

	atomic_inc(&init_mm.mm_count);
	me->active_mm = &init_mm;
	if (me->mm)
		BUG();
	enter_lazy_tlb(&init_mm, me);

	set_tss_desc(cpu, t);
	load_TR_desc();
	load_LDT(&init_mm.context);

	/*
	 * Clear all 6 debug registers:
	 */

	set_debug(0UL, 0);
	set_debug(0UL, 1);
	set_debug(0UL, 2);
	set_debug(0UL, 3);
	set_debug(0UL, 6);
	set_debug(0UL, 7);

	fpu_init(); 
}