desc.h 14.8 KB
Newer Older
H
H. Peter Anvin 已提交
1 2
#ifndef _ASM_X86_DESC_H
#define _ASM_X86_DESC_H
3 4 5

#include <asm/desc_defs.h>
#include <asm/ldt.h>
6
#include <asm/mmu.h>
7
#include <asm/fixmap.h>
I
Ingo Molnar 已提交
8

9
#include <linux/smp.h>
10
#include <linux/percpu.h>
11

I
Ingo Molnar 已提交
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
/*
 * Build the descriptor *desc from the user-supplied description *info.
 * The result always has s = 1, dpl = 3 and l = 0.
 */
static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
{
	/* Base address, spread over the three base fields. */
	desc->base0		= (info->base_addr & 0x0000ffff);
	desc->base1		= (info->base_addr & 0x00ff0000) >> 16;
	desc->base2		= (info->base_addr & 0xff000000) >> 24;

	/* Limit: low 16 bits in limit0, next 4 bits in limit. */
	desc->limit0		= info->limit & 0x0ffff;
	desc->limit		= (info->limit & 0xf0000) >> 16;

	desc->type		= ((info->read_exec_only ^ 1) << 1) |
				  (info->contents << 2);

	desc->s			= 1;
	desc->dpl		= 0x3;
	desc->p			= info->seg_not_present ^ 1;
	desc->avl		= info->useable;
	desc->d			= info->seg_32bit;
	desc->g			= info->limit_in_pages;

	/*
	 * Don't allow setting of the lm bit. It would confuse
	 * user_64bit_mode and would get overridden by sysret anyway.
	 */
	desc->l			= 0;
}

38 39
extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
40
extern const struct desc_ptr debug_idt_descr;
41
extern gate_desc debug_idt_table[];
42
extern pgprot_t pg_fixmap_gdt_flags;
43

44 45 46
/*
 * Container for one GDT: GDT_ENTRIES descriptors, with the whole structure
 * aligned to PAGE_SIZE.
 */
struct gdt_page {
	struct desc_struct gdt[GDT_ENTRIES];
} __attribute__((aligned(PAGE_SIZE)));
I
Ingo Molnar 已提交
47

48
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
49

50 51
/* Provide the original GDT */
static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
52 53 54 55
{
	return per_cpu(gdt_page, cpu).gdt;
}

56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
/* Same pointer as get_cpu_gdt_rw(cpu), returned as an unsigned long. */
static inline unsigned long get_cpu_gdt_rw_vaddr(unsigned int cpu)
{
	return (unsigned long)get_cpu_gdt_rw(cpu);
}

/* Return the gdt array of the gdt_page obtained through this_cpu_ptr(). */
static inline struct desc_struct *get_current_gdt_rw(void)
{
	return this_cpu_ptr(&gdt_page)->gdt;
}

/* Same pointer as get_current_gdt_rw(), returned as an unsigned long. */
static inline unsigned long get_current_gdt_rw_vaddr(void)
{
	return (unsigned long)get_current_gdt_rw();
}

/*
 * Get the fixmap index for a specific processor: the per-CPU indices start
 * at FIX_GDT_REMAP_BEGIN and advance by one per CPU number.
 */
static inline unsigned int get_cpu_gdt_ro_index(int cpu)
{
	return FIX_GDT_REMAP_BEGIN + cpu;
}

/* Return the fixmap virtual address of the given CPU's remapped GDT. */
static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
{
	return (struct desc_struct *)__fix_to_virt(get_cpu_gdt_ro_index(cpu));
}

/* Same pointer as get_cpu_gdt_ro(cpu), returned as an unsigned long. */
static inline unsigned long get_cpu_gdt_ro_vaddr(int cpu)
{
	return (unsigned long)get_cpu_gdt_ro(cpu);
}

/*
 * Provide the current read-only GDT: the remapped GDT of the CPU reported
 * by smp_processor_id().
 */
static inline struct desc_struct *get_current_gdt_ro(void)
{
	return get_cpu_gdt_ro(smp_processor_id());
}

/* Same pointer as get_current_gdt_ro(), returned as an unsigned long. */
static inline unsigned long get_current_gdt_ro_vaddr(void)
{
	return (unsigned long)get_current_gdt_ro();
}

101
#ifdef CONFIG_X86_64
102 103 104 105

/*
 * Fill in *gate (CONFIG_X86_64 variant) for the handler address func.
 *
 * Note: the seg argument is not consulted here; the segment field is
 * always set to __KERNEL_CS.
 */
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
			     unsigned dpl, unsigned ist, unsigned seg)
{
	/* Handler address, split over the three offset fields. */
	gate->offset_low	= PTR_LOW(func);
	gate->offset_middle	= PTR_MIDDLE(func);
	gate->offset_high	= PTR_HIGH(func);

	gate->segment		= __KERNEL_CS;

	gate->type		= type;
	gate->dpl		= dpl;
	gate->ist		= ist;
	gate->p			= 1;

	/* The zero* fields are always cleared. */
	gate->zero0		= 0;
	gate->zero1		= 0;
}

118
#else
119
/*
 * Fill in *gate (variant used when CONFIG_X86_64 is not set).
 *
 * gate->a receives the selector seg in its upper 16 bits and the low 16
 * bits of base; gate->b receives the high 16 bits of base plus an access
 * byte built from 0x80, type and dpl.  The flags argument is not used.
 */
static inline void pack_gate(gate_desc *gate, unsigned char type,
			     unsigned long base, unsigned dpl, unsigned flags,
			     unsigned short seg)
{
	/*
	 * seg would be promoted to a signed int; convert it to unsigned
	 * before shifting so that a selector >= 0x8000 does not shift a bit
	 * into the sign position (undefined behaviour in C).
	 */
	gate->a = ((unsigned int)seg << 16) | (base & 0xffff);
	gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
}

127 128
#endif

129 130 131
/* Return 1 when the first two u32 words at ptr are both zero, else 0. */
static inline int desc_empty(const void *ptr)
{
	const u32 *words = ptr;

	return words[0] == 0 && words[1] == 0;
}

136 137 138
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
I
Ingo Molnar 已提交
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
/*
 * Without CONFIG_PARAVIRT, the descriptor-table operations below map
 * directly onto the native_* helpers or onto a single inline asm statement.
 */
#define load_TR_desc()				native_load_tr_desc()
#define load_gdt(dtr)				native_load_gdt(dtr)
#define load_idt(dtr)				native_load_idt(dtr)
#define load_tr(tr)				asm volatile("ltr %0"::"m" (tr))
#define load_ldt(ldt)				asm volatile("lldt %0"::"m" (ldt))

#define store_gdt(dtr)				native_store_gdt(dtr)
#define store_idt(dtr)				native_store_idt(dtr)
#define store_tr(tr)				(tr = native_store_tr())

#define load_TLS(t, cpu)			native_load_tls(t, cpu)
#define set_ldt					native_set_ldt

#define write_ldt_entry(dt, entry, desc)	native_write_ldt_entry(dt, entry, desc)
#define write_gdt_entry(dt, entry, desc, type)	native_write_gdt_entry(dt, entry, desc, type)
#define write_idt_entry(dt, entry, g)		native_write_idt_entry(dt, entry, g)
155 156 157 158 159 160 161 162 163

/* Empty stub used when CONFIG_PARAVIRT is not set. */
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
}

/* Empty stub used when CONFIG_PARAVIRT is not set. */
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
}
#endif	/* CONFIG_PARAVIRT */
164

165 166
/* Execute sldt, writing its result to the memory operand ldt. */
#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))

I
Ingo Molnar 已提交
167
/* Copy *gate into slot number entry of the table at idt. */
static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
{
	gate_desc *slot = idt + entry;

	memcpy(slot, gate, sizeof(*gate));
}

I
Ingo Molnar 已提交
172
static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc)
173 174 175 176
{
	memcpy(&ldt[entry], desc, 8);
}

I
Ingo Molnar 已提交
177 178
static inline void
native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type)
179 180
{
	unsigned int size;
I
Ingo Molnar 已提交
181

182
	switch (type) {
I
Ingo Molnar 已提交
183 184 185
	case DESC_TSS:	size = sizeof(tss_desc);	break;
	case DESC_LDT:	size = sizeof(ldt_desc);	break;
	default:	size = sizeof(*gdt);		break;
186
	}
I
Ingo Molnar 已提交
187

188 189 190 191 192 193 194 195 196
	memcpy(&gdt[entry], desc, size);
}

/*
 * Assemble the two words (a and b) of *desc from base, limit, type and
 * flags, then set the p bit.
 */
static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
				   unsigned long limit, unsigned char type,
				   unsigned char flags)
{
	/* Pieces of base, already shifted to their destination position. */
	const unsigned long base_lo	= (base & 0xffff) << 16;
	const unsigned long base_mid	= (base & 0xff0000) >> 16;
	const unsigned long base_hi	= base & 0xff000000;
	/* Pieces of limit. */
	const unsigned long limit_lo	= limit & 0xffff;
	const unsigned long limit_hi	= limit & 0x000f0000;

	desc->a = base_lo | limit_lo;
	desc->b = base_hi | base_mid | limit_hi |
		  ((type & 0xff) << 8) | ((flags & 0xf) << 20);
	desc->p = 1;
}


I
Ingo Molnar 已提交
203
/*
 * Build a TSS/LDT descriptor at d for the object at addr with limit size.
 *
 * With CONFIG_X86_64 the target is a struct ldttss_desc64, which is zeroed
 * and then filled field by field; otherwise the work is delegated to
 * pack_descriptor() with 0x80 OR-ed into type.
 */
static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
{
#ifdef CONFIG_X86_64
	struct ldttss_desc64 *d64 = d;

	memset(d64, 0, sizeof(*d64));

	d64->type		= type;
	d64->p			= 1;

	/* Limit: low 16 bits, then the next 4 bits. */
	d64->limit0		= size & 0xFFFF;
	d64->limit1		= (size >> 16) & 0xF;

	/* Address, spread over the four base fields. */
	d64->base0		= PTR_LOW(addr);
	d64->base1		= PTR_MIDDLE(addr) & 0xFF;
	d64->base2		= (PTR_MIDDLE(addr) >> 8) & 0xFF;
	d64->base3		= PTR_HIGH(addr);
#else
	pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
#endif
}

static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
{
225
	struct desc_struct *d = get_cpu_gdt_rw(cpu);
226 227
	tss_desc tss;

228
	set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
229
			      __KERNEL_TSS_LIMIT);
230 231 232 233 234
	write_gdt_entry(d, entry, &tss, DESC_TSS);
}

/* Install the TSS descriptor for addr in slot GDT_ENTRY_TSS of cpu's GDT. */
#define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)

235 236 237
static inline void native_set_ldt(const void *addr, unsigned int entries)
{
	if (likely(entries == 0))
238
		asm volatile("lldt %w0"::"q" (0));
239 240 241 242
	else {
		unsigned cpu = smp_processor_id();
		ldt_desc ldt;

M
Michael Karcher 已提交
243 244
		set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
				      entries * LDT_ENTRY_SIZE - 1);
245
		write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
246
				&ldt, DESC_LDT);
247
		asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
248 249 250 251 252 253 254 255
	}
}

/* Execute ltr with the selector GDT_ENTRY_TSS*8. */
static inline void native_load_tr_desc(void)
{
	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
}

256 257
DECLARE_PER_CPU(bool, __tss_limit_invalid);

258 259
static inline void force_reload_TR(void)
{
260
	struct desc_struct *d = get_current_gdt_rw();
261 262 263 264 265 266 267 268 269 270 271 272
	tss_desc tss;

	memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));

	/*
	 * LTR requires an available TSS, and the TSS is currently
	 * busy.  Make it be available so that LTR will work.
	 */
	tss.type = DESC_TSS;
	write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);

	load_TR_desc();
273
	this_cpu_write(__tss_limit_invalid, false);
274 275
}

276 277 278 279 280 281
/*
 * Call this if you need the TSS limit to be correct, which should be the case
 * if and only if you have TIF_IO_BITMAP set or you're switching to a task
 * with TIF_IO_BITMAP set.
 */
static inline void refresh_tss_limit(void)
282 283 284
{
	DEBUG_LOCKS_WARN_ON(preemptible());

285
	if (unlikely(this_cpu_read(__tss_limit_invalid)))
286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
		force_reload_TR();
}

/*
 * If you do something evil that corrupts the cached TSS limit (I'm looking
 * at you, VMX exits), call this function.
 *
 * The optimization here is that the TSS limit only matters for Linux if the
 * IO bitmap is in use.  If the TSS limit gets forced to its minimum value,
 * everything works except that IO bitmap will be ignored and all CPL 3 IO
 * instructions will #GP, which is exactly what we want for normal tasks.
 */
static inline void invalidate_tss_limit(void)
{
	DEBUG_LOCKS_WARN_ON(preemptible());

	if (unlikely(test_thread_flag(TIF_IO_BITMAP)))
		force_reload_TR();
	else
305
		this_cpu_write(__tss_limit_invalid, true);
306 307
}

308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
/* Execute lgdt with *dtr as its memory operand. */
static inline void native_load_gdt(const struct desc_ptr *dtr)
{
	asm volatile("lgdt %0"::"m" (*dtr));
}

/* Execute lidt with *dtr as its memory operand. */
static inline void native_load_idt(const struct desc_ptr *dtr)
{
	asm volatile("lidt %0"::"m" (*dtr));
}

/* Execute sgdt, storing its result into *dtr. */
static inline void native_store_gdt(struct desc_ptr *dtr)
{
	asm volatile("sgdt %0":"=m" (*dtr));
}

/* Execute sidt, storing its result into *dtr. */
static inline void native_store_idt(struct desc_ptr *dtr)
{
	asm volatile("sidt %0":"=m" (*dtr));
}

static inline unsigned long native_store_tr(void)
{
	unsigned long tr;
I
Ingo Molnar 已提交
331

332
	asm volatile("str %0":"=r" (tr));
I
Ingo Molnar 已提交
333

334 335 336 337 338
	return tr;
}

static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
{
339
	struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
I
Ingo Molnar 已提交
340
	unsigned int i;
341 342 343 344 345

	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
}

346 347
/*
 * True when *info has read_exec_only and seg_not_present set to 1 and every
 * other checked field set to 0.
 *
 * This intentionally ignores lm, since 32-bit apps don't have that field.
 */
#define LDT_empty(info)					\
	((info)->read_exec_only		== 1	&&	\
	 (info)->seg_not_present	== 1	&&	\
	 (info)->base_addr		== 0	&&	\
	 (info)->limit			== 0	&&	\
	 (info)->contents		== 0	&&	\
	 (info)->seg_32bit		== 0	&&	\
	 (info)->limit_in_pages		== 0	&&	\
	 (info)->useable		== 0)
356

357 358 359 360 361 362 363 364 365 366 367 368 369
/*
 * Lots of programs expect an all-zero user_desc to mean "no segment at all".
 * Returns true when every checked field of *info is zero.
 */
static inline bool LDT_zero(const struct user_desc *info)
{
	/* OR-ing the fields together yields zero only if each one is zero. */
	return !(info->base_addr	|
		 info->limit		|
		 info->contents		|
		 info->read_exec_only	|
		 info->seg_32bit	|
		 info->limit_in_pages	|
		 info->seg_not_present	|
		 info->useable);
}

370 371 372 373 374
/* Clear the LDT by calling set_ldt() with a NULL table and zero entries. */
static inline void clear_LDT(void)
{
	set_ldt(NULL, 0);
}

375
static inline unsigned long get_desc_base(const struct desc_struct *desc)
376
{
377
	return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
378
}
379

380 381 382 383 384 385 386
/* Store base into *desc: bits 0-15 in base0, 16-23 in base1, 24-31 in base2. */
static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
{
	const unsigned long lo	= base & 0xffff;
	const unsigned long mid	= (base >> 16) & 0xff;
	const unsigned long hi	= (base >> 24) & 0xff;

	desc->base0 = lo;
	desc->base1 = mid;
	desc->base2 = hi;
}

387 388 389 390 391
/* Reassemble the limit of *desc: limit0 is bits 0-15, limit is bits 16 and up. */
static inline unsigned long get_desc_limit(const struct desc_struct *desc)
{
	const unsigned long lo = desc->limit0;
	const unsigned long hi = desc->limit;

	return (hi << 16) | lo;
}

392 393 394 395 396 397
/* Store limit into *desc: bits 0-15 in limit0, bits 16-19 in limit. */
static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
{
	const unsigned long lo = limit & 0xffff;
	const unsigned long hi = (limit >> 16) & 0xf;

	desc->limit0 = lo;
	desc->limit = hi;
}

398 399 400 401 402 403
#ifdef CONFIG_X86_64
static inline void set_nmi_gate(int gate, void *addr)
{
	gate_desc s;

	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
404
	write_idt_entry(debug_idt_table, gate, &s);
405 406 407
}
#endif

408 409 410 411 412 413 414
#ifdef CONFIG_TRACING
extern struct desc_ptr trace_idt_descr;
extern gate_desc trace_idt_table[];
/* Write *gate into slot number entry of trace_idt_table. */
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
	write_idt_entry(trace_idt_table, entry, gate);
}
415 416 417 418 419 420 421 422 423 424 425 426 427

/* Pack a gate from the arguments and write it to the trace IDT only. */
static inline void _trace_set_gate(int gate, unsigned type, void *addr,
				   unsigned dpl, unsigned ist, unsigned seg)
{
	gate_desc desc;

	pack_gate(&desc, type, (unsigned long)addr, dpl, ist, seg);

	/* Only done once at setup time, so it does not need to be atomic. */
	write_trace_idt_entry(gate, &desc);
}
428 429 430 431
#else
/* Empty stub used when CONFIG_TRACING is not set. */
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
}
432 433

/* Without CONFIG_TRACING this expands to nothing. */
#define _trace_set_gate(gate, type, addr, dpl, ist, seg)
434 435
#endif

436
static inline void _set_gate(int gate, unsigned type, void *addr,
437
			     unsigned dpl, unsigned ist, unsigned seg)
438 439
{
	gate_desc s;
I
Ingo Molnar 已提交
440

441 442 443 444 445 446
	pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
	/*
	 * does not need to be atomic because it is only done once at
	 * setup time
	 */
	write_idt_entry(idt_table, gate, &s);
447
	write_trace_idt_entry(gate, &s);
448 449 450 451 452 453 454 455
}

/*
 * This needs to use 'idt_table' rather than 'idt', and
 * thus use the _nonmapped_ version of the IDT, as the
 * Pentium F0 0F bugfix can have resulted in the mapped
 * IDT being write-protected.
 */
456
/*
 * Install an interrupt gate (dpl 0, ist 0, __KERNEL_CS) for vector n via
 * _set_gate().  BUGs if n, viewed as unsigned, exceeds 0xFF.
 *
 * The arguments are parenthesized so that the casts apply to the whole
 * argument expression rather than to its first operand only.
 */
#define set_intr_gate_notrace(n, addr)					\
	do {								\
		BUG_ON((unsigned)(n) > 0xFF);				\
		_set_gate((n), GATE_INTERRUPT, (void *)(addr), 0, 0,	\
			  __KERNEL_CS);					\
	} while (0)

/*
 * Install the interrupt gate for vector n, then hand the handler named
 * trace_<addr> to _trace_set_gate().  addr must be a plain identifier,
 * because it is token-pasted onto "trace_".
 */
#define set_intr_gate(n, addr)						\
	do {								\
		set_intr_gate_notrace(n, addr);				\
		_trace_set_gate(n, GATE_INTERRUPT,			\
				(void *)trace_##addr, 0, 0,		\
				__KERNEL_CS);				\
	} while (0)
469

470
extern int first_system_vector;
471 472
/* used_vectors is a bitmap of the IRQ vectors that are not managed by the per-CPU vector_irq */
extern unsigned long used_vectors[];
473 474 475

static inline void alloc_system_vector(int vector)
{
476 477
	if (!test_bit(vector, used_vectors)) {
		set_bit(vector, used_vectors);
478 479
		if (first_system_vector > vector)
			first_system_vector = vector;
I
Ingo Molnar 已提交
480
	} else {
481
		BUG();
I
Ingo Molnar 已提交
482
	}
483 484
}

485 486 487
/* Reserve system vector n, then install addr as its interrupt gate. */
#define alloc_intr_gate(n, addr)				\
	do {							\
		alloc_system_vector(n);				\
		set_intr_gate(n, addr);				\
	} while (0)

491 492 493 494 495 496 497 498 499
/*
 * This routine sets up an interrupt gate with a dpl (privilege level) of 3
 * and ist 0 for vector n.  BUGs if n exceeds 0xFF.
 */
static inline void set_system_intr_gate(unsigned int n, void *addr)
{
	BUG_ON((unsigned)n > 0xFF);
	_set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
}

500
static inline void set_system_trap_gate(unsigned int n, void *addr)
501 502
{
	BUG_ON((unsigned)n > 0xFF);
503
	_set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
504 505
}

506
static inline void set_trap_gate(unsigned int n, void *addr)
507 508
{
	BUG_ON((unsigned)n > 0xFF);
509
	_set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
510 511 512 513 514 515 516 517 518 519 520 521 522 523
}

/*
 * Set up a task gate for vector n with a null handler address; the seg
 * argument passed to _set_gate() is gdt_entry scaled by 8.
 * BUGs if n exceeds 0xFF.
 */
static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
{
	const unsigned int selector = gdt_entry * 8;

	BUG_ON(n > 0xFF);
	_set_gate(n, GATE_TASK, NULL, 0, 0, selector);
}

/*
 * Set up an interrupt gate with dpl 0 and the given ist value for vector n.
 * BUGs if n is outside 0..0xFF (the unsigned cast also catches negatives).
 */
static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
{
	BUG_ON((unsigned)n > 0xFF);
	_set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
}

524
static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
525 526 527 528
{
	BUG_ON((unsigned)n > 0xFF);
	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
}
529

530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u32, debug_idt_ctr);
/* True when this CPU's debug_idt_ctr is non-zero. */
static inline bool is_debug_idt_enabled(void)
{
	return this_cpu_read(debug_idt_ctr) != 0;
}

/* Load debug_idt_descr through load_idt(). */
static inline void load_debug_idt(void)
{
	load_idt((const struct desc_ptr *)&debug_idt_descr);
}
#else
/* Without CONFIG_X86_64 the debug IDT is never reported as enabled. */
static inline bool is_debug_idt_enabled(void)
{
	return false;
}

/* Empty stub used when CONFIG_X86_64 is not set. */
static inline void load_debug_idt(void)
{
}
#endif

555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579
#ifdef CONFIG_TRACING
extern atomic_t trace_idt_ctr;
/* True when trace_idt_ctr is non-zero. */
static inline bool is_trace_idt_enabled(void)
{
	return atomic_read(&trace_idt_ctr) != 0;
}

/* Load trace_idt_descr through load_idt(). */
static inline void load_trace_idt(void)
{
	load_idt((const struct desc_ptr *)&trace_idt_descr);
}
#else
/* Without CONFIG_TRACING the trace IDT is never reported as enabled. */
static inline bool is_trace_idt_enabled(void)
{
	return false;
}

/* Empty stub used when CONFIG_TRACING is not set. */
static inline void load_trace_idt(void)
{
}
#endif

580
/*
581
 * The load_current_idt() must be called with interrupts disabled
582
 * to avoid races. That way the IDT will always be set back to the expected
583 584 585
 * descriptor. It's also called when a CPU is being initialized, and
 * that doesn't need to disable interrupts, as nothing should be
 * bothering the CPU then.
586 587 588 589 590
 */
static inline void load_current_idt(void)
{
	if (is_debug_idt_enabled())
		load_debug_idt();
591 592
	else if (is_trace_idt_enabled())
		load_trace_idt();
593 594 595
	else
		load_idt((const struct desc_ptr *)&idt_descr);
}
H
H. Peter Anvin 已提交
596
#endif /* _ASM_X86_DESC_H */