desc.h 13.4 KB
Newer Older
H
H. Peter Anvin 已提交
1 2
#ifndef _ASM_X86_DESC_H
#define _ASM_X86_DESC_H
3 4 5

#include <asm/desc_defs.h>
#include <asm/ldt.h>
6
#include <asm/mmu.h>
I
Ingo Molnar 已提交
7

8
#include <linux/smp.h>
9
#include <linux/percpu.h>
10

I
Ingo Molnar 已提交
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
/*
 * Populate the descriptor @desc from the user-supplied description @info.
 *
 * The result always has s = 1, dpl = 3 and l = 0; every other field is
 * derived from @info.
 */
static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
{
	/* Base address, split across base0 / base1 / base2. */
	desc->base0		= (info->base_addr & 0x0000ffff);
	desc->base1		= (info->base_addr & 0x00ff0000) >> 16;
	desc->base2		= (info->base_addr & 0xff000000) >> 24;

	/* Limit, split across limit0 (low 16 bits) and limit (next 4 bits). */
	desc->limit0		= info->limit & 0x0ffff;
	desc->limit		= (info->limit & 0xf0000) >> 16;

	/* Bit 1 is the inverse of read_exec_only; contents starts at bit 2. */
	desc->type		= ((info->read_exec_only ^ 1) << 1) |
				  (info->contents << 2);

	desc->s			= 1;
	desc->dpl		= 0x3;
	desc->p			= info->seg_not_present ^ 1;
	desc->avl		= info->useable;
	desc->d			= info->seg_32bit;
	desc->g			= info->limit_in_pages;

	/*
	 * Don't allow setting of the lm bit. It would confuse
	 * user_64bit_mode and would get overridden by sysret anyway.
	 */
	desc->l			= 0;
}

37 38
extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
39
extern const struct desc_ptr debug_idt_descr;
40
extern gate_desc debug_idt_table[];
41

42 43 44
/* Container for one GDT of GDT_ENTRIES descriptors, aligned to PAGE_SIZE. */
struct gdt_page {
	struct desc_struct gdt[GDT_ENTRIES];
} __attribute__((aligned(PAGE_SIZE)));
I
Ingo Molnar 已提交
45

46
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
47 48 49 50 51 52

static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
{
	return per_cpu(gdt_page, cpu).gdt;
}

53
#ifdef CONFIG_X86_64
54 55 56 57

/*
 * Build a 64-bit gate descriptor in @gate for the handler at @func.
 *
 * @type, @dpl and @ist are stored as given and the present bit is set.
 * Note that @seg is not consulted: the segment field is always written
 * with __KERNEL_CS.
 */
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
			     unsigned dpl, unsigned ist, unsigned seg)
{
	/* Handler address, split into three pieces. */
	gate->offset_low	= PTR_LOW(func);
	gate->offset_middle	= PTR_MIDDLE(func);
	gate->offset_high	= PTR_HIGH(func);

	gate->segment		= __KERNEL_CS;
	gate->ist		= ist;
	gate->type		= type;
	gate->dpl		= dpl;
	gate->p			= 1;

	/* Explicitly clear the two zero fields. */
	gate->zero0		= 0;
	gate->zero1		= 0;
}

70
#else
71
/*
 * Build a 32-bit gate descriptor in @gate.
 *
 * Word a holds @seg in its upper 16 bits and the low 16 bits of @base.
 * Word b holds the upper 16 bits of @base and, in bits 8-15, the byte
 * formed from 0x80, @type and @dpl shifted left by 5.
 *
 * @flags is accepted but not used.
 */
static inline void pack_gate(gate_desc *gate, unsigned char type,
			     unsigned long base, unsigned dpl, unsigned flags,
			     unsigned short seg)
{
	/*
	 * Widen @seg before shifting: an unsigned short is promoted to int,
	 * and shifting a value >= 0x8000 left by 16 would overflow it.
	 */
	gate->a = ((unsigned int)seg << 16) | (base & 0xffff);
	gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
}

79 80
#endif

81 82 83
/*
 * Return non-zero when the two 32-bit words starting at @ptr are both zero,
 * 0 otherwise.
 */
static inline int desc_empty(const void *ptr)
{
	const u32 *words = ptr;

	return (words[0] | words[1]) == 0;
}

88 89 90
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
I
Ingo Molnar 已提交
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
/*
 * Without CONFIG_PARAVIRT the generic descriptor-table operations map
 * directly onto the native_*() helpers or onto a single instruction.
 */

/* Loading descriptor-table registers. */
#define load_TR_desc()				native_load_tr_desc()
#define load_gdt(dtr)				native_load_gdt(dtr)
#define load_idt(dtr)				native_load_idt(dtr)
#define load_tr(tr)				asm volatile("ltr %0"::"m" (tr))
#define load_ldt(ldt)				asm volatile("lldt %0"::"m" (ldt))

/* Reading descriptor-table registers back. */
#define store_gdt(dtr)				native_store_gdt(dtr)
#define store_idt(dtr)				native_store_idt(dtr)
#define store_tr(tr)				(tr = native_store_tr())

#define load_TLS(t, cpu)			native_load_tls(t, cpu)
#define set_ldt					native_set_ldt

/* Writing individual table entries. */
#define write_ldt_entry(dt, entry, desc)	native_write_ldt_entry(dt, entry, desc)
#define write_gdt_entry(dt, entry, desc, type)	native_write_gdt_entry(dt, entry, desc, type)
#define write_idt_entry(dt, entry, g)		native_write_idt_entry(dt, entry, g)
107 108 109 110 111 112 113 114 115

/* Non-paravirt build: intentionally empty, nothing to do for @ldt. */
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
}

/* Non-paravirt build: intentionally empty, nothing to do for @ldt. */
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
}
#endif	/* CONFIG_PARAVIRT */
116

117 118
/* Execute SLDT, storing its result into the memory operand @ldt. */
#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))

I
Ingo Molnar 已提交
119
/* Copy the gate descriptor *@gate into slot @entry of the table @idt. */
static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
{
	gate_desc *slot = &idt[entry];

	memcpy(slot, gate, sizeof(gate_desc));
}

I
Ingo Molnar 已提交
124
static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc)
125 126 127 128
{
	memcpy(&ldt[entry], desc, 8);
}

I
Ingo Molnar 已提交
129 130
static inline void
native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type)
131 132
{
	unsigned int size;
I
Ingo Molnar 已提交
133

134
	switch (type) {
I
Ingo Molnar 已提交
135 136 137
	case DESC_TSS:	size = sizeof(tss_desc);	break;
	case DESC_LDT:	size = sizeof(ldt_desc);	break;
	default:	size = sizeof(*gdt);		break;
138
	}
I
Ingo Molnar 已提交
139

140 141 142 143 144 145 146 147 148
	memcpy(&gdt[entry], desc, size);
}

/*
 * Assemble the two words of @desc from @base, @limit, @type and @flags,
 * then set the present bit.
 *
 * Word a: low 16 bits of @base in the upper half, low 16 bits of @limit in
 * the lower half.  Word b: the remaining base and limit bits, @type in bits
 * 8-15 and the low four bits of @flags in bits 20-23.
 */
static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
				   unsigned long limit, unsigned char type,
				   unsigned char flags)
{
	const unsigned long word_a = ((base & 0xffff) << 16) | (limit & 0xffff);
	const unsigned long word_b = (base & 0xff000000) |
				     ((base & 0xff0000) >> 16) |
				     (limit & 0x000f0000) |
				     ((type & 0xff) << 8) |
				     ((flags & 0xf) << 20);

	desc->a = word_a;
	desc->b = word_b;
	/* Must follow the store to b, which it modifies. */
	desc->p = 1;
}


I
Ingo Molnar 已提交
155
/*
 * Fill the TSS/LDT descriptor at @d so that it describes the region at
 * @addr with limit @size and descriptor type @type.
 *
 * On CONFIG_X86_64, @d is treated as a struct ldttss_desc64 that is zeroed
 * and then filled field by field.  Otherwise @d is treated as a struct
 * desc_struct and packed via pack_descriptor() with 0x80 OR-ed into @type.
 */
static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
{
#ifdef CONFIG_X86_64
	struct ldttss_desc64 *desc = d;

	/* Start from all-zero so fields not set below stay clear. */
	memset(desc, 0, sizeof(struct ldttss_desc64));

	desc->type		= type;
	desc->p			= 1;

	desc->limit0		= size & 0xFFFF;
	desc->limit1		= (size >> 16) & 0xF;

	desc->base0		= PTR_LOW(addr);
	desc->base1		= PTR_MIDDLE(addr) & 0xFF;
	desc->base2		= (PTR_MIDDLE(addr) >> 8) & 0xFF;
	desc->base3		= PTR_HIGH(addr);
#else
	pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
#endif
}

static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
{
	struct desc_struct *d = get_cpu_gdt_table(cpu);
	tss_desc tss;

180
	set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
181
			      __KERNEL_TSS_LIMIT);
182 183 184 185 186
	write_gdt_entry(d, entry, &tss, DESC_TSS);
}

/* Shorthand for __set_tss_desc() on the GDT_ENTRY_TSS slot. */
#define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)

187 188 189
static inline void native_set_ldt(const void *addr, unsigned int entries)
{
	if (likely(entries == 0))
190
		asm volatile("lldt %w0"::"q" (0));
191 192 193 194
	else {
		unsigned cpu = smp_processor_id();
		ldt_desc ldt;

M
Michael Karcher 已提交
195 196
		set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
				      entries * LDT_ENTRY_SIZE - 1);
197 198
		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
				&ldt, DESC_LDT);
199
		asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
200 201 202 203 204 205 206 207
	}
}

/* Execute LTR with the selector GDT_ENTRY_TSS * 8. */
static inline void native_load_tr_desc(void)
{
	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
}

208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
/*
 * Rewrite the current CPU's TSS descriptor with type DESC_TSS and reload
 * the task register from it.
 */
static inline void force_reload_TR(void)
{
	struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
	tss_desc tss;

	/* Work on a private copy of the current GDT entry. */
	memcpy(&tss, &gdt[GDT_ENTRY_TSS], sizeof(tss));

	/*
	 * LTR requires an available TSS, and the TSS is currently
	 * busy.  Make it be available so that LTR will work.
	 */
	tss.type = DESC_TSS;
	write_gdt_entry(gdt, GDT_ENTRY_TSS, &tss, DESC_TSS);

	load_TR_desc();
}

DECLARE_PER_CPU(bool, need_tr_refresh);

/*
 * If this CPU's need_tr_refresh flag is set, reload the task register and
 * clear the flag.  Warns (via DEBUG_LOCKS_WARN_ON) when called preemptible.
 */
static inline void refresh_TR(void)
{
	DEBUG_LOCKS_WARN_ON(preemptible());

	if (likely(!this_cpu_read(need_tr_refresh)))
		return;

	force_reload_TR();
	this_cpu_write(need_tr_refresh, false);
}

/*
 * If you do something evil that corrupts the cached TSS limit (I'm looking
 * at you, VMX exits), call this function.
 *
 * The optimization here is that the TSS limit only matters for Linux if the
 * IO bitmap is in use.  If the TSS limit gets forced to its minimum value,
 * everything works except that IO bitmap will be ignored and all CPL 3 IO
 * instructions will #GP, which is exactly what we want for normal tasks.
 */
static inline void invalidate_tss_limit(void)
{
	DEBUG_LOCKS_WARN_ON(preemptible());

	/* A task with TIF_IO_BITMAP set gets the reload immediately. */
	if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
		force_reload_TR();
		return;
	}

	/* Otherwise just record that a later refresh_TR() must reload. */
	this_cpu_write(need_tr_refresh, true);
}

256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
/* Execute LGDT with *@dtr as its memory operand. */
static inline void native_load_gdt(const struct desc_ptr *dtr)
{
	asm volatile("lgdt %0"::"m" (*dtr));
}

/* Execute LIDT with *@dtr as its memory operand. */
static inline void native_load_idt(const struct desc_ptr *dtr)
{
	asm volatile("lidt %0"::"m" (*dtr));
}

/* Execute SGDT, storing its result into *@dtr. */
static inline void native_store_gdt(struct desc_ptr *dtr)
{
	asm volatile("sgdt %0":"=m" (*dtr));
}

/* Execute SIDT, storing its result into *@dtr. */
static inline void native_store_idt(struct desc_ptr *dtr)
{
	asm volatile("sidt %0":"=m" (*dtr));
}

static inline unsigned long native_store_tr(void)
{
	unsigned long tr;
I
Ingo Molnar 已提交
279

280
	asm volatile("str %0":"=r" (tr));
I
Ingo Molnar 已提交
281

282 283 284 285 286 287
	return tr;
}

static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
{
	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
I
Ingo Molnar 已提交
288
	unsigned int i;
289 290 291 292 293

	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
}

294 295
/*
 * True when @info has read_exec_only and seg_not_present set to 1 and all
 * of the other tested fields set to 0.
 *
 * This intentionally ignores lm, since 32-bit apps don't have that field.
 * @info is expanded once per field, so it must be free of side effects.
 */
#define LDT_empty(info)					\
	((info)->base_addr		== 0	&&	\
	 (info)->limit			== 0	&&	\
	 (info)->contents		== 0	&&	\
	 (info)->read_exec_only		== 1	&&	\
	 (info)->seg_32bit		== 0	&&	\
	 (info)->limit_in_pages		== 0	&&	\
	 (info)->seg_not_present	== 1	&&	\
	 (info)->useable		== 0)
304

305 306 307 308 309 310 311 312 313 314 315 316 317
/*
 * Lots of programs expect an all-zero user_desc to mean "no segment at all".
 * Return true when every tested field of @info is zero.
 */
static inline bool LDT_zero(const struct user_desc *info)
{
	if (info->base_addr != 0 || info->limit != 0)
		return false;
	if (info->contents != 0 || info->read_exec_only != 0)
		return false;
	if (info->seg_32bit != 0 || info->limit_in_pages != 0)
		return false;
	if (info->seg_not_present != 0 || info->useable != 0)
		return false;

	return true;
}

318 319 320 321 322
/* Call set_ldt() with a NULL table and zero entries. */
static inline void clear_LDT(void)
{
	set_ldt(NULL, 0);
}

323
static inline unsigned long get_desc_base(const struct desc_struct *desc)
324
{
325
	return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
326
}
327

328 329 330 331 332 333 334
/*
 * Store the low 32 bits of @base into @desc: bits 0-15 in base0, bits 16-23
 * in base1 and bits 24-31 in base2.
 */
static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
{
	desc->base2 = (base & 0xff000000) >> 24;
	desc->base1 = (base & 0x00ff0000) >> 16;
	desc->base0 = (base & 0x0000ffff);
}

335 336 337 338 339
/* Return the limit of @desc: limit0 in the low 16 bits, limit above them. */
static inline unsigned long get_desc_limit(const struct desc_struct *desc)
{
	const unsigned long low  = desc->limit0;
	const unsigned long high = desc->limit;

	return low | (high << 16);
}

340 341 342 343 344 345
/*
 * Store @limit into @desc: bits 0-15 go to limit0, bits 16-19 go to limit.
 * Higher bits of @limit are discarded.
 */
static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
{
	desc->limit  = (limit & 0xf0000) >> 16;
	desc->limit0 = (limit & 0x0ffff);
}

346 347 348 349 350 351
#ifdef CONFIG_X86_64
static inline void set_nmi_gate(int gate, void *addr)
{
	gate_desc s;

	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
352
	write_idt_entry(debug_idt_table, gate, &s);
353 354 355
}
#endif

356 357 358 359 360 361 362
#ifdef CONFIG_TRACING
extern struct desc_ptr trace_idt_descr;
extern gate_desc trace_idt_table[];
/* Write *@gate into slot @entry of trace_idt_table. */
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
	write_idt_entry(trace_idt_table, entry, gate);
}
363 364 365 366 367 368 369 370 371 372 373 374 375

/*
 * Pack a gate descriptor from the arguments and write it into slot @gate
 * of the trace IDT.
 */
static inline void _trace_set_gate(int gate, unsigned type, void *addr,
				   unsigned dpl, unsigned ist, unsigned seg)
{
	gate_desc desc;

	pack_gate(&desc, type, (unsigned long)addr, dpl, ist, seg);

	/*
	 * does not need to be atomic because it is only done once at
	 * setup time
	 */
	write_trace_idt_entry(gate, &desc);
}
376 377 378 379
#else
/* Build without CONFIG_TRACING: intentionally empty. */
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
}
380 381

/* Build without CONFIG_TRACING: expands to nothing; arguments are discarded. */
#define _trace_set_gate(gate, type, addr, dpl, ist, seg)
382 383
#endif

384
static inline void _set_gate(int gate, unsigned type, void *addr,
385
			     unsigned dpl, unsigned ist, unsigned seg)
386 387
{
	gate_desc s;
I
Ingo Molnar 已提交
388

389 390 391 392 393 394
	pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
	/*
	 * does not need to be atomic because it is only done once at
	 * setup time
	 */
	write_idt_entry(idt_table, gate, &s);
395
	write_trace_idt_entry(gate, &s);
396 397 398 399 400 401 402 403
}

/*
 * This needs to use 'idt_table' rather than 'idt', and
 * thus use the _nonmapped_ version of the IDT, as the
 * Pentium F0 0F bugfix can have resulted in the mapped
 * IDT being write-protected.
 *
 * Install @addr as an interrupt gate (dpl 0, ist 0, __KERNEL_CS) for
 * vector @n via _set_gate(); BUG if @n is above 0xFF.
 *
 * Both arguments are parenthesised so that the casts apply to the whole
 * argument expression.  @n is expanded twice and must therefore be free
 * of side effects.
 */
#define set_intr_gate_notrace(n, addr)					\
	do {								\
		BUG_ON((unsigned)(n) > 0xFF);				\
		_set_gate((n), GATE_INTERRUPT, (void *)(addr), 0, 0,	\
			  __KERNEL_CS);					\
	} while (0)

/*
 * Install @addr for vector @n with set_intr_gate_notrace(), then hand
 * trace_<addr> to _trace_set_gate() for the same vector.
 *
 * @addr must be a plain identifier because it is token-pasted onto
 * "trace_".  @n is forwarded in parentheses so that casts applied further
 * down bind to the whole expression; it is expanded more than once and
 * must be free of side effects.
 */
#define set_intr_gate(n, addr)						\
	do {								\
		set_intr_gate_notrace((n), addr);			\
		_trace_set_gate((n), GATE_INTERRUPT,			\
				(void *)trace_##addr,			\
				0, 0, __KERNEL_CS);			\
	} while (0)
417

418
extern int first_system_vector;
419 420
/* used_vectors is a bitmap of the IRQ vectors that are not managed by the per-CPU vector_irq */
extern unsigned long used_vectors[];
421 422 423

static inline void alloc_system_vector(int vector)
{
424 425
	if (!test_bit(vector, used_vectors)) {
		set_bit(vector, used_vectors);
426 427
		if (first_system_vector > vector)
			first_system_vector = vector;
I
Ingo Molnar 已提交
428
	} else {
429
		BUG();
I
Ingo Molnar 已提交
430
	}
431 432
}

433 434 435
/*
 * Reserve vector @n with alloc_system_vector() and install @addr for it
 * with set_intr_gate().
 *
 * @n is forwarded in parentheses so that casts applied further down bind
 * to the whole expression; it is expanded more than once and must be free
 * of side effects.
 */
#define alloc_intr_gate(n, addr)				\
	do {							\
		alloc_system_vector(n);				\
		set_intr_gate((n), addr);			\
	} while (0)

439 440 441 442 443 444 445 446 447
/*
 * This routine sets up an interrupt gate at descriptor privilege level 3.
 */
static inline void set_system_intr_gate(unsigned int n, void *addr)
{
	/* Only vectors 0..0xFF are accepted. */
	BUG_ON(n > 0xFF);

	_set_gate(n, GATE_INTERRUPT, addr, 3, 0, __KERNEL_CS);
}

448
static inline void set_system_trap_gate(unsigned int n, void *addr)
449 450
{
	BUG_ON((unsigned)n > 0xFF);
451
	_set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
452 453
}

454
static inline void set_trap_gate(unsigned int n, void *addr)
455 456
{
	BUG_ON((unsigned)n > 0xFF);
457
	_set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
458 459 460 461 462 463 464 465 466 467 468 469 470 471
}

/*
 * Install a task gate for vector @n.  The segment argument passed to
 * _set_gate() is @gdt_entry * 8 and the handler address is NULL.
 */
static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
{
	/* Only vectors 0..0xFF are accepted. */
	BUG_ON(n > 0xFF);

	_set_gate(n, GATE_TASK, (void *)0, 0, 0, gdt_entry * 8);
}

/* Install @addr as an interrupt gate with dpl 0 and IST index @ist for vector @n. */
static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
{
	/* The unsigned cast also rejects negative vector numbers. */
	BUG_ON((unsigned)n > 0xFF);

	_set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
}

472
static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
473 474 475 476
{
	BUG_ON((unsigned)n > 0xFF);
	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
}
477

478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u32, debug_idt_ctr);
/* Return true when this CPU's debug_idt_ctr is non-zero. */
static inline bool is_debug_idt_enabled(void)
{
	return this_cpu_read(debug_idt_ctr) != 0;
}

/* Load the IDT register from debug_idt_descr. */
static inline void load_debug_idt(void)
{
	load_idt((const struct desc_ptr *)&debug_idt_descr);
}
#else
/* Build without CONFIG_X86_64: always reports false. */
static inline bool is_debug_idt_enabled(void)
{
	return false;
}

/* Build without CONFIG_X86_64: intentionally empty. */
static inline void load_debug_idt(void)
{
}
#endif

503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527
#ifdef CONFIG_TRACING
extern atomic_t trace_idt_ctr;
/* Return true when trace_idt_ctr is non-zero. */
static inline bool is_trace_idt_enabled(void)
{
	return atomic_read(&trace_idt_ctr) != 0;
}

/* Load the IDT register from trace_idt_descr. */
static inline void load_trace_idt(void)
{
	load_idt((const struct desc_ptr *)&trace_idt_descr);
}
#else
/* Build without CONFIG_TRACING: always reports false. */
static inline bool is_trace_idt_enabled(void)
{
	return false;
}

/* Build without CONFIG_TRACING: intentionally empty. */
static inline void load_trace_idt(void)
{
}
#endif

528
/*
529
 * The load_current_idt() must be called with interrupts disabled
530
 * to avoid races. That way the IDT will always be set back to the expected
531 532 533
 * descriptor. It's also called when a CPU is being initialized, and
 * that doesn't need to disable interrupts, as nothing should be
 * bothering the CPU then.
534 535 536 537 538
 */
static inline void load_current_idt(void)
{
	if (is_debug_idt_enabled())
		load_debug_idt();
539 540
	else if (is_trace_idt_enabled())
		load_trace_idt();
541 542 543
	else
		load_idt((const struct desc_ptr *)&idt_descr);
}
H
H. Peter Anvin 已提交
544
#endif /* _ASM_X86_DESC_H */