/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include "as-layout.h"
#include "mem_user.h"
#include "os.h"
#include "skas.h"
#include "tlb.h"

/*
 * One queued host address-space operation.  'type' selects which member
 * of the union 'u' carries the arguments.
 */
struct host_vm_op {
	enum { NONE, MMAP, MUNMAP, MPROTECT } type;
	union {
		/* Arguments handed to map() by do_ops() */
		struct {
			unsigned long addr;
			unsigned long len;
			unsigned int prot;
			int fd;
			__u64 offset;
		} mmap;
		/* Arguments handed to unmap() by do_ops() */
		struct {
			unsigned long addr;
			unsigned long len;
		} munmap;
		/* Arguments handed to protect() by do_ops() */
		struct {
			unsigned long addr;
			unsigned long len;
			unsigned int prot;
		} mprotect;
	} u;
};

/*
 * Accumulator for a batch of host operations collected during a page
 * table walk.
 *
 * ops   - queued operations; the add_*() helpers flush the queue through
 *         do_ops() when it is full
 * index - number of entries of ops currently in use
 * id    - address space identifier passed to map()/unmap()/protect()
 * data  - opaque cursor whose address is passed to map()/unmap()/protect()
 * force - when non-zero the walkers queue an update even for entries
 *         that are not flagged as new
 */
struct host_vm_change {
	struct host_vm_op ops[1];
	int index;
	struct mm_id *id;
	void *data;
	int force;
};

/*
 * Build an empty struct host_vm_change for the address space 'mm_ptr'.
 *
 * The parameters are deliberately not called 'force', 'id', 'data', ...:
 * a macro parameter is substituted everywhere in the body, including in
 * designators, so a parameter named 'force' would turn '.force = force'
 * into '.<argument> = <argument>' and only compile when the caller's
 * argument happened to be spelled 'force'.  Both arguments are also
 * parenthesised so that expressions such as '&mm' or 'a ? b : c' expand
 * correctly.
 */
#define INIT_HVC(mm_ptr, force_flag) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } },	\
	   .id		= &(mm_ptr)->context.id, \
	   .data	= NULL, \
	   .index	= 0, \
	   .force	= (force_flag) })

/*
 * Issue the first 'end' queued operations of 'hvc' to the host, in order.
 *
 * 'finished' is handed through unchanged to map()/unmap()/protect(); the
 * add_*() helpers pass 0 when flushing a full queue and fix_range_common()
 * passes 1 for the final flush.
 *
 * Returns 0 when every operation succeeded, otherwise the first non-zero
 * result, at which point the remaining operations are skipped.  An entry
 * with an unrecognised type is logged and does not count as a failure.
 */
static int do_ops(struct host_vm_change *hvc, int end,
		  int finished)
{
	int n;

	for (n = 0; n < end; n++) {
		struct host_vm_op *cur = &hvc->ops[n];
		int err = 0;

		if (cur->type == MMAP)
			err = map(hvc->id, cur->u.mmap.addr, cur->u.mmap.len,
				  cur->u.mmap.prot, cur->u.mmap.fd,
				  cur->u.mmap.offset, finished, &hvc->data);
		else if (cur->type == MUNMAP)
			err = unmap(hvc->id, cur->u.munmap.addr,
				    cur->u.munmap.len, finished, &hvc->data);
		else if (cur->type == MPROTECT)
			err = protect(hvc->id, cur->u.mprotect.addr,
				      cur->u.mprotect.len,
				      cur->u.mprotect.prot,
				      finished, &hvc->data);
		else
			printk(KERN_ERR "Unknown op type %d in do_ops\n",
			       cur->type);

		if (err)
			return err;
	}

	return 0;
}

J
Jeff Dike 已提交
85
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
J
Jeff Dike 已提交
86
		    unsigned int prot, struct host_vm_change *hvc)
J
Jeff Dike 已提交
87
{
J
Jeff Dike 已提交
88
	__u64 offset;
J
Jeff Dike 已提交
89
	struct host_vm_op *last;
90
	int fd, ret = 0;
J
Jeff Dike 已提交
91 92

	fd = phys_mapping(phys, &offset);
J
Jeff Dike 已提交
93 94
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
J
Jeff Dike 已提交
95
		if ((last->type == MMAP) &&
J
Jeff Dike 已提交
96
		   (last->u.mmap.addr + last->u.mmap.len == virt) &&
J
Jeff Dike 已提交
97
		   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
J
Jeff Dike 已提交
98
		   (last->u.mmap.offset + last->u.mmap.len == offset)) {
J
Jeff Dike 已提交
99
			last->u.mmap.len += len;
100
			return 0;
J
Jeff Dike 已提交
101 102 103
		}
	}

J
Jeff Dike 已提交
104 105 106
	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
J
Jeff Dike 已提交
107 108
	}

J
Jeff Dike 已提交
109 110 111 112 113 114 115
	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MMAP,
				    .u = { .mmap = { .addr	= virt,
						     .len	= len,
						     .prot	= prot,
						     .fd	= fd,
						     .offset	= offset }
116 117
			   } });
	return ret;
J
Jeff Dike 已提交
118 119 120
}

static int add_munmap(unsigned long addr, unsigned long len,
J
Jeff Dike 已提交
121
		      struct host_vm_change *hvc)
J
Jeff Dike 已提交
122 123
{
	struct host_vm_op *last;
124
	int ret = 0;
J
Jeff Dike 已提交
125

J
Jeff Dike 已提交
126 127
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
J
Jeff Dike 已提交
128 129
		if ((last->type == MUNMAP) &&
		   (last->u.munmap.addr + last->u.mmap.len == addr)) {
J
Jeff Dike 已提交
130
			last->u.munmap.len += len;
131
			return 0;
J
Jeff Dike 已提交
132 133 134
		}
	}

J
Jeff Dike 已提交
135 136 137
	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
J
Jeff Dike 已提交
138 139
	}

J
Jeff Dike 已提交
140 141 142 143
	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MUNMAP,
			     	    .u = { .munmap = { .addr	= addr,
						       .len	= len } } });
144
	return ret;
J
Jeff Dike 已提交
145 146
}

J
Jeff Dike 已提交
147
static int add_mprotect(unsigned long addr, unsigned long len,
J
Jeff Dike 已提交
148
			unsigned int prot, struct host_vm_change *hvc)
J
Jeff Dike 已提交
149 150
{
	struct host_vm_op *last;
151
	int ret = 0;
J
Jeff Dike 已提交
152

J
Jeff Dike 已提交
153 154
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
J
Jeff Dike 已提交
155
		if ((last->type == MPROTECT) &&
J
Jeff Dike 已提交
156
		   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
J
Jeff Dike 已提交
157
		   (last->u.mprotect.prot == prot)) {
J
Jeff Dike 已提交
158
			last->u.mprotect.len += len;
159
			return 0;
J
Jeff Dike 已提交
160 161 162
		}
	}

J
Jeff Dike 已提交
163 164 165
	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
J
Jeff Dike 已提交
166 167
	}

J
Jeff Dike 已提交
168 169 170 171 172
	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MPROTECT,
			     	    .u = { .mprotect = { .addr	= addr,
							 .len	= len,
							 .prot	= prot } } });
173
	return ret;
J
Jeff Dike 已提交
174 175
}

/*
 * Advance 'n' by 'inc' and round the result down to a multiple of 'inc',
 * i.e. yield the first 'inc'-aligned value strictly greater than 'n'.
 * 'inc' must be a power of two; it is evaluated twice.
 */
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))

J
Jeff Dike 已提交
178
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
J
Jeff Dike 已提交
179 180
				   unsigned long end,
				   struct host_vm_change *hvc)
J
Jeff Dike 已提交
181 182
{
	pte_t *pte;
J
Jeff Dike 已提交
183
	int r, w, x, prot, ret = 0;
J
Jeff Dike 已提交
184 185 186 187 188 189 190 191 192 193 194 195

	pte = pte_offset_kernel(pmd, addr);
	do {
		r = pte_read(*pte);
		w = pte_write(*pte);
		x = pte_exec(*pte);
		if (!pte_young(*pte)) {
			r = 0;
			w = 0;
		} else if (!pte_dirty(*pte)) {
			w = 0;
		}
J
Jeff Dike 已提交
196 197
		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
			(x ? UM_PROT_EXEC : 0));
J
Jeff Dike 已提交
198
		if (hvc->force || pte_newpage(*pte)) {
J
Jeff Dike 已提交
199
			if (pte_present(*pte))
J
Jeff Dike 已提交
200
				ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
J
Jeff Dike 已提交
201 202
					       PAGE_SIZE, prot, hvc);
			else ret = add_munmap(addr, PAGE_SIZE, hvc);
J
Jeff Dike 已提交
203
		}
J
Jeff Dike 已提交
204
		else if (pte_newprot(*pte))
J
Jeff Dike 已提交
205
			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
J
Jeff Dike 已提交
206 207 208 209 210 211
		*pte = pte_mkuptodate(*pte);
	} while (pte++, addr += PAGE_SIZE, ((addr != end) && !ret));
	return ret;
}

/*
 * Walk the PMD entries covering [addr, end) under 'pud'.
 *
 * A PMD that is not present has its whole span queued for munmap when the
 * walk is forced or the entry is flagged as new; a present PMD is handed
 * to update_pte_range().
 *
 * The caller must pass addr < end: the loop condition is only evaluated
 * after the first entry has been processed.
 *
 * Returns 0, or the first non-zero result from below, which also stops
 * the walk.
 */
static inline int update_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pmd_t *pmd;
	unsigned long next;
	int ret = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_present(*pmd)) {
			if (hvc->force || pmd_newpage(*pmd)) {
				ret = add_munmap(addr, next - addr, hvc);
				/*
				 * NOTE(review): result is not assigned, unlike
				 * pte_mkuptodate() - presumably this macro
				 * updates its argument in place; confirm.
				 */
				pmd_mkuptodate(*pmd);
			}
		} else {
			ret = update_pte_range(pmd, addr, next, hvc);
		}
	} while (pmd++, addr = next, ((addr != end) && !ret));
	return ret;
}

/*
 * Walk the PUD entries covering [addr, end) under 'pgd'.
 *
 * A PUD that is not present has its whole span queued for munmap when the
 * walk is forced or the entry is flagged as new; a present PUD is handed
 * to update_pmd_range().
 *
 * The caller must pass addr < end: the loop condition is only evaluated
 * after the first entry has been processed.
 *
 * Returns 0, or the first non-zero result from below, which also stops
 * the walk.
 */
static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pud_t *pud;
	unsigned long next;
	int ret = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!pud_present(*pud)) {
			if (hvc->force || pud_newpage(*pud)) {
				ret = add_munmap(addr, next - addr, hvc);
				/*
				 * NOTE(review): result is not assigned, unlike
				 * pte_mkuptodate() - presumably this macro
				 * updates its argument in place; confirm.
				 */
				pud_mkuptodate(*pud);
			}
		} else {
			ret = update_pmd_range(pud, addr, next, hvc);
		}
	} while (pud++, addr = next, ((addr != end) && !ret));
	return ret;
}

L
Linus Torvalds 已提交
255
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
J
Jeff Dike 已提交
256
		      unsigned long end_addr, int force)
L
Linus Torvalds 已提交
257
{
J
Jeff Dike 已提交
258
	pgd_t *pgd;
J
Jeff Dike 已提交
259
	struct host_vm_change hvc;
J
Jeff Dike 已提交
260
	unsigned long addr = start_addr, next;
J
Jeff Dike 已提交
261
	int ret = 0;
L
Linus Torvalds 已提交
262

J
Jeff Dike 已提交
263
	hvc = INIT_HVC(mm, force);
J
Jeff Dike 已提交
264 265 266
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end_addr);
J
Jeff Dike 已提交
267 268
		if (!pgd_present(*pgd)) {
			if (force || pgd_newpage(*pgd)) {
J
Jeff Dike 已提交
269
				ret = add_munmap(addr, next - addr, &hvc);
J
Jeff Dike 已提交
270
				pgd_mkuptodate(*pgd);
J
Jeff Dike 已提交
271 272
			}
		}
J
Jeff Dike 已提交
273
		else ret = update_pud_range(pgd, addr, next, &hvc);
J
Jeff Dike 已提交
274
	} while (pgd++, addr = next, ((addr != end_addr) && !ret));
J
Jeff Dike 已提交
275

J
Jeff Dike 已提交
276
	if (!ret)
J
Jeff Dike 已提交
277
		ret = do_ops(&hvc, hvc.index, 1);
278

J
Jeff Dike 已提交
279
	/* This is not an else because ret is modified above */
J
Jeff Dike 已提交
280 281 282
	if (ret) {
		printk(KERN_ERR "fix_range_common: failed, killing current "
		       "process\n");
283 284
		force_sig(SIGKILL, current);
	}
L
Linus Torvalds 已提交
285 286 287 288
}

int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
J
Jeff Dike 已提交
289 290 291 292 293 294 295 296 297
	struct mm_struct *mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err;

	mm = &init_mm;
J
Jeff Dike 已提交
298
	for (addr = start; addr < end;) {
J
Jeff Dike 已提交
299
		pgd = pgd_offset(mm, addr);
J
Jeff Dike 已提交
300
		if (!pgd_present(*pgd)) {
J
Jeff Dike 已提交
301
			last = ADD_ROUND(addr, PGDIR_SIZE);
J
Jeff Dike 已提交
302
			if (last > end)
J
Jeff Dike 已提交
303
				last = end;
J
Jeff Dike 已提交
304
			if (pgd_newpage(*pgd)) {
J
Jeff Dike 已提交
305 306 307
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
J
Jeff Dike 已提交
308
				if (err < 0)
J
Jeff Dike 已提交
309 310 311 312 313 314 315 316
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(pgd, addr);
J
Jeff Dike 已提交
317
		if (!pud_present(*pud)) {
J
Jeff Dike 已提交
318
			last = ADD_ROUND(addr, PUD_SIZE);
J
Jeff Dike 已提交
319
			if (last > end)
J
Jeff Dike 已提交
320
				last = end;
J
Jeff Dike 已提交
321
			if (pud_newpage(*pud)) {
J
Jeff Dike 已提交
322 323 324
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
J
Jeff Dike 已提交
325
				if (err < 0)
J
Jeff Dike 已提交
326 327 328 329 330 331 332 333
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
J
Jeff Dike 已提交
334
		if (!pmd_present(*pmd)) {
J
Jeff Dike 已提交
335
			last = ADD_ROUND(addr, PMD_SIZE);
J
Jeff Dike 已提交
336
			if (last > end)
J
Jeff Dike 已提交
337
				last = end;
J
Jeff Dike 已提交
338
			if (pmd_newpage(*pmd)) {
J
Jeff Dike 已提交
339 340 341
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
J
Jeff Dike 已提交
342
				if (err < 0)
J
Jeff Dike 已提交
343 344 345 346 347 348 349 350
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
J
Jeff Dike 已提交
351
		if (!pte_present(*pte) || pte_newpage(*pte)) {
J
Jeff Dike 已提交
352 353 354
			updated = 1;
			err = os_unmap_memory((void *) addr,
					      PAGE_SIZE);
J
Jeff Dike 已提交
355
			if (err < 0)
J
Jeff Dike 已提交
356 357
				panic("munmap failed, errno = %d\n",
				      -err);
J
Jeff Dike 已提交
358
			if (pte_present(*pte))
J
Jeff Dike 已提交
359 360 361 362
				map_memory(addr,
					   pte_val(*pte) & PAGE_MASK,
					   PAGE_SIZE, 1, 1, 1);
		}
J
Jeff Dike 已提交
363
		else if (pte_newprot(*pte)) {
J
Jeff Dike 已提交
364 365 366 367 368
			updated = 1;
			os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
		}
		addr += PAGE_SIZE;
	}
J
Jeff Dike 已提交
369
	return updated;
L
Linus Torvalds 已提交
370 371
}

372 373 374 375 376 377 378 379 380 381 382 383 384
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	struct mm_struct *mm = vma->vm_mm;
	void *flush = NULL;
	int r, w, x, prot, err = 0;
	struct mm_id *mm_id;

	address &= PAGE_MASK;
	pgd = pgd_offset(mm, address);
J
Jeff Dike 已提交
385
	if (!pgd_present(*pgd))
386 387 388
		goto kill;

	pud = pud_offset(pgd, address);
J
Jeff Dike 已提交
389
	if (!pud_present(*pud))
390 391 392
		goto kill;

	pmd = pmd_offset(pud, address);
J
Jeff Dike 已提交
393
	if (!pmd_present(*pmd))
394 395 396 397 398 399 400 401 402 403 404 405 406 407
		goto kill;

	pte = pte_offset_kernel(pmd, address);

	r = pte_read(*pte);
	w = pte_write(*pte);
	x = pte_exec(*pte);
	if (!pte_young(*pte)) {
		r = 0;
		w = 0;
	} else if (!pte_dirty(*pte)) {
		w = 0;
	}

408
	mm_id = &mm->context.id;
409 410
	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
		(x ? UM_PROT_EXEC : 0));
J
Jeff Dike 已提交
411 412
	if (pte_newpage(*pte)) {
		if (pte_present(*pte)) {
413 414 415 416 417 418 419 420 421
			unsigned long long offset;
			int fd;

			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
				  1, &flush);
		}
		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
	}
J
Jeff Dike 已提交
422
	else if (pte_newprot(*pte))
423 424
		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

J
Jeff Dike 已提交
425
	if (err)
426 427 428 429 430 431 432
		goto kill;

	*pte = pte_mkuptodate(*pte);

	return;

kill:
J
Jeff Dike 已提交
433
	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
434 435 436
	force_sig(SIGKILL, current);
}

L
Linus Torvalds 已提交
437 438
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
J
Jeff Dike 已提交
439
	return pgd_offset(mm, address);
L
Linus Torvalds 已提交
440 441 442 443
}

/* Function wrapper around pud_offset(): PUD entry under 'pgd' for 'address'. */
pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
	return pud_offset(pgd, address);
}

/* Function wrapper around pmd_offset(): PMD entry under 'pud' for 'address'. */
pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
	return pmd_offset(pud, address);
}

/*
 * Function wrapper around pte_offset_kernel(): PTE under 'pmd' for
 * 'address'.
 */
pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
	return pte_offset_kernel(pmd, address);
}

pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
J
Jeff Dike 已提交
459 460 461
	pgd_t *pgd = pgd_offset(task->mm, addr);
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);
L
Linus Torvalds 已提交
462

J
Jeff Dike 已提交
463
	return pte_offset_map(pmd, addr);
L
Linus Torvalds 已提交
464 465
}

466 467
void flush_tlb_all(void)
{
J
Jeff Dike 已提交
468
	flush_tlb_mm(current->mm);
469 470 471 472
}

/*
 * Flush the kernel range [start, end); the "was anything updated" result
 * of flush_tlb_kernel_range_common() is discarded.
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
J
Jeff Dike 已提交
478
	flush_tlb_kernel_range_common(start_vm, end_vm);
479 480 481 482
}

void __flush_tlb_one(unsigned long addr)
{
J
Jeff Dike 已提交
483
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
484 485 486 487 488
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
J
Jeff Dike 已提交
489 490
	if (!proc_mm && (end_addr > STUB_START))
		end_addr = STUB_START;
491

J
Jeff Dike 已提交
492
	fix_range_common(mm, start_addr, end_addr, force);
493 494 495 496 497
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
J
Jeff Dike 已提交
498 499 500
	if (vma->vm_mm == NULL)
		flush_tlb_kernel_range_common(start, end);
	else fix_range(vma->vm_mm, start, end, 0);
501 502 503 504
}

void flush_tlb_mm(struct mm_struct *mm)
{
505 506
	unsigned long end;

J
Jeff Dike 已提交
507 508 509 510 511 512
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;
513

J
Jeff Dike 已提交
514
	end = proc_mm ? TASK_SIZE : STUB_START;
J
Jeff Dike 已提交
515
	fix_range(mm, 0, end, 0);
516 517 518 519
}

void force_flush_all(void)
{
520 521 522
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = mm->mmap;

J
Jeff Dike 已提交
523
	while (vma != NULL) {
524 525 526
		fix_range(mm, vma->vm_start, vma->vm_end, 1);
		vma = vma->vm_next;
	}
527
}