tlb.c 11.7 KB
Newer Older
J
Jeff Dike 已提交
1
/*
J
Jeff Dike 已提交
2
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
L
Linus Torvalds 已提交
3 4 5
 * Licensed under the GPL
 */

J
Jeff Dike 已提交
6 7 8 9
#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
J
Jeff Dike 已提交
10
#include "as-layout.h"
L
Linus Torvalds 已提交
11 12
#include "mem_user.h"
#include "os.h"
13
#include "skas.h"
L
Linus Torvalds 已提交
14

J
Jeff Dike 已提交
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
/*
 * A batch of host address-space operations queued for one address space.
 *
 * add_mmap(), add_munmap() and add_mprotect() append to (or extend the
 * last entry of) ops[]; do_ops() issues the queued entries through
 * map(), unmap() and protect().
 */
struct host_vm_change {
	/* One queued operation; 'type' says which member of 'u' is valid. */
	struct host_vm_op {
		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
		union {
			/* Arguments handed to map() for an MMAP operation. */
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
				int fd;
				__u64 offset;
			} mmap;
			/* Arguments handed to unmap() for a MUNMAP operation. */
			struct {
				unsigned long addr;
				unsigned long len;
			} munmap;
			/* Arguments handed to protect() for an MPROTECT operation. */
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
			} mprotect;
		} u;
	} ops[1];	/* ARRAY_SIZE(ops) is the batch capacity */
	int index;		/* number of ops[] entries currently queued */
	struct mm_id *id;	/* first argument of map()/unmap()/protect() */
	void *data;		/* passed by address to map()/unmap()/protect() */
	int force;		/* non-zero: walkers act even on entries not flagged new */
};

/*
 * Build an empty struct host_vm_change for the address space 'mm'.
 *
 * mm:         pointer to an object with a 'context.id' member; its
 *             address is stored in .id.
 * force_flag: stored in .force.
 *
 * No operation is queued (.index is 0, ops[0] is NONE) and .data is NULL.
 *
 * The second parameter must not be spelled 'force': a macro parameter
 * is substituted everywhere in the body, including in the '.force'
 * designator, which would break for any argument other than an
 * identifier literally named 'force'.
 */
#define INIT_HVC(mm, force_flag) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } },	\
	   .id		= &(mm)->context.id, \
	   .data	= NULL, \
	   .index	= 0, \
	   .force	= (force_flag) })

static int do_ops(struct host_vm_change *hvc, int end,
		  int finished)
{
	struct host_vm_op *op;
	int i, ret = 0;

	for (i = 0; i < end && !ret; i++) {
		op = &hvc->ops[i];
J
Jeff Dike 已提交
59
		switch (op->type) {
J
Jeff Dike 已提交
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
		case MMAP:
			ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
				  op->u.mmap.prot, op->u.mmap.fd,
				  op->u.mmap.offset, finished, &hvc->data);
			break;
		case MUNMAP:
			ret = unmap(hvc->id, op->u.munmap.addr,
				    op->u.munmap.len, finished, &hvc->data);
			break;
		case MPROTECT:
			ret = protect(hvc->id, op->u.mprotect.addr,
				      op->u.mprotect.len, op->u.mprotect.prot,
				      finished, &hvc->data);
			break;
		default:
			printk(KERN_ERR "Unknown op type %d in do_ops\n",
			       op->type);
			break;
		}
	}

	return ret;
}

J
Jeff Dike 已提交
84
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
J
Jeff Dike 已提交
85
		    unsigned int prot, struct host_vm_change *hvc)
J
Jeff Dike 已提交
86
{
J
Jeff Dike 已提交
87
	__u64 offset;
J
Jeff Dike 已提交
88
	struct host_vm_op *last;
89
	int fd, ret = 0;
J
Jeff Dike 已提交
90 91

	fd = phys_mapping(phys, &offset);
J
Jeff Dike 已提交
92 93
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
J
Jeff Dike 已提交
94
		if ((last->type == MMAP) &&
J
Jeff Dike 已提交
95
		   (last->u.mmap.addr + last->u.mmap.len == virt) &&
J
Jeff Dike 已提交
96
		   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
J
Jeff Dike 已提交
97
		   (last->u.mmap.offset + last->u.mmap.len == offset)) {
J
Jeff Dike 已提交
98
			last->u.mmap.len += len;
99
			return 0;
J
Jeff Dike 已提交
100 101 102
		}
	}

J
Jeff Dike 已提交
103 104 105
	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
J
Jeff Dike 已提交
106 107
	}

J
Jeff Dike 已提交
108 109 110 111 112 113 114
	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MMAP,
				    .u = { .mmap = { .addr	= virt,
						     .len	= len,
						     .prot	= prot,
						     .fd	= fd,
						     .offset	= offset }
115 116
			   } });
	return ret;
J
Jeff Dike 已提交
117 118 119
}

static int add_munmap(unsigned long addr, unsigned long len,
J
Jeff Dike 已提交
120
		      struct host_vm_change *hvc)
J
Jeff Dike 已提交
121 122
{
	struct host_vm_op *last;
123
	int ret = 0;
J
Jeff Dike 已提交
124

J
Jeff Dike 已提交
125 126
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
J
Jeff Dike 已提交
127 128
		if ((last->type == MUNMAP) &&
		   (last->u.munmap.addr + last->u.mmap.len == addr)) {
J
Jeff Dike 已提交
129
			last->u.munmap.len += len;
130
			return 0;
J
Jeff Dike 已提交
131 132 133
		}
	}

J
Jeff Dike 已提交
134 135 136
	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
J
Jeff Dike 已提交
137 138
	}

J
Jeff Dike 已提交
139 140 141 142
	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MUNMAP,
			     	    .u = { .munmap = { .addr	= addr,
						       .len	= len } } });
143
	return ret;
J
Jeff Dike 已提交
144 145
}

J
Jeff Dike 已提交
146
static int add_mprotect(unsigned long addr, unsigned long len,
J
Jeff Dike 已提交
147
			unsigned int prot, struct host_vm_change *hvc)
J
Jeff Dike 已提交
148 149
{
	struct host_vm_op *last;
150
	int ret = 0;
J
Jeff Dike 已提交
151

J
Jeff Dike 已提交
152 153
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
J
Jeff Dike 已提交
154
		if ((last->type == MPROTECT) &&
J
Jeff Dike 已提交
155
		   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
J
Jeff Dike 已提交
156
		   (last->u.mprotect.prot == prot)) {
J
Jeff Dike 已提交
157
			last->u.mprotect.len += len;
158
			return 0;
J
Jeff Dike 已提交
159 160 161
		}
	}

J
Jeff Dike 已提交
162 163 164
	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
J
Jeff Dike 已提交
165 166
	}

J
Jeff Dike 已提交
167 168 169 170 171
	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MPROTECT,
			     	    .u = { .mprotect = { .addr	= addr,
							 .len	= len,
							 .prot	= prot } } });
172
	return ret;
J
Jeff Dike 已提交
173 174
}

L
Linus Torvalds 已提交
175 176
/*
 * Add 'step' to 'addr' and clear the bits below 'step', giving the first
 * multiple of 'step' strictly above 'addr' (an already aligned 'addr'
 * advances by a whole step).  The mask only rounds correctly when 'step'
 * is a power of two.  'step' is evaluated twice.
 */
#define ADD_ROUND(addr, step) (((addr) + (step)) & ~((step) - 1))

J
Jeff Dike 已提交
177
/*
 * Walk the PTEs for [addr, end) under 'pmd', one page at a time, and
 * queue on 'hvc' the operation each entry calls for:
 *   - forced, or flagged as a new page: add_mmap() if the entry is
 *     present, add_munmap() otherwise;
 *   - else flagged as a protection change: add_mprotect().
 * Every visited entry is then marked up to date.  Pages inside
 * [STUB_START, STUB_END) are skipped entirely.
 *
 * The walk stops early when a queueing call returns non-zero; that
 * value is returned, 0 otherwise.
 */
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pte_t *ptep = pte_offset_kernel(pmd, addr);
	int prot, err = 0;

	do {
		if ((addr < STUB_START) || (addr >= STUB_END)) {
			/*
			 * Read and write are only granted to pages already
			 * marked young, and write only if also dirty.
			 */
			prot = 0;
			if (pte_young(*ptep)) {
				if (pte_read(*ptep))
					prot |= UM_PROT_READ;
				if (pte_dirty(*ptep) && pte_write(*ptep))
					prot |= UM_PROT_WRITE;
			}
			if (pte_exec(*ptep))
				prot |= UM_PROT_EXEC;

			if (hvc->force || pte_newpage(*ptep)) {
				if (pte_present(*ptep))
					err = add_mmap(addr,
						       pte_val(*ptep) & PAGE_MASK,
						       PAGE_SIZE, prot, hvc);
				else
					err = add_munmap(addr, PAGE_SIZE, hvc);
			} else if (pte_newprot(*ptep)) {
				err = add_mprotect(addr, PAGE_SIZE, prot, hvc);
			}

			*ptep = pte_mkuptodate(*ptep);
		}

		ptep++;
		addr += PAGE_SIZE;
	} while ((addr < end) && !err);

	return err;
}

/*
 * Walk the PMD entries for [addr, end) under 'pud'.
 *
 * A present entry is descended into with update_pte_range().  An absent
 * entry has its whole span queued for unmapping on 'hvc', and is marked
 * up to date, when the update is forced or the entry is flagged new.
 *
 * Stops at, and returns, the first non-zero result; 0 otherwise.
 */
static inline int update_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pmd_t *pmdp = pmd_offset(pud, addr);
	unsigned long span_end;
	int err = 0;

	do {
		span_end = pmd_addr_end(addr, end);

		if (pmd_present(*pmdp)) {
			err = update_pte_range(pmdp, addr, span_end, hvc);
		} else if (hvc->force || pmd_newpage(*pmdp)) {
			err = add_munmap(addr, span_end - addr, hvc);
			pmd_mkuptodate(*pmdp);
		}

		pmdp++;
		addr = span_end;
	} while ((addr < end) && !err);

	return err;
}

/*
 * Walk the PUD entries for [addr, end) under 'pgd'.
 *
 * A present entry is descended into with update_pmd_range().  An absent
 * entry has its whole span queued for unmapping on 'hvc', and is marked
 * up to date, when the update is forced or the entry is flagged new.
 *
 * Stops at, and returns, the first non-zero result; 0 otherwise.
 */
static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pud_t *pudp = pud_offset(pgd, addr);
	unsigned long span_end;
	int err = 0;

	do {
		span_end = pud_addr_end(addr, end);

		if (pud_present(*pudp)) {
			err = update_pmd_range(pudp, addr, span_end, hvc);
		} else if (hvc->force || pud_newpage(*pudp)) {
			err = add_munmap(addr, span_end - addr, hvc);
			pud_mkuptodate(*pudp);
		}

		pudp++;
		addr = span_end;
	} while ((addr < end) && !err);

	return err;
}

L
Linus Torvalds 已提交
257
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
J
Jeff Dike 已提交
258
		      unsigned long end_addr, int force)
L
Linus Torvalds 已提交
259
{
J
Jeff Dike 已提交
260
	pgd_t *pgd;
J
Jeff Dike 已提交
261
	struct host_vm_change hvc;
J
Jeff Dike 已提交
262
	unsigned long addr = start_addr, next;
J
Jeff Dike 已提交
263
	int ret = 0;
L
Linus Torvalds 已提交
264

J
Jeff Dike 已提交
265
	hvc = INIT_HVC(mm, force);
J
Jeff Dike 已提交
266 267 268
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end_addr);
J
Jeff Dike 已提交
269 270
		if (!pgd_present(*pgd)) {
			if (force || pgd_newpage(*pgd)) {
J
Jeff Dike 已提交
271
				ret = add_munmap(addr, next - addr, &hvc);
J
Jeff Dike 已提交
272
				pgd_mkuptodate(*pgd);
J
Jeff Dike 已提交
273 274
			}
		}
J
Jeff Dike 已提交
275
		else ret = update_pud_range(pgd, addr, next, &hvc);
J
Jeff Dike 已提交
276
	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
J
Jeff Dike 已提交
277

J
Jeff Dike 已提交
278
	if (!ret)
J
Jeff Dike 已提交
279
		ret = do_ops(&hvc, hvc.index, 1);
280

J
Jeff Dike 已提交
281
	/* This is not an else because ret is modified above */
J
Jeff Dike 已提交
282 283 284
	if (ret) {
		printk(KERN_ERR "fix_range_common: failed, killing current "
		       "process\n");
285 286
		force_sig(SIGKILL, current);
	}
L
Linus Torvalds 已提交
287 288
}

289
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
L
Linus Torvalds 已提交
290
{
J
Jeff Dike 已提交
291 292 293 294 295 296 297 298 299
	struct mm_struct *mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err;

	mm = &init_mm;
J
Jeff Dike 已提交
300
	for (addr = start; addr < end;) {
J
Jeff Dike 已提交
301
		pgd = pgd_offset(mm, addr);
J
Jeff Dike 已提交
302
		if (!pgd_present(*pgd)) {
J
Jeff Dike 已提交
303
			last = ADD_ROUND(addr, PGDIR_SIZE);
J
Jeff Dike 已提交
304
			if (last > end)
J
Jeff Dike 已提交
305
				last = end;
J
Jeff Dike 已提交
306
			if (pgd_newpage(*pgd)) {
J
Jeff Dike 已提交
307 308 309
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
J
Jeff Dike 已提交
310
				if (err < 0)
J
Jeff Dike 已提交
311 312 313 314 315 316 317 318
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(pgd, addr);
J
Jeff Dike 已提交
319
		if (!pud_present(*pud)) {
J
Jeff Dike 已提交
320
			last = ADD_ROUND(addr, PUD_SIZE);
J
Jeff Dike 已提交
321
			if (last > end)
J
Jeff Dike 已提交
322
				last = end;
J
Jeff Dike 已提交
323
			if (pud_newpage(*pud)) {
J
Jeff Dike 已提交
324 325 326
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
J
Jeff Dike 已提交
327
				if (err < 0)
J
Jeff Dike 已提交
328 329 330 331 332 333 334 335
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
J
Jeff Dike 已提交
336
		if (!pmd_present(*pmd)) {
J
Jeff Dike 已提交
337
			last = ADD_ROUND(addr, PMD_SIZE);
J
Jeff Dike 已提交
338
			if (last > end)
J
Jeff Dike 已提交
339
				last = end;
J
Jeff Dike 已提交
340
			if (pmd_newpage(*pmd)) {
J
Jeff Dike 已提交
341 342 343
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
J
Jeff Dike 已提交
344
				if (err < 0)
J
Jeff Dike 已提交
345 346 347 348 349 350 351 352
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
J
Jeff Dike 已提交
353
		if (!pte_present(*pte) || pte_newpage(*pte)) {
J
Jeff Dike 已提交
354 355 356
			updated = 1;
			err = os_unmap_memory((void *) addr,
					      PAGE_SIZE);
J
Jeff Dike 已提交
357
			if (err < 0)
J
Jeff Dike 已提交
358 359
				panic("munmap failed, errno = %d\n",
				      -err);
J
Jeff Dike 已提交
360
			if (pte_present(*pte))
J
Jeff Dike 已提交
361 362 363 364
				map_memory(addr,
					   pte_val(*pte) & PAGE_MASK,
					   PAGE_SIZE, 1, 1, 1);
		}
J
Jeff Dike 已提交
365
		else if (pte_newprot(*pte)) {
J
Jeff Dike 已提交
366 367 368 369 370
			updated = 1;
			os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
		}
		addr += PAGE_SIZE;
	}
J
Jeff Dike 已提交
371
	return updated;
L
Linus Torvalds 已提交
372 373
}

374 375 376 377 378 379 380 381 382 383 384 385 386
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	struct mm_struct *mm = vma->vm_mm;
	void *flush = NULL;
	int r, w, x, prot, err = 0;
	struct mm_id *mm_id;

	address &= PAGE_MASK;
	pgd = pgd_offset(mm, address);
J
Jeff Dike 已提交
387
	if (!pgd_present(*pgd))
388 389 390
		goto kill;

	pud = pud_offset(pgd, address);
J
Jeff Dike 已提交
391
	if (!pud_present(*pud))
392 393 394
		goto kill;

	pmd = pmd_offset(pud, address);
J
Jeff Dike 已提交
395
	if (!pmd_present(*pmd))
396 397 398 399 400 401 402 403 404 405 406 407 408 409
		goto kill;

	pte = pte_offset_kernel(pmd, address);

	r = pte_read(*pte);
	w = pte_write(*pte);
	x = pte_exec(*pte);
	if (!pte_young(*pte)) {
		r = 0;
		w = 0;
	} else if (!pte_dirty(*pte)) {
		w = 0;
	}

410
	mm_id = &mm->context.id;
411 412
	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
		(x ? UM_PROT_EXEC : 0));
J
Jeff Dike 已提交
413 414
	if (pte_newpage(*pte)) {
		if (pte_present(*pte)) {
415 416 417 418 419 420 421 422 423
			unsigned long long offset;
			int fd;

			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
				  1, &flush);
		}
		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
	}
J
Jeff Dike 已提交
424
	else if (pte_newprot(*pte))
425 426
		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

J
Jeff Dike 已提交
427
	if (err)
428 429 430 431 432 433 434
		goto kill;

	*pte = pte_mkuptodate(*pte);

	return;

kill:
J
Jeff Dike 已提交
435
	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
436 437 438
	force_sig(SIGKILL, current);
}

L
Linus Torvalds 已提交
439 440
/* Function form of pgd_offset(): PGD entry of 'mm' covering 'address'. */
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
	pgd_t *entry = pgd_offset(mm, address);

	return entry;
}

/* Function form of pud_offset(): PUD entry under 'pgd' covering 'address'. */
pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
	pud_t *entry = pud_offset(pgd, address);

	return entry;
}

/* Function form of pmd_offset(): PMD entry under 'pud' covering 'address'. */
pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
	pmd_t *entry = pmd_offset(pud, address);

	return entry;
}

/*
 * Function form of pte_offset_kernel(): PTE under 'pmd' covering
 * 'address'.
 */
pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
	pte_t *entry = pte_offset_kernel(pmd, address);

	return entry;
}

pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
J
Jeff Dike 已提交
461 462 463
	pgd_t *pgd = pgd_offset(task->mm, addr);
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);
L
Linus Torvalds 已提交
464

J
Jeff Dike 已提交
465
	return pte_offset_map(pmd, addr);
L
Linus Torvalds 已提交
466 467
}

468 469
void flush_tlb_all(void)
{
J
Jeff Dike 已提交
470
	flush_tlb_mm(current->mm);
471 472 473 474
}

/*
 * Flush the kernel range [start, end).  The "updated" result of the
 * common helper is not needed here and is discarded.
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	(void) flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
J
Jeff Dike 已提交
480
	flush_tlb_kernel_range_common(start_vm, end_vm);
481 482 483 484
}

void __flush_tlb_one(unsigned long addr)
{
J
Jeff Dike 已提交
485
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
486 487 488 489 490
}

/*
 * File-local entry point for range updates: hands all four arguments
 * straight to fix_range_common().
 */
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	fix_range_common(mm, start_addr, end_addr, force);
	return;
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
J
Jeff Dike 已提交
497 498 499
	if (vma->vm_mm == NULL)
		flush_tlb_kernel_range_common(start, end);
	else fix_range(vma->vm_mm, start, end, 0);
500 501
}

J
Jeff Dike 已提交
502 503
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end)
504
{
J
Jeff Dike 已提交
505 506 507 508 509 510
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;
511

J
Jeff Dike 已提交
512 513 514 515 516 517 518 519 520 521 522
	fix_range(mm, start, end, 0);
}

/*
 * Fix up, unforced, every VMA of 'mm' in turn, following the list that
 * starts at mm->mmap and is linked through vm_next.
 */
void flush_tlb_mm(struct mm_struct *mm)
{
	struct vm_area_struct *area;

	for (area = mm->mmap; area != NULL; area = area->vm_next)
		fix_range(mm, area->vm_start, area->vm_end, 0);
}

void force_flush_all(void)
{
527 528 529
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = mm->mmap;

J
Jeff Dike 已提交
530
	while (vma != NULL) {
531 532 533
		fix_range(mm, vma->vm_start, vma->vm_end, 1);
		vma = vma->vm_next;
	}
534
}