tlb.c 11.7 KB
Newer Older
J
Jeff Dike 已提交
1
/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

J
Jeff Dike 已提交
6
#include <linux/mm.h>
7
#include <linux/module.h>
J
Jeff Dike 已提交
8 9 10
#include <linux/sched.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
J
Jeff Dike 已提交
11
#include "as-layout.h"
L
Linus Torvalds 已提交
12 13
#include "mem_user.h"
#include "os.h"
14
#include "skas.h"
L
Linus Torvalds 已提交
15

J
Jeff Dike 已提交
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
/*
 * One queued host address-space operation.  'type' selects which member
 * of 'u' is meaningful; NONE marks an unused slot.
 */
struct host_vm_op {
	enum { NONE, MMAP, MUNMAP, MPROTECT } type;
	union {
		/* MMAP: arguments later handed to map() by do_ops() */
		struct {
			unsigned long addr;
			unsigned long len;
			unsigned int prot;
			int fd;
			__u64 offset;
		} mmap;
		/* MUNMAP: arguments later handed to unmap() by do_ops() */
		struct {
			unsigned long addr;
			unsigned long len;
		} munmap;
		/* MPROTECT: arguments later handed to protect() by do_ops() */
		struct {
			unsigned long addr;
			unsigned long len;
			unsigned int prot;
		} mprotect;
	} u;
};

/*
 * Batch of pending host operations built up while walking the page tables.
 *
 * ops   - queue of pending operations; its capacity is ARRAY_SIZE(ops)
 * index - number of entries of ops[] currently in use
 * id    - passed as the first argument to map()/unmap()/protect()
 * data  - its address is passed as the last argument to
 *         map()/unmap()/protect(); starts out NULL (see INIT_HVC)
 * force - when non-zero, entries are processed as if they were flagged
 *         as new pages
 */
struct host_vm_change {
	struct host_vm_op ops[1];
	int index;
	struct mm_id *id;
	void *data;
	int force;
};

/*
 * Build an empty struct host_vm_change for the address space 'mm_arg'
 * (a struct mm_struct pointer), with the force flag set to 'force_arg'.
 *
 * The parameters are deliberately not named after struct members: a macro
 * parameter called "force" is also substituted into the ".force"
 * designator, so the expansion would only be valid when the caller's
 * argument happened to be spelled "force" as well.  Both arguments are
 * parenthesized so that arbitrary expressions may be passed.
 */
#define INIT_HVC(mm_arg, force_arg) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } },	\
	   .id		= &(mm_arg)->context.id, \
	   .data	= NULL, \
	   .index	= 0, \
	   .force	= (force_arg) })

static int do_ops(struct host_vm_change *hvc, int end,
		  int finished)
{
	struct host_vm_op *op;
	int i, ret = 0;

	for (i = 0; i < end && !ret; i++) {
		op = &hvc->ops[i];
J
Jeff Dike 已提交
60
		switch (op->type) {
J
Jeff Dike 已提交
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
		case MMAP:
			ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
				  op->u.mmap.prot, op->u.mmap.fd,
				  op->u.mmap.offset, finished, &hvc->data);
			break;
		case MUNMAP:
			ret = unmap(hvc->id, op->u.munmap.addr,
				    op->u.munmap.len, finished, &hvc->data);
			break;
		case MPROTECT:
			ret = protect(hvc->id, op->u.mprotect.addr,
				      op->u.mprotect.len, op->u.mprotect.prot,
				      finished, &hvc->data);
			break;
		default:
			printk(KERN_ERR "Unknown op type %d in do_ops\n",
			       op->type);
			break;
		}
	}

	return ret;
}

J
Jeff Dike 已提交
85
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
J
Jeff Dike 已提交
86
		    unsigned int prot, struct host_vm_change *hvc)
J
Jeff Dike 已提交
87
{
J
Jeff Dike 已提交
88
	__u64 offset;
J
Jeff Dike 已提交
89
	struct host_vm_op *last;
90
	int fd, ret = 0;
J
Jeff Dike 已提交
91 92

	fd = phys_mapping(phys, &offset);
J
Jeff Dike 已提交
93 94
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
J
Jeff Dike 已提交
95
		if ((last->type == MMAP) &&
J
Jeff Dike 已提交
96
		   (last->u.mmap.addr + last->u.mmap.len == virt) &&
J
Jeff Dike 已提交
97
		   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
J
Jeff Dike 已提交
98
		   (last->u.mmap.offset + last->u.mmap.len == offset)) {
J
Jeff Dike 已提交
99
			last->u.mmap.len += len;
100
			return 0;
J
Jeff Dike 已提交
101 102 103
		}
	}

J
Jeff Dike 已提交
104 105 106
	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
J
Jeff Dike 已提交
107 108
	}

J
Jeff Dike 已提交
109 110 111 112 113 114 115
	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MMAP,
				    .u = { .mmap = { .addr	= virt,
						     .len	= len,
						     .prot	= prot,
						     .fd	= fd,
						     .offset	= offset }
116 117
			   } });
	return ret;
J
Jeff Dike 已提交
118 119 120
}

static int add_munmap(unsigned long addr, unsigned long len,
J
Jeff Dike 已提交
121
		      struct host_vm_change *hvc)
J
Jeff Dike 已提交
122 123
{
	struct host_vm_op *last;
124
	int ret = 0;
J
Jeff Dike 已提交
125

J
Jeff Dike 已提交
126 127
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
J
Jeff Dike 已提交
128 129
		if ((last->type == MUNMAP) &&
		   (last->u.munmap.addr + last->u.mmap.len == addr)) {
J
Jeff Dike 已提交
130
			last->u.munmap.len += len;
131
			return 0;
J
Jeff Dike 已提交
132 133 134
		}
	}

J
Jeff Dike 已提交
135 136 137
	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
J
Jeff Dike 已提交
138 139
	}

J
Jeff Dike 已提交
140 141 142 143
	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MUNMAP,
			     	    .u = { .munmap = { .addr	= addr,
						       .len	= len } } });
144
	return ret;
J
Jeff Dike 已提交
145 146
}

J
Jeff Dike 已提交
147
static int add_mprotect(unsigned long addr, unsigned long len,
J
Jeff Dike 已提交
148
			unsigned int prot, struct host_vm_change *hvc)
J
Jeff Dike 已提交
149 150
{
	struct host_vm_op *last;
151
	int ret = 0;
J
Jeff Dike 已提交
152

J
Jeff Dike 已提交
153 154
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
J
Jeff Dike 已提交
155
		if ((last->type == MPROTECT) &&
J
Jeff Dike 已提交
156
		   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
J
Jeff Dike 已提交
157
		   (last->u.mprotect.prot == prot)) {
J
Jeff Dike 已提交
158
			last->u.mprotect.len += len;
159
			return 0;
J
Jeff Dike 已提交
160 161 162
		}
	}

J
Jeff Dike 已提交
163 164 165
	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
J
Jeff Dike 已提交
166 167
	}

J
Jeff Dike 已提交
168 169 170 171 172
	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MPROTECT,
			     	    .u = { .mprotect = { .addr	= addr,
							 .len	= len,
							 .prot	= prot } } });
173
	return ret;
J
Jeff Dike 已提交
174 175
}

L
Linus Torvalds 已提交
176 177
/*
 * Add 'inc' to 'n' and clear the bits selected by (inc - 1).  When 'inc'
 * is a power of two this yields the next multiple of 'inc' strictly above
 * 'n' (an already aligned 'n' advances by a full 'inc').
 * Note that 'inc' is evaluated twice.
 */
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))

J
Jeff Dike 已提交
178
/*
 * Walk the ptes under 'pmd' that cover [addr, end), one PAGE_SIZE step at
 * a time, and queue in 'hvc' the operation each one calls for:
 *
 *  - hvc->force set or pte flagged as a new page: add_mmap() if the pte
 *    is present, add_munmap() otherwise;
 *  - else pte flagged as having new protections: add_mprotect().
 *
 * Read and write permission are withheld while the pte is not young, and
 * write permission is withheld while it is not dirty.  Every visited pte
 * is then marked up to date.  Addresses inside [STUB_START, STUB_END) are
 * skipped completely: nothing is queued and the pte is left untouched.
 *
 * At least one pte is always examined.  The walk stops early on the first
 * non-zero result, which is returned; otherwise 0 is returned.
 */
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	int err = 0;

	do {
		int in_stub = (addr >= STUB_START) && (addr < STUB_END);

		if (!in_stub) {
			int r, w, x, prot;

			r = pte_read(*pte);
			w = pte_write(*pte);
			x = pte_exec(*pte);
			if (!pte_young(*pte))
				r = w = 0;
			else if (!pte_dirty(*pte))
				w = 0;

			prot = 0;
			if (r)
				prot |= UM_PROT_READ;
			if (w)
				prot |= UM_PROT_WRITE;
			if (x)
				prot |= UM_PROT_EXEC;

			if (hvc->force || pte_newpage(*pte)) {
				if (!pte_present(*pte))
					err = add_munmap(addr, PAGE_SIZE, hvc);
				else
					err = add_mmap(addr,
						       pte_val(*pte) & PAGE_MASK,
						       PAGE_SIZE, prot, hvc);
			} else if (pte_newprot(*pte)) {
				err = add_mprotect(addr, PAGE_SIZE, prot, hvc);
			}

			*pte = pte_mkuptodate(*pte);
		}

		pte++;
		addr += PAGE_SIZE;
	} while ((addr < end) && !err);

	return err;
}

/*
 * Walk the pmd entries under 'pud' that cover [addr, end).
 *
 * A present pmd is descended into with update_pte_range().  A pmd that is
 * not present has its whole span queued for unmapping, but only when
 * hvc->force is set or the pmd is flagged as a new page; pmd_mkuptodate()
 * is then applied to it.
 *
 * The walk stops early on the first non-zero result, which is returned;
 * otherwise 0 is returned.
 */
static inline int update_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	unsigned long next;
	int err = 0;

	do {
		next = pmd_addr_end(addr, end);

		if (pmd_present(*pmd)) {
			err = update_pte_range(pmd, addr, next, hvc);
		} else if (hvc->force || pmd_newpage(*pmd)) {
			err = add_munmap(addr, next - addr, hvc);
			/* NOTE(review): result unused - presumably updates *pmd in place; confirm */
			pmd_mkuptodate(*pmd);
		}

		pmd++;
		addr = next;
	} while ((addr < end) && !err);

	return err;
}

/*
 * Walk the pud entries under 'pgd' that cover [addr, end).
 *
 * A present pud is descended into with update_pmd_range().  A pud that is
 * not present has its whole span queued for unmapping, but only when
 * hvc->force is set or the pud is flagged as a new page; pud_mkuptodate()
 * is then applied to it.
 *
 * The walk stops early on the first non-zero result, which is returned;
 * otherwise 0 is returned.
 */
static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pud_t *pud = pud_offset(pgd, addr);
	unsigned long next;
	int err = 0;

	do {
		next = pud_addr_end(addr, end);

		if (pud_present(*pud)) {
			err = update_pmd_range(pud, addr, next, hvc);
		} else if (hvc->force || pud_newpage(*pud)) {
			err = add_munmap(addr, next - addr, hvc);
			/* NOTE(review): result unused - presumably updates *pud in place; confirm */
			pud_mkuptodate(*pud);
		}

		pud++;
		addr = next;
	} while ((addr < end) && !err);

	return err;
}

L
Linus Torvalds 已提交
258
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
J
Jeff Dike 已提交
259
		      unsigned long end_addr, int force)
L
Linus Torvalds 已提交
260
{
J
Jeff Dike 已提交
261
	pgd_t *pgd;
J
Jeff Dike 已提交
262
	struct host_vm_change hvc;
J
Jeff Dike 已提交
263
	unsigned long addr = start_addr, next;
J
Jeff Dike 已提交
264
	int ret = 0;
L
Linus Torvalds 已提交
265

J
Jeff Dike 已提交
266
	hvc = INIT_HVC(mm, force);
J
Jeff Dike 已提交
267 268 269
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end_addr);
J
Jeff Dike 已提交
270 271
		if (!pgd_present(*pgd)) {
			if (force || pgd_newpage(*pgd)) {
J
Jeff Dike 已提交
272
				ret = add_munmap(addr, next - addr, &hvc);
J
Jeff Dike 已提交
273
				pgd_mkuptodate(*pgd);
J
Jeff Dike 已提交
274 275
			}
		}
J
Jeff Dike 已提交
276
		else ret = update_pud_range(pgd, addr, next, &hvc);
J
Jeff Dike 已提交
277
	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
J
Jeff Dike 已提交
278

J
Jeff Dike 已提交
279
	if (!ret)
J
Jeff Dike 已提交
280
		ret = do_ops(&hvc, hvc.index, 1);
281

J
Jeff Dike 已提交
282
	/* This is not an else because ret is modified above */
J
Jeff Dike 已提交
283 284 285
	if (ret) {
		printk(KERN_ERR "fix_range_common: failed, killing current "
		       "process\n");
286 287
		force_sig(SIGKILL, current);
	}
L
Linus Torvalds 已提交
288 289
}

290
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
L
Linus Torvalds 已提交
291
{
J
Jeff Dike 已提交
292 293 294 295 296 297 298 299 300
	struct mm_struct *mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err;

	mm = &init_mm;
J
Jeff Dike 已提交
301
	for (addr = start; addr < end;) {
J
Jeff Dike 已提交
302
		pgd = pgd_offset(mm, addr);
J
Jeff Dike 已提交
303
		if (!pgd_present(*pgd)) {
J
Jeff Dike 已提交
304
			last = ADD_ROUND(addr, PGDIR_SIZE);
J
Jeff Dike 已提交
305
			if (last > end)
J
Jeff Dike 已提交
306
				last = end;
J
Jeff Dike 已提交
307
			if (pgd_newpage(*pgd)) {
J
Jeff Dike 已提交
308 309 310
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
J
Jeff Dike 已提交
311
				if (err < 0)
J
Jeff Dike 已提交
312 313 314 315 316 317 318 319
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(pgd, addr);
J
Jeff Dike 已提交
320
		if (!pud_present(*pud)) {
J
Jeff Dike 已提交
321
			last = ADD_ROUND(addr, PUD_SIZE);
J
Jeff Dike 已提交
322
			if (last > end)
J
Jeff Dike 已提交
323
				last = end;
J
Jeff Dike 已提交
324
			if (pud_newpage(*pud)) {
J
Jeff Dike 已提交
325 326 327
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
J
Jeff Dike 已提交
328
				if (err < 0)
J
Jeff Dike 已提交
329 330 331 332 333 334 335 336
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
J
Jeff Dike 已提交
337
		if (!pmd_present(*pmd)) {
J
Jeff Dike 已提交
338
			last = ADD_ROUND(addr, PMD_SIZE);
J
Jeff Dike 已提交
339
			if (last > end)
J
Jeff Dike 已提交
340
				last = end;
J
Jeff Dike 已提交
341
			if (pmd_newpage(*pmd)) {
J
Jeff Dike 已提交
342 343 344
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
J
Jeff Dike 已提交
345
				if (err < 0)
J
Jeff Dike 已提交
346 347 348 349 350 351 352 353
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
J
Jeff Dike 已提交
354
		if (!pte_present(*pte) || pte_newpage(*pte)) {
J
Jeff Dike 已提交
355 356 357
			updated = 1;
			err = os_unmap_memory((void *) addr,
					      PAGE_SIZE);
J
Jeff Dike 已提交
358
			if (err < 0)
J
Jeff Dike 已提交
359 360
				panic("munmap failed, errno = %d\n",
				      -err);
J
Jeff Dike 已提交
361
			if (pte_present(*pte))
J
Jeff Dike 已提交
362 363 364 365
				map_memory(addr,
					   pte_val(*pte) & PAGE_MASK,
					   PAGE_SIZE, 1, 1, 1);
		}
J
Jeff Dike 已提交
366
		else if (pte_newprot(*pte)) {
J
Jeff Dike 已提交
367 368 369 370 371
			updated = 1;
			os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
		}
		addr += PAGE_SIZE;
	}
J
Jeff Dike 已提交
372
	return updated;
L
Linus Torvalds 已提交
373 374
}

375 376 377 378 379 380 381 382 383 384 385 386 387
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	struct mm_struct *mm = vma->vm_mm;
	void *flush = NULL;
	int r, w, x, prot, err = 0;
	struct mm_id *mm_id;

	address &= PAGE_MASK;
	pgd = pgd_offset(mm, address);
J
Jeff Dike 已提交
388
	if (!pgd_present(*pgd))
389 390 391
		goto kill;

	pud = pud_offset(pgd, address);
J
Jeff Dike 已提交
392
	if (!pud_present(*pud))
393 394 395
		goto kill;

	pmd = pmd_offset(pud, address);
J
Jeff Dike 已提交
396
	if (!pmd_present(*pmd))
397 398 399 400 401 402 403 404 405 406 407 408 409 410
		goto kill;

	pte = pte_offset_kernel(pmd, address);

	r = pte_read(*pte);
	w = pte_write(*pte);
	x = pte_exec(*pte);
	if (!pte_young(*pte)) {
		r = 0;
		w = 0;
	} else if (!pte_dirty(*pte)) {
		w = 0;
	}

411
	mm_id = &mm->context.id;
412 413
	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
		(x ? UM_PROT_EXEC : 0));
J
Jeff Dike 已提交
414 415
	if (pte_newpage(*pte)) {
		if (pte_present(*pte)) {
416 417 418 419 420 421 422 423 424
			unsigned long long offset;
			int fd;

			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
				  1, &flush);
		}
		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
	}
J
Jeff Dike 已提交
425
	else if (pte_newprot(*pte))
426 427
		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

J
Jeff Dike 已提交
428
	if (err)
429 430 431 432 433 434 435
		goto kill;

	*pte = pte_mkuptodate(*pte);

	return;

kill:
J
Jeff Dike 已提交
436
	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
437 438 439
	force_sig(SIGKILL, current);
}

L
Linus Torvalds 已提交
440 441
/* Function wrapper around pgd_offset(): returns its result for 'mm' and 'address'. */
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
	pgd_t *entry = pgd_offset(mm, address);

	return entry;
}

/* Function wrapper around pud_offset(): returns its result for 'pgd' and 'address'. */
pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
	pud_t *entry = pud_offset(pgd, address);

	return entry;
}

/* Function wrapper around pmd_offset(): returns its result for 'pud' and 'address'. */
pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
	pmd_t *entry = pmd_offset(pud, address);

	return entry;
}

/* Function wrapper around pte_offset_kernel(): returns its result for 'pmd' and 'address'. */
pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
	pte_t *entry = pte_offset_kernel(pmd, address);

	return entry;
}

pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
J
Jeff Dike 已提交
462 463 464
	pgd_t *pgd = pgd_offset(task->mm, addr);
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);
L
Linus Torvalds 已提交
465

J
Jeff Dike 已提交
466
	return pte_offset_map(pmd, addr);
L
Linus Torvalds 已提交
467 468
}

469 470
void flush_tlb_all(void)
{
J
Jeff Dike 已提交
471
	flush_tlb_mm(current->mm);
472 473 474 475
}

/*
 * Apply pending kernel page-table changes over [start, end).  The
 * "updated" result of the common helper is not reported to the caller.
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	(void) flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
J
Jeff Dike 已提交
481
	flush_tlb_kernel_range_common(start_vm, end_vm);
482 483 484 485
}

void __flush_tlb_one(unsigned long addr)
{
J
Jeff Dike 已提交
486
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
487 488 489 490 491
}

/*
 * File-local entry point for range fix-ups: forwards all four arguments
 * unchanged to fix_range_common().
 */
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	fix_range_common(mm, start_addr, end_addr, force);
	return;
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
J
Jeff Dike 已提交
498 499 500
	if (vma->vm_mm == NULL)
		flush_tlb_kernel_range_common(start, end);
	else fix_range(vma->vm_mm, start, end, 0);
501
}
502
EXPORT_SYMBOL(flush_tlb_range);
503

J
Jeff Dike 已提交
504 505
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end)
506
{
J
Jeff Dike 已提交
507 508 509 510 511 512
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;
513

J
Jeff Dike 已提交
514 515 516 517 518 519 520 521 522 523 524
	fix_range(mm, start, end, 0);
}

/*
 * Fix up, without forcing, the address range of every vma on the
 * mm->mmap list.
 */
void flush_tlb_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next)
		fix_range(mm, vma->vm_start, vma->vm_end, 0);
}

void force_flush_all(void)
{
529 530 531
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = mm->mmap;

J
Jeff Dike 已提交
532
	while (vma != NULL) {
533 534 535
		fix_range(mm, vma->vm_start, vma->vm_end, 1);
		vma = vma->vm_next;
	}
536
}