#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>

static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pte_t *pte;
	int err = 0;

	pte = pte_offset_map(pmd, addr);
	for (;;) {
		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
		if (err)
			break;
		addr += PAGE_SIZE;
		if (addr == end)
			break;
		pte++;
	}

	pte_unmap(pte);
	return err;
}

static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pmd_t *pmd;
	unsigned long next;
	int err = 0;

	pmd = pmd_offset(pud, addr);
	do {
again:
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		/*
		 * This implies that each ->pmd_entry() handler
		 * needs to know about pmd_trans_huge() pmds
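		 * (see the illustrative sketch after this function)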
		 */
		if (walk->pmd_entry)
			err = walk->pmd_entry(pmd, addr, next, walk);
		if (err)
			break;

		/*
		 * Check this here so we only break down trans_huge
		 * pages when we _need_ to
		 */
		if (!walk->pte_entry)
			continue;

		split_huge_page_pmd(walk->mm, pmd);
		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
			goto again;
		err = walk_pte_range(pmd, addr, next, walk);
		if (err)
			break;
	} while (pmd++, addr = next, addr != end);

	return err;
}
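
/*
 * Illustration only, not part of the original file: as the comment in
 * walk_pmd_range() above notes, a ->pmd_entry() handler must be prepared
 * to see transparent huge pmds. The callback name example_pmd_entry() is
 * hypothetical; a real handler would do its own accounting in each branch.
 */
#if 0
static int example_pmd_entry(pmd_t *pmd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk)
{
	if (pmd_trans_huge(*pmd)) {
		/* Handle the whole huge mapping in one go... */
		return 0;
	}
	/*
	 * ...otherwise return 0; if a ->pte_entry() callback is also set,
	 * walk_pmd_range() splits the pmd and walks the individual PTEs.
	 */
	return 0;
}
#endif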

static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pud_t *pud;
	unsigned long next;
	int err = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		if (walk->pud_entry)
			err = walk->pud_entry(pud, addr, next, walk);
91
		if (!err && (walk->pmd_entry || walk->pte_entry))
			err = walk_pmd_range(pud, addr, next, walk);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);

	return err;
}

#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	return boundary < end ? boundary : end;
}

static int walk_hugetlb_range(struct vm_area_struct *vma,
			      unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	pte_t *pte;
	int err = 0;

	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = huge_pte_offset(walk->mm, addr & hmask);
		if (pte && walk->hugetlb_entry)
			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
		if (err)
			return err;
	} while (addr = next, addr != end);

	return 0;
}

static struct vm_area_struct *hugetlb_vma(unsigned long addr, struct mm_walk *walk)
{
	struct vm_area_struct *vma;

	/* We don't need vma lookup at all. */
	if (!walk->hugetlb_entry)
		return NULL;

	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
	vma = find_vma(walk->mm, addr);
	if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma))
		return vma;

	return NULL;
}

#else /* CONFIG_HUGETLB_PAGE */
static struct vm_area_struct *hugetlb_vma(unsigned long addr, struct mm_walk *walk)
{
	return NULL;
}

static int walk_hugetlb_range(struct vm_area_struct *vma,
			      unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */


/**
 * walk_page_range - walk a memory map's page tables with a callback
 * @addr: starting address
 * @end: ending address
 * @walk: set of callbacks to invoke for each level of the tree
 *
 * Recursively walk the page table for the memory area in a VMA,
 * calling supplied callbacks. Callbacks are called in-order (first
 * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
 * etc.). If lower-level callbacks are omitted, walking depth is reduced.
 *
 * Each callback receives an entry pointer and the start and end of the
 * associated range, and a copy of the original mm_walk for access to
 * the ->private or ->mm fields.
 *
 * Usually no locks are taken, but splitting a transparent huge page may
 * take the page table lock, and the bottom-level iterator will map PTE
 * directories from highmem if necessary.
 *
 * If any callback returns a non-zero value, the walk is aborted and
 * the return value is propagated back to the caller. Otherwise 0 is returned.
 *
 * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry
 * is !NULL.
 */
int walk_page_range(unsigned long addr, unsigned long end,
		    struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	int err = 0;

	if (addr >= end)
		return err;

	if (!walk->mm)
		return -EINVAL;

	pgd = pgd_offset(walk->mm, addr);
	do {
		struct vm_area_struct *vma;

		next = pgd_addr_end(addr, end);

		/*
		 * Handle hugetlb vmas individually because the page table
		 * walk for hugetlb pages is architecture-dependent and we
		 * can't handle them in the same manner as non-huge pages.
		 */
		vma = hugetlb_vma(addr, walk);
		if (vma) {
			if (vma->vm_end < next)
				next = vma->vm_end;
			/*
			 * Hugetlb pages are tightly coupled to the vma, so
			 * walk through hugetlb entries within a given vma.
			 */
			err = walk_hugetlb_range(vma, addr, next, walk);
			if (err)
				break;
			pgd = pgd_offset(walk->mm, next);
			continue;
		}

		if (pgd_none_or_clear_bad(pgd)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			pgd++;
			continue;
		}
		if (walk->pgd_entry)
			err = walk->pgd_entry(pgd, addr, next, walk);
		if (!err &&
		    (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
			err = walk_pud_range(pgd, addr, next, walk);
		if (err)
			break;
		pgd++;
	} while (addr = next, addr != end);

	return err;
}
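
/*
 * Illustration only, not part of the original file: a minimal sketch of a
 * caller that uses walk_page_range() with a ->pte_entry() callback to count
 * present PTEs in a range. The names count_pte() and count_present_ptes()
 * are hypothetical; taking mmap_sem for read follows the locking rule in
 * the kernel-doc above.
 */
#if 0
static int count_pte(pte_t *pte, unsigned long addr, unsigned long end,
		     struct mm_walk *walk)
{
	unsigned long *count = walk->private;

	if (pte_present(*pte))
		(*count)++;
	return 0;	/* a non-zero return would abort the walk */
}

static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long start, unsigned long end)
{
	unsigned long count = 0;
	struct mm_walk count_walk = {
		.pte_entry	= count_pte,
		.mm		= mm,
		.private	= &count,
	};

	down_read(&mm->mmap_sem);
	walk_page_range(start, end, &count_walk);
	up_read(&mm->mmap_sem);

	return count;
}
#endif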