/*
 *	linux/mm/mincore.c
 *
 * Copyright (C) 1994-2006  Linus Torvalds
 */

/*
 * The mincore() system call.
 */
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

/*
 * Later we can get more picky about what "in core" means precisely.
 * For now, simply check to see if the page is in the page cache,
 * and is up to date; i.e. that no page-in operation would be required
 * at this time if an application were to map and access this page.
 */
N
Nick Piggin 已提交
27
static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
L
Linus Torvalds 已提交
28 29
{
	unsigned char present = 0;
N
Nick Piggin 已提交
30
	struct page *page;
L
Linus Torvalds 已提交
31

N
Nick Piggin 已提交
32 33 34 35 36 37 38 39 40 41 42
	/*
	 * When tmpfs swaps out a page from a file, any process mapping that
	 * file will not get a swp_entry_t in its pte, but rather it is like
	 * any other file mapping (ie. marked !present and faulted in with
	 * tmpfs's .nopage). So swapped out tmpfs mappings are tested here.
	 *
	 * However when tmpfs moves the page from pagecache and into swapcache,
	 * it is still in core, but the find_get_page below won't find it.
	 * No big deal, but make a note of it.
	 */
	page = find_get_page(mapping, pgoff);
L
Linus Torvalds 已提交
43 44 45 46 47 48 49 50
	if (page) {
		present = PageUptodate(page);
		page_cache_release(page);
	}

	return present;
}

51 52 53 54 55 56
/*
 * Do a chunk of "sys_mincore()". We've already checked
 * all the arguments, we hold the mmap semaphore: we should
 * just return the amount of info we're asked for.
 */
static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages)
L
Linus Torvalds 已提交
57
{
N
Nick Piggin 已提交
58 59 60 61 62 63 64 65
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long nr;
	int i;
	pgoff_t pgoff;
66
	struct vm_area_struct *vma = find_vma(current->mm, addr);
L
Linus Torvalds 已提交
67

68
	/*
69 70
	 * find_vma() didn't find anything above us, or we're
	 * in an unmapped hole in the address space: ENOMEM.
71
	 */
72 73
	if (!vma || addr < vma->vm_start)
		return -ENOMEM;
L
Linus Torvalds 已提交
74

75
	/*
N
Nick Piggin 已提交
76 77
	 * Calculate how many pages there are left in the last level of the
	 * PTE array for our address.
78
	 */
N
Nick Piggin 已提交
79
	nr = PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE-1));
N
Nick Piggin 已提交
80 81 82 83 84 85 86 87 88 89

	/*
	 * Don't overrun this vma
	 */
	nr = min(nr, (vma->vm_end - addr) >> PAGE_SHIFT);

	/*
	 * Don't return more than the caller asked for
	 */
	nr = min(nr, pages);
L
Linus Torvalds 已提交
90

N
Nick Piggin 已提交
91 92 93 94 95 96 97 98 99 100 101 102 103 104
	pgd = pgd_offset(vma->vm_mm, addr);
	if (pgd_none_or_clear_bad(pgd))
		goto none_mapped;
	pud = pud_offset(pgd, addr);
	if (pud_none_or_clear_bad(pud))
		goto none_mapped;
	pmd = pmd_offset(pud, addr);
	if (pmd_none_or_clear_bad(pmd))
		goto none_mapped;

	ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE) {
		unsigned char present;
		pte_t pte = *ptep;
L
Linus Torvalds 已提交
105

N
Nick Piggin 已提交
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
		if (pte_present(pte)) {
			present = 1;

		} else if (pte_none(pte)) {
			if (vma->vm_file) {
				pgoff = linear_page_index(vma, addr);
				present = mincore_page(vma->vm_file->f_mapping,
							pgoff);
			} else
				present = 0;

		} else if (pte_file(pte)) {
			pgoff = pte_to_pgoff(pte);
			present = mincore_page(vma->vm_file->f_mapping, pgoff);

		} else { /* pte is a swap entry */
			swp_entry_t entry = pte_to_swp_entry(pte);
			if (is_migration_entry(entry)) {
				/* migration entries are always uptodate */
				present = 1;
			} else {
N
Nick Piggin 已提交
127
#ifdef CONFIG_SWAP
N
Nick Piggin 已提交
128 129
				pgoff = entry.val;
				present = mincore_page(&swapper_space, pgoff);
N
Nick Piggin 已提交
130 131 132 133
#else
				WARN_ON(1);
				present = 1;
#endif
N
Nick Piggin 已提交
134 135
			}
		}
136 137

		vec[i] = present;
N
Nick Piggin 已提交
138 139 140 141 142 143 144 145 146 147
	}
	pte_unmap_unlock(ptep-1, ptl);

	return nr;

none_mapped:
	if (vma->vm_file) {
		pgoff = linear_page_index(vma, addr);
		for (i = 0; i < nr; i++, pgoff++)
			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
148 149 150
	} else {
		for (i = 0; i < nr; i++)
			vec[i] = 0;
N
Nick Piggin 已提交
151
	}
L
Linus Torvalds 已提交
152

153
	return nr;
L
Linus Torvalds 已提交
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
}

/*
 * The mincore(2) system call.
 *
 * mincore() reports the memory residency of the pages of the calling
 * process's address space in [start, start + len).  The result is a
 * vector with one byte per page; the least significant bit of a byte is
 * 1 when the corresponding page is in memory and 0 otherwise.
 *
 * A page's status can change after mincore() has looked at it but before
 * the call returns to the application, so the vector may already be
 * stale.  Only locked pages are guaranteed to stay in memory.
 *
 * return values:
 *  zero    - success
 *  -EFAULT - vec points to an illegal address
 *  -EINVAL - start is not a multiple of PAGE_CACHE_SIZE
 *  -ENOMEM - Addresses in the range [start, start + len) are
 *		invalid for the address space of this process, or
 *		specify one or more pages which are not currently
 *		mapped
 *  -EAGAIN - A kernel resource was temporarily unavailable.
 */
asmlinkage long sys_mincore(unsigned long start, size_t len,
	unsigned char __user * vec)
{
	unsigned char *kbuf;
	unsigned long remaining;
	long status = 0;

	/* The start address has to be page-aligned ... */
	if (start & ~PAGE_CACHE_MASK)
		return -EINVAL;

	/* ... and the range has to be a valid user-space range. */
	if (!access_ok(VERIFY_READ, (void __user *) start, len))
		return -ENOMEM;

	/*
	 * Round len up to whole pages by hand; this also avoids any
	 * overflows on PAGE_CACHE_ALIGN.
	 */
	remaining = len >> PAGE_SHIFT;
	if (len & ~PAGE_MASK)
		remaining++;

	/* One result byte per page must be writable at vec. */
	if (!access_ok(VERIFY_WRITE, vec, remaining))
		return -EFAULT;

	/* One page of kernel memory serves as the bounce buffer. */
	kbuf = (void *) __get_free_page(GFP_USER);
	if (!kbuf)
		return -EAGAIN;

	while (remaining) {
		long filled;

		/*
		 * The bounce buffer holds PAGE_SIZE bytes, so at most
		 * PAGE_SIZE entries can be produced per iteration.
		 */
		down_read(&current->mm->mmap_sem);
		filled = do_mincore(start, kbuf, min(remaining, PAGE_SIZE));
		up_read(&current->mm->mmap_sem);

		if (filled <= 0) {
			status = filled;
			break;
		}
		if (copy_to_user(vec, kbuf, filled)) {
			status = -EFAULT;
			break;
		}

		/* Advance past the pages that were just reported. */
		remaining -= filled;
		vec += filled;
		start += filled << PAGE_SHIFT;
	}

	free_page((unsigned long) kbuf);
	return status;
}