pageattr.c 6.5 KB
Newer Older
1 2
/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
L
Linus Torvalds 已提交
3
 * Thanks to Ben LaHaise for precious feedback.
4
 */
L
Linus Torvalds 已提交
5 6 7

#include <linux/highmem.h>
#include <linux/module.h>
8
#include <linux/sched.h>
L
Linus Torvalds 已提交
9
#include <linux/slab.h>
10 11
#include <linux/mm.h>

L
Linus Torvalds 已提交
12 13
#include <asm/processor.h>
#include <asm/tlbflush.h>
D
Dave Jones 已提交
14
#include <asm/sections.h>
15 16
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
L
Linus Torvalds 已提交
17

18
pte_t *lookup_address(unsigned long address, int *level)
19
{
L
Linus Torvalds 已提交
20 21 22
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;
23

L
Linus Torvalds 已提交
24 25 26 27 28 29 30 31
	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;
32
	*level = 3;
L
Linus Torvalds 已提交
33 34
	if (pmd_large(*pmd))
		return (pte_t *)pmd;
35
	*level = 4;
L
Linus Torvalds 已提交
36

37 38 39
	return pte_offset_kernel(pmd, address);
}

I
Ingo Molnar 已提交
40
/*
 * Write @pte into the entry @kpte and, on 32-bit kernels that do not share
 * the kernel pmd, into the corresponding pmd slot of every page directory
 * on pgd_list.
 *
 * @address is used to index each page directory (pgd_index/pud_offset/
 * pmd_offset), so it is only consulted on the CONFIG_X86_32 path.
 */
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	/* Update the reference entry first (the original comment calls
	 * this "change init_mm"). */
	set_pte_atomic(kpte, pte);

#ifdef CONFIG_X86_32
	if (!SHARED_KERNEL_PMD) {
		struct page *pgd_page = pgd_list;

		/* pgd_list is chained through page->index. */
		while (pgd_page) {
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;

			pgd = (pgd_t *)page_address(pgd_page) + pgd_index(address);
			pud = pud_offset(pgd, address);
			pmd = pmd_offset(pud, address);
			set_pte_atomic((pte_t *)pmd, pte);

			pgd_page = (struct page *)pgd_page->index;
		}
	}
#endif
}

64
/*
 * Replace the large-page mapping at @kpte (which maps the virtual address
 * @address) with a freshly allocated page table of PTRS_PER_PTE small
 * entries carrying the same protection bits (minus the huge bit).
 *
 * Returns -ENOMEM if the page-table page cannot be allocated, 0 otherwise.
 * 0 is also returned when the entry for @address no longer matches @kpte
 * by the time pgd_lock is taken; in that case nothing is installed and the
 * allocated page is freed again, so the caller is expected to look the
 * address up again.
 */
static int split_large_page(pte_t *kpte, unsigned long address)
{
	gfp_t gfp_flags = GFP_KERNEL;
	unsigned long flags;
	unsigned long paddr;
	pgprot_t ref_prot;
	pte_t *pbase, *tmp;
	struct page *base;
	int i, level;

#ifdef CONFIG_DEBUG_PAGEALLOC
	gfp_flags = GFP_ATOMIC;
#endif
	base = alloc_pages(gfp_flags, 0);
	if (!base)
		return -ENOMEM;

	spin_lock_irqsave(&pgd_lock, flags);
	/*
	 * Check for races, another CPU might have split this page
	 * up for us already:
	 */
	tmp = lookup_address(address, &level);
	if (tmp != kpte) {
		WARN_ON_ONCE(1);
		goto out_unlock;
	}

	/*
	 * Sample the protection bits only now, under pgd_lock and after
	 * the race check, so they reflect the entry we are about to
	 * replace rather than a value read before the allocation.
	 */
	ref_prot = pte_pgprot(pte_clrhuge(*kpte));

	/*
	 * Keep the physical address in its own variable: @address must
	 * stay virtual because __set_pmd_pte() uses it to index the page
	 * directories.
	 */
	paddr = __pa(address) & LARGE_PAGE_MASK;
	pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif

	for (i = 0; i < PTRS_PER_PTE; i++, paddr += PAGE_SIZE)
		set_pte(&pbase[i], pfn_pte(paddr >> PAGE_SHIFT, ref_prot));

	/*
	 * Install the new, split up pagetable:
	 */
	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
	/* The page is now owned by the page tables; do not free it. */
	base = NULL;

out_unlock:
	spin_unlock_irqrestore(&pgd_lock, flags);

	if (base)
		__free_pages(base, 0);

	return 0;
}

117 118
/*
 * Set the protection of the single kernel mapping of @page at @address
 * to @prot.
 *
 * If the address is currently covered by a large page, the large page is
 * split and the lookup is retried until a small (level 4) entry is found.
 *
 * Returns 0 on success, -EINVAL if @address has no mapping, or the error
 * returned by split_large_page().
 */
static int
__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
{
	BUG_ON(PageHighMem(page));

	for (;;) {
		struct page *table_page;
		pte_t *entry;
		int level;
		int ret;

		entry = lookup_address(address, &level);
		if (!entry)
			return -EINVAL;

		table_page = virt_to_page(entry);
		BUG_ON(PageLRU(table_page));
		BUG_ON(PageCompound(table_page));

		/*
		 * Better fail early if someone sets the kernel text to NX.
		 * Does not cover __inittext
		 */
		BUG_ON(address >= (unsigned long)&_text &&
		       address < (unsigned long)&_etext &&
		       (pgprot_val(prot) & _PAGE_NX));

		if (level == 4) {
			set_pte_atomic(entry, mk_pte(page, canon_pgprot(prot)));
			return 0;
		}

		/* Large page: split it, then look the address up again. */
		ret = split_large_page(entry, address);
		if (ret)
			return ret;
	}
}
L
Linus Torvalds 已提交
152

153 154 155 156 157
/**
 * change_page_attr_addr - Change page table attributes in linear mapping
 * @address: Virtual address in linear mapping.
 * @numpages: Number of pages to change
 * @prot:    New page table attribute (PAGE_*)
L
Linus Torvalds 已提交
158
 *
159 160 161
 * Change page attributes of a page in the direct mapping. This is a variant
 * of change_page_attr() that also works on memory holes that do not have
 * mem_map entry (pfn_valid() is false).
162
 *
163
 * See change_page_attr() documentation for more details.
L
Linus Torvalds 已提交
164
 */
165 166

int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
L
Linus Torvalds 已提交
167
{
168 169 170 171 172
	int err = 0, kernel_map = 0, i;

#ifdef CONFIG_X86_64
	if (address >= __START_KERNEL_map &&
			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
L
Linus Torvalds 已提交
173

174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
		address = (unsigned long)__va(__pa(address));
		kernel_map = 1;
	}
#endif

	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
		unsigned long pfn = __pa(address) >> PAGE_SHIFT;

		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
			err = __change_page_attr(address, pfn_to_page(pfn), prot);
			if (err)
				break;
		}
#ifdef CONFIG_X86_64
		/*
		 * Handle kernel mapping too which aliases part of
		 * lowmem:
		 */
		if (__pa(address) < KERNEL_TEXT_SIZE) {
			unsigned long addr2;
			pgprot_t prot2;

			addr2 = __START_KERNEL_map + __pa(address);
			/* Make sure the kernel mappings stay executable */
			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
		}
#endif
202 203
	}

L
Linus Torvalds 已提交
204 205 206
	return err;
}

207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
/**
 * change_page_attr - Change page table attributes in the linear mapping.
 * @page: First page to change
 * @numpages: Number of pages to change
 * @prot: New protection/caching type (PAGE_*)
 *
 * Returns 0 on success, otherwise a negated errno.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * in-kernel linear mapping too.
 *
 * Caller must call global_flush_tlb() later to make the changes active.
 *
 * The caller needs to ensure that there are no conflicting mappings
 * elsewhere (e.g. in user space). This function only deals with the kernel
 * linear map.
 *
 * For MMIO areas without mem_map use change_page_attr_addr() instead.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
	/* Delegate to the address-based variant using the page's kernel
	 * virtual address. */
	return change_page_attr_addr((unsigned long)page_address(page),
				     numpages, prot);
}
EXPORT_SYMBOL(change_page_attr);
I
Ingo Molnar 已提交
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248

/*
 * Per-CPU flush callback run by global_flush_tlb() via on_each_cpu().
 * @arg is unused.
 */
static void flush_kernel_map(void *arg)
{
	/*
	 * Flush all to work around Errata in early athlons regarding
	 * large page flushing.
	 */
	__flush_tlb_all();

	/*
	 * WBINVD availability depends on the CPU family (486 and later),
	 * so test the family field, not the model number: a family-6 CPU
	 * with a model below 4 must still flush its caches.
	 */
	if (boot_cpu_data.x86 >= 4)
		wbinvd();
}

void global_flush_tlb(void)
{
L
Linus Torvalds 已提交
249 250
	BUG_ON(irqs_disabled());

I
Ingo Molnar 已提交
251
	on_each_cpu(flush_kernel_map, NULL, 1, 1);
252
}
253
EXPORT_SYMBOL(global_flush_tlb);
L
Linus Torvalds 已提交
254 255 256 257 258 259

#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Map (@enable != 0) or unmap (@enable == 0) @numpages pages starting at
 * @page in the kernel linear mapping. Highmem pages are ignored.
 */
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	pgprot_t prot;

	if (PageHighMem(page))
		return;

	/* When unmapping, run the lock debugging check over the range. */
	if (!enable)
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);

	/*
	 * If page allocator is not up yet then do not call c_p_a():
	 */
	if (!debug_pagealloc_enabled)
		return;

	if (enable)
		prot = PAGE_KERNEL;
	else
		prot = __pgprot(0);

	/*
	 * the return value is ignored - the calls cannot fail,
	 * large pages are disabled at boot time.
	 */
	change_page_attr(page, numpages, prot);

	/*
	 * we should perform an IPI and flush all tlbs,
	 * but that can deadlock->flush only current cpu.
	 */
	__flush_tlb_all();
}
#endif