/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */

#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
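
/*
 * Flush every cache line in the range [addr, addr + size), stepping by
 * the CPU's reported cache-line flush granularity:
 */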
void clflush_cache_range(void *addr, int size)
{
	int i;

	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
		clflush(addr+i);
}

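/*
 * Look up the kernel pte mapping @address. Via @level the caller learns
 * whether the address is mapped by a large pmd (level 3) or a regular
 * pte (level 4). Returns NULL if no page table entry is present.
 */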
pte_t *lookup_address(unsigned long address, int *level)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;
	*level = 3;
	if (pmd_large(*pmd))
		return (pte_t *)pmd;
	*level = 4;

	return pte_offset_kernel(pmd, address);
}

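/*
 * Set a pmd-level entry: update init_mm and, on 32-bit kernels where
 * the kernel pmd is not shared, mirror the change into every pgd on
 * pgd_list:
 */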
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	/* change init_mm */
	set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
	if (SHARED_KERNEL_PMD)
		return;
	{
		struct page *page;

		for (page = pgd_list; page; page = (struct page *)page->index) {
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;

			pgd = (pgd_t *)page_address(page) + pgd_index(address);
			pud = pud_offset(pgd, address);
			pmd = pmd_offset(pud, address);
			set_pte_atomic((pte_t *)pmd, pte);
		}
	}
#endif
}

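/*
 * Split a large (2M/4M) kernel mapping into individual 4k ptes carrying
 * the same protections, then install the new page table under pgd_lock:
 */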
static int split_large_page(pte_t *kpte, unsigned long address)
{
	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
	gfp_t gfp_flags = GFP_KERNEL;
	unsigned long flags;
	unsigned long addr;
	pte_t *pbase, *tmp;
	struct page *base;
	int i, level;

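	/*
	 * With CONFIG_DEBUG_PAGEALLOC a split may be requested from a
	 * context that cannot sleep (the page allocator itself), so the
	 * allocation must not use GFP_KERNEL:
	 */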
#ifdef CONFIG_DEBUG_PAGEALLOC
	gfp_flags = GFP_ATOMIC;
#endif
	base = alloc_pages(gfp_flags, 0);
	if (!base)
		return -ENOMEM;

	spin_lock_irqsave(&pgd_lock, flags);
	/*
	 * Check for races, another CPU might have split this page
	 * up for us already:
	 */
	tmp = lookup_address(address, &level);
	if (tmp != kpte) {
		WARN_ON_ONCE(1);
		goto out_unlock;
	}

	address = __pa(address);
	addr = address & LARGE_PAGE_MASK;
	pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif

	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));

	/*
	 * Install the new, split-up page table:
	 */
	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
	base = NULL;

out_unlock:
	spin_unlock_irqrestore(&pgd_lock, flags);

	if (base)
		__free_pages(base, 0);

	return 0;
}

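/*
 * Change the attributes of a single kernel page at @address. If the
 * address is still covered by a large mapping, split it first and retry:
 */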
static int
__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
{
	struct page *kpte_page;
	int level, err = 0;
	pte_t *kpte;

	BUG_ON(PageHighMem(page));

repeat:
	kpte = lookup_address(address, &level);
	if (!kpte)
		return -EINVAL;

	kpte_page = virt_to_page(kpte);
	BUG_ON(PageLRU(kpte_page));
	BUG_ON(PageCompound(kpte_page));

	/*
	 * Better fail early if someone sets the kernel text to NX.
	 * Does not cover __inittext.
	 */
	BUG_ON(address >= (unsigned long)&_text &&
	       address < (unsigned long)&_etext &&
	       (pgprot_val(prot) & _PAGE_NX));

	if (level == 4) {
		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
	} else {
		err = split_large_page(kpte, address);
		if (!err)
			goto repeat;
	}
	return err;
}

/**
 * change_page_attr_addr - Change page table attributes in linear mapping
 * @address: Virtual address in linear mapping.
 * @numpages: Number of pages to change
 * @prot:    New page table attribute (PAGE_*)
 *
 * Change page attributes of a page in the direct mapping. This is a variant
 * of change_page_attr() that also works on memory holes that do not have
 * mem_map entry (pfn_valid() is false).
 *
 * See change_page_attr() documentation for more details.
 */
int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
{
	int err = 0, kernel_map = 0, i;

#ifdef CONFIG_X86_64
	if (address >= __START_KERNEL_map &&
	    address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
		address = (unsigned long)__va(__pa(address));
		kernel_map = 1;
	}
#endif

	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
		unsigned long pfn = __pa(address) >> PAGE_SHIFT;

		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
			err = __change_page_attr(address, pfn_to_page(pfn), prot);
			if (err)
				break;
		}
#ifdef CONFIG_X86_64
		/*
		 * Handle kernel mapping too which aliases part of
		 * lowmem:
		 */
		if (__pa(address) < KERNEL_TEXT_SIZE) {
			unsigned long addr2;
			pgprot_t prot2;

			addr2 = __START_KERNEL_map + __pa(address);
			/* Make sure the kernel mappings stay executable */
			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
		}
#endif
	}

	return err;
}
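
/*
 * Illustrative sketch (not part of the original file): a caller that
 * wants to write-protect a stretch of the direct mapping, given a
 * hypothetical physical address phys and page count nrpages, could do:
 *
 *	change_page_attr_addr((unsigned long)__va(phys), nrpages,
 *			      PAGE_KERNEL_RO);
 *	global_flush_tlb();
 */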

/**
 * change_page_attr - Change page table attributes in the linear mapping.
 * @page: First page to change
 * @numpages: Number of pages to change
 * @prot: New protection/caching type (PAGE_*)
 *
 * Returns 0 on success, otherwise a negative errno value.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of
 * the kernel linear mapping too.
 *
 * Caller must call global_flush_tlb() later to make the changes active.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere
 * (e.g. in user space). This function only deals with the kernel linear map.
 *
 * For MMIO areas without mem_map use change_page_attr_addr() instead.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
	unsigned long addr = (unsigned long)page_address(page);

	return change_page_attr_addr(addr, numpages, prot);
}
EXPORT_SYMBOL(change_page_attr);
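
/*
 * Illustrative sketch (not part of the original file): a driver that
 * needs one page mapped uncached, and later restored, could do:
 *
 *	change_page_attr(page, 1, PAGE_KERNEL_NOCACHE);
 *	global_flush_tlb();
 *	...
 *	change_page_attr(page, 1, PAGE_KERNEL);
 *	global_flush_tlb();
 */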

static void flush_kernel_map(void *arg)
{
	/*
	 * Flush everything to work around errata in early Athlons
	 * regarding large page flushing:
	 */
	__flush_tlb_all();

	if (boot_cpu_data.x86_model >= 4)
		wbinvd();
}

void global_flush_tlb(void)
{
	BUG_ON(irqs_disabled());

	on_each_cpu(flush_kernel_map, NULL, 1, 1);
}
EXPORT_SYMBOL(global_flush_tlb);

#ifdef CONFIG_DEBUG_PAGEALLOC
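/*
 * With CONFIG_DEBUG_PAGEALLOC the page allocator unmaps freed pages in
 * the kernel linear mapping, so use-after-free accesses fault right away:
 */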
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	if (!enable) {
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);
	}

	/*
	 * If page allocator is not up yet then do not call c_p_a():
	 */
	if (!debug_pagealloc_enabled)
		return;

	/*
	 * The return value is ignored - the calls cannot fail here,
	 * since large pages are disabled at boot time:
	 */
	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

	/*
	 * We should perform an IPI and flush all TLBs,
	 * but that can deadlock -> flush only the current CPU:
	 */
	__flush_tlb_all();
}
#endif