/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

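/*
 * Flush a memory range from the CPU caches one cache line at a time,
 * via the clflush instruction, stepping by the boot CPU's reported
 * cache-line size.
 */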
void clflush_cache_range(void *addr, int size)
{
	int i;

	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
		clflush(addr+i);
}

#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>

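/*
 * Look up the kernel pte for a virtual address. Returns NULL if there
 * is no mapping. *level is set to 3 when the address is covered by a
 * large (PMD-level) page - the returned pointer is then really the pmd
 * entry - and to 4 for a regular 4K pte.
 */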
pte_t *lookup_address(unsigned long address, int *level)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;
	*level = 3;
	if (pmd_large(*pmd))
		return (pte_t *)pmd;
	*level = 4;

	return pte_offset_kernel(pmd, address);
}

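/*
 * Set a pmd-level entry in the kernel mapping. On 32-bit kernels
 * without a shared kernel PMD (PAE), each pgd on the pgd_list carries
 * its own copy of the kernel pmds, so the update has to be replicated
 * into every one of them.
 */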
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	/* change init_mm */
	set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
	if (!SHARED_KERNEL_PMD) {
		struct page *page;

		for (page = pgd_list; page; page = (struct page *)page->index) {
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;

			pgd = (pgd_t *)page_address(page) + pgd_index(address);
			pud = pud_offset(pgd, address);
			pmd = pmd_offset(pud, address);
			set_pte_atomic((pte_t *)pmd, pte);
		}
	}
#endif
}

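/*
 * Split a large (2MB/4MB) kernel page into 4K ptes: allocate a fresh
 * page-table page, fill it with ptes that map the same physical range
 * with the large page's protections, and install it in place of the
 * old pmd entry. pgd_lock guards against a concurrent split on another
 * CPU.
 */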
static int split_large_page(pte_t *kpte, unsigned long address)
{
	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
	gfp_t gfp_flags = GFP_KERNEL;
	unsigned long flags;
	unsigned long addr;
	pte_t *pbase, *tmp;
	struct page *base;
	int i, level;

#ifdef CONFIG_DEBUG_PAGEALLOC
	gfp_flags = GFP_ATOMIC;
#endif
	base = alloc_pages(gfp_flags, 0);
	if (!base)
		return -ENOMEM;

	spin_lock_irqsave(&pgd_lock, flags);
	/*
	 * Check for races, another CPU might have split this page
	 * up for us already:
	 */
	tmp = lookup_address(address, &level);
	if (tmp != kpte) {
		WARN_ON_ONCE(1);
		goto out_unlock;
	}

	/*
	 * Only the physical base of the large page is needed here; keep
	 * the virtual address intact for __set_pmd_pte() below, which
	 * uses it to index the other pgds on 32-bit:
	 */
	addr = __pa(address) & LARGE_PAGE_MASK;
	pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif

	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));

	/*
	 * Install the new, split up pagetable:
	 */
	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
	base = NULL;

out_unlock:
	spin_unlock_irqrestore(&pgd_lock, flags);

	if (base)
		__free_pages(base, 0);

	return 0;
}

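/*
 * Change the protection of a single kernel page: if it is mapped by a
 * 4K pte, rewrite the pte in place; if it is covered by a large page,
 * split that page first and retry.
 */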
static int
__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
{
	struct page *kpte_page;
	int level, err = 0;
	pte_t *kpte;

	BUG_ON(PageHighMem(page));

repeat:
	kpte = lookup_address(address, &level);
	if (!kpte)
		return -EINVAL;

	kpte_page = virt_to_page(kpte);
	BUG_ON(PageLRU(kpte_page));
	BUG_ON(PageCompound(kpte_page));

	/*
	 * Better fail early if someone sets the kernel text to NX.
	 * Does not cover __inittext.
	 */
	BUG_ON(address >= (unsigned long)&_text &&
	       address < (unsigned long)&_etext &&
	       (pgprot_val(prot) & _PAGE_NX));

	if (level == 4) {
		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
	} else {
		err = split_large_page(kpte, address);
		if (!err)
			goto repeat;
	}
	return err;
}

/**
 * change_page_attr_addr - Change page table attributes in linear mapping
 * @address: Virtual address in linear mapping.
 * @numpages: Number of pages to change
 * @prot:    New page table attribute (PAGE_*)
 *
 * Change page attributes of a page in the direct mapping. This is a variant
 * of change_page_attr() that also works on memory holes that do not have
 * a mem_map entry (pfn_valid() is false).
 *
 * See change_page_attr() documentation for more details.
 */

int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
{
	int err = 0, kernel_map = 0, i;

#ifdef CONFIG_X86_64
	if (address >= __START_KERNEL_map &&
			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {

		address = (unsigned long)__va(__pa(address));
		kernel_map = 1;
	}
#endif

	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
		unsigned long pfn = __pa(address) >> PAGE_SHIFT;

		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
			err = __change_page_attr(address, pfn_to_page(pfn), prot);
			if (err)
				break;
		}
#ifdef CONFIG_X86_64
		/*
		 * Handle kernel mapping too which aliases part of
		 * lowmem:
		 */
		if (__pa(address) < KERNEL_TEXT_SIZE) {
			unsigned long addr2;
			pgprot_t prot2;

			addr2 = __START_KERNEL_map + __pa(address);
			/* Make sure the kernel mappings stay executable */
			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
		}
#endif
	}

	return err;
}
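
/*
 * Usage sketch (illustrative only, not part of this file): make one
 * page of the direct mapping uncacheable by address and flush; 'addr'
 * here is a hypothetical linear-mapping address:
 *
 *	change_page_attr_addr(addr, 1, PAGE_KERNEL_NOCACHE);
 *	global_flush_tlb();
 */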

/**
 * change_page_attr - Change page table attributes in the linear mapping.
 * @page: First page to change
 * @numpages: Number of pages to change
 * @prot: New protection/caching type (PAGE_*)
 *
 * Returns 0 on success, otherwise a negated errno.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * in kernel linear mapping too.
 *
 * Caller must call global_flush_tlb() later to make the changes active.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere
 * (e.g. in user space). This function only deals with the kernel linear map.
 *
 * For MMIO areas without mem_map use change_page_attr_addr() instead.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
	unsigned long addr = (unsigned long)page_address(page);

	return change_page_attr_addr(addr, numpages, prot);
}
EXPORT_SYMBOL(change_page_attr);
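
/*
 * Usage sketch (illustrative only, not part of this file): temporarily
 * mark an allocator page uncacheable, then restore write-back; 'page'
 * is a hypothetical struct page from alloc_pages():
 *
 *	change_page_attr(page, 1, PAGE_KERNEL_NOCACHE);
 *	global_flush_tlb();
 *	...
 *	change_page_attr(page, 1, PAGE_KERNEL);
 *	global_flush_tlb();
 */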

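/*
 * global_flush_tlb() makes earlier attribute changes visible on all
 * CPUs: flush_kernel_map() runs everywhere via IPI, flushing the TLBs
 * wholesale and, on the affected CPU models, writing back and
 * invalidating the caches as well (wbinvd).
 */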
static void flush_kernel_map(void *arg)
{
	/*
	 * Flush all to work around errata in early Athlons regarding
	 * large page flushing.
	 */
	__flush_tlb_all();

	if (boot_cpu_data.x86_model >= 4)
		wbinvd();
}

void global_flush_tlb(void)
{
	BUG_ON(irqs_disabled());

	on_each_cpu(flush_kernel_map, NULL, 1, 1);
}
EXPORT_SYMBOL(global_flush_tlb);

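/*
 * With CONFIG_DEBUG_PAGEALLOC, pages are unmapped from the kernel
 * linear mapping while they sit on the free lists, so any stray access
 * to freed memory faults immediately: __pgprot(0) clears _PAGE_PRESENT.
 */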
#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	if (!enable) {
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);
	}

	/*
	 * If the page allocator is not up yet, do not call c_p_a():
	 */
	if (!debug_pagealloc_enabled)
		return;

	/*
	 * The return value is ignored - the calls cannot fail, since
	 * large pages are disabled at boot time:
	 */
	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

	/*
	 * We should perform an IPI and flush all tlbs,
	 * but that can deadlock - flush only the current CPU:
	 */
	__flush_tlb_all();
}
#endif