提交 639ab3eb 编写于 作者: L Linus Torvalds

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm changes from Ingo Molnar:
 "The main changes are: continued PAT work by Toshi Kani, plus a new
  boot time warning about insecure RWX kernel mappings, by Stephen
  Smalley.

  The new CONFIG_DEBUG_WX=y warning is marked default-y if
  CONFIG_DEBUG_RODATA=y is already eanbled, as a special exception, as
  these bugs are hard to notice and this check already found several
  live bugs"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Warn on W^X mappings
  x86/mm: Fix no-change case in try_preserve_large_page()
  x86/mm: Fix __split_large_page() to handle large PAT bit
  x86/mm: Fix try_preserve_large_page() to handle large PAT bit
  x86/mm: Fix gup_huge_p?d() to handle large PAT bit
  x86/mm: Fix slow_virt_to_phys() to handle large PAT bit
  x86/mm: Fix page table dump to show PAT bit
  x86/asm: Add pud_pgprot() and pmd_pgprot()
  x86/asm: Fix pud/pmd interfaces to handle large PAT bit
  x86/asm: Add pud/pmd mask interfaces to handle large PAT bit
  x86/asm: Move PUD_PAGE macros to page_types.h
  x86/vdso32: Define PGTABLE_LEVELS to 32bit VDSO
......@@ -65,10 +65,14 @@ config EARLY_PRINTK_EFI
This is useful for kernel debugging when your machine crashes very
early before the console code is initialized.
config X86_PTDUMP_CORE
def_bool n
config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
select DEBUG_FS
select X86_PTDUMP_CORE
---help---
Say Y here if you want to show the kernel pagetable layout in a
debugfs file. This information is only useful for kernel developers
......@@ -79,7 +83,8 @@ config X86_PTDUMP
config EFI_PGT_DUMP
bool "Dump the EFI pagetable"
depends on EFI && X86_PTDUMP
depends on EFI
select X86_PTDUMP_CORE
---help---
Enable this if you want to dump the EFI page table before
enabling virtual mode. This can be used to debug miscellaneous
......@@ -105,6 +110,35 @@ config DEBUG_RODATA_TEST
feature as well as for the change_page_attr() infrastructure.
If in doubt, say "N"
config DEBUG_WX
bool "Warn on W+X mappings at boot"
depends on DEBUG_RODATA
default y
select X86_PTDUMP_CORE
---help---
Generate a warning if any W+X mappings are found at boot.
This is useful for discovering cases where the kernel is leaving
W+X mappings after applying NX, as such mappings are a security risk.
Look for a message in dmesg output like this:
x86/mm: Checked W+X mappings: passed, no W+X pages found.
or like this, if the check failed:
x86/mm: Checked W+X mappings: FAILED, <N> W+X pages found.
Note that even if the check fails, your kernel is possibly
still fine, as W+X mappings are not a security hole in
themselves, what they do is that they make the exploitation
of other unfixed kernel bugs easier.
There is no runtime or memory usage effect of this option
once the kernel has booted up - it's a one time check.
If in doubt, say "Y".
config DEBUG_SET_MODULE_RONX
bool "Set loadable kernel module data as NX and text as RO"
depends on MODULES
......
......@@ -14,11 +14,13 @@
*/
#undef CONFIG_64BIT
#undef CONFIG_X86_64
#undef CONFIG_PGTABLE_LEVELS
#undef CONFIG_ILLEGAL_POINTER_VALUE
#undef CONFIG_SPARSEMEM_VMEMMAP
#undef CONFIG_NR_CPUS
#define CONFIG_X86_32 1
#define CONFIG_PGTABLE_LEVELS 2
#define CONFIG_PAGE_OFFSET 0
#define CONFIG_ILLEGAL_POINTER_VALUE 0
#define CONFIG_NR_CPUS 1
......
......@@ -26,9 +26,6 @@
#define MCE_STACK 4
#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
/*
* Set __PAGE_OFFSET to the most negative possible address +
* PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
......
......@@ -20,6 +20,9 @@
#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
......
......@@ -19,6 +19,13 @@
#include <asm/x86_init.h>
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
void ptdump_walk_pgd_level_checkwx(void);
#ifdef CONFIG_DEBUG_WX
#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
#else
#define debug_checkwx() do { } while (0)
#endif
/*
* ZERO_PAGE is a global shared page that is always zero: used
......@@ -142,12 +149,12 @@ static inline unsigned long pte_pfn(pte_t pte)
static inline unsigned long pmd_pfn(pmd_t pmd)
{
return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
}
static inline unsigned long pud_pfn(pud_t pud)
{
return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
}
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
......@@ -379,7 +386,9 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
return __pgprot(preservebits | addbits);
}
#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
#define pte_pgprot(x) __pgprot(pte_flags(x))
#define pmd_pgprot(x) __pgprot(pmd_flags(x))
#define pud_pgprot(x) __pgprot(pud_flags(x))
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
......@@ -502,14 +511,15 @@ static inline int pmd_none(pmd_t pmd)
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK);
return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd));
}
/*
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
#define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT)
#define pmd_page(pmd) \
pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
/*
* the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
......@@ -570,14 +580,15 @@ static inline int pud_present(pud_t pud)
static inline unsigned long pud_page_vaddr(pud_t pud)
{
return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK);
return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud));
}
/*
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
#define pud_page(pud) \
pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
/* Find an entry in the second-level page table.. */
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
......
......@@ -209,10 +209,10 @@ enum page_cache_mode {
#include <linux/types.h>
/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */
#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */
#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
......@@ -276,14 +276,46 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
}
#endif
static inline pudval_t pud_pfn_mask(pud_t pud)
{
if (native_pud_val(pud) & _PAGE_PSE)
return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK;
else
return PTE_PFN_MASK;
}
static inline pudval_t pud_flags_mask(pud_t pud)
{
if (native_pud_val(pud) & _PAGE_PSE)
return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK);
else
return ~PTE_PFN_MASK;
}
static inline pudval_t pud_flags(pud_t pud)
{
return native_pud_val(pud) & PTE_FLAGS_MASK;
return native_pud_val(pud) & pud_flags_mask(pud);
}
static inline pmdval_t pmd_pfn_mask(pmd_t pmd)
{
if (native_pmd_val(pmd) & _PAGE_PSE)
return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK;
else
return PTE_PFN_MASK;
}
static inline pmdval_t pmd_flags_mask(pmd_t pmd)
{
if (native_pmd_val(pmd) & _PAGE_PSE)
return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK);
else
return ~PTE_PFN_MASK;
}
static inline pmdval_t pmd_flags(pmd_t pmd)
{
return native_pmd_val(pmd) & PTE_FLAGS_MASK;
return native_pmd_val(pmd) & pmd_flags_mask(pmd);
}
static inline pte_t native_make_pte(pteval_t val)
......
......@@ -14,7 +14,7 @@ obj-$(CONFIG_SMP) += tlb.o
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
......
......@@ -32,6 +32,8 @@ struct pg_state {
const struct addr_marker *marker;
unsigned long lines;
bool to_dmesg;
bool check_wx;
unsigned long wx_pages;
};
struct addr_marker {
......@@ -155,7 +157,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
pt_dump_cont_printf(m, dmsg, " ");
if ((level == 4 && pr & _PAGE_PAT) ||
((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
pt_dump_cont_printf(m, dmsg, "pat ");
pt_dump_cont_printf(m, dmsg, "PAT ");
else
pt_dump_cont_printf(m, dmsg, " ");
if (pr & _PAGE_GLOBAL)
......@@ -198,8 +200,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
* we have now. "break" is either changing perms, levels or
* address space marker.
*/
prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
prot = pgprot_val(new_prot);
cur = pgprot_val(st->current_prot);
if (!st->level) {
/* First entry */
......@@ -214,6 +216,16 @@ static void note_page(struct seq_file *m, struct pg_state *st,
const char *unit = units;
unsigned long delta;
int width = sizeof(unsigned long) * 2;
pgprotval_t pr = pgprot_val(st->current_prot);
if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
WARN_ONCE(1,
"x86/mm: Found insecure W+X mapping at address %p/%pS\n",
(void *)st->start_address,
(void *)st->start_address);
st->wx_pages += (st->current_address -
st->start_address) / PAGE_SIZE;
}
/*
* Now print the actual finished series
......@@ -269,13 +281,13 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
{
int i;
pte_t *start;
pgprotval_t prot;
start = (pte_t *) pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
pgprot_t prot = pte_pgprot(*start);
prot = pte_flags(*start);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
note_page(m, st, prot, 4);
note_page(m, st, __pgprot(prot), 4);
start++;
}
}
......@@ -287,18 +299,19 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
{
int i;
pmd_t *start;
pgprotval_t prot;
start = (pmd_t *) pud_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PMD; i++) {
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
if (!pmd_none(*start)) {
pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;
if (pmd_large(*start) || !pmd_present(*start))
if (pmd_large(*start) || !pmd_present(*start)) {
prot = pmd_flags(*start);
note_page(m, st, __pgprot(prot), 3);
else
} else {
walk_pte_level(m, st, *start,
P + i * PMD_LEVEL_MULT);
}
} else
note_page(m, st, __pgprot(0), 3);
start++;
......@@ -318,19 +331,20 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
{
int i;
pud_t *start;
pgprotval_t prot;
start = (pud_t *) pgd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
if (!pud_none(*start)) {
pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;
if (pud_large(*start) || !pud_present(*start))
if (pud_large(*start) || !pud_present(*start)) {
prot = pud_flags(*start);
note_page(m, st, __pgprot(prot), 2);
else
} else {
walk_pmd_level(m, st, *start,
P + i * PUD_LEVEL_MULT);
}
} else
note_page(m, st, __pgprot(0), 2);
......@@ -344,13 +358,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
bool checkwx)
{
#ifdef CONFIG_X86_64
pgd_t *start = (pgd_t *) &init_level4_pgt;
#else
pgd_t *start = swapper_pg_dir;
#endif
pgprotval_t prot;
int i;
struct pg_state st = {};
......@@ -359,16 +375,20 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
st.to_dmesg = true;
}
st.check_wx = checkwx;
if (checkwx)
st.wx_pages = 0;
for (i = 0; i < PTRS_PER_PGD; i++) {
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
if (!pgd_none(*start)) {
pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
if (pgd_large(*start) || !pgd_present(*start))
if (pgd_large(*start) || !pgd_present(*start)) {
prot = pgd_flags(*start);
note_page(m, &st, __pgprot(prot), 1);
else
} else {
walk_pud_level(m, &st, *start,
i * PGD_LEVEL_MULT);
}
} else
note_page(m, &st, __pgprot(0), 1);
......@@ -378,8 +398,26 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
/* Flush out the last page */
st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
note_page(m, &st, __pgprot(0), 0);
if (!checkwx)
return;
if (st.wx_pages)
pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
st.wx_pages);
else
pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
}
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
ptdump_walk_pgd_level_core(m, pgd, false);
}
void ptdump_walk_pgd_level_checkwx(void)
{
ptdump_walk_pgd_level_core(NULL, NULL, true);
}
#ifdef CONFIG_X86_PTDUMP
static int ptdump_show(struct seq_file *m, void *v)
{
ptdump_walk_pgd_level(m, NULL);
......@@ -397,10 +435,13 @@ static const struct file_operations ptdump_fops = {
.llseek = seq_lseek,
.release = single_release,
};
#endif
static int pt_dump_init(void)
{
#ifdef CONFIG_X86_PTDUMP
struct dentry *pe;
#endif
#ifdef CONFIG_X86_32
/* Not a compile-time constant on x86-32 */
......@@ -412,10 +453,12 @@ static int pt_dump_init(void)
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
#endif
#ifdef CONFIG_X86_PTDUMP
pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
&ptdump_fops);
if (!pe)
return -ENOMEM;
#endif
return 0;
}
......
......@@ -118,21 +118,20 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask;
pte_t pte = *(pte_t *)&pmd;
struct page *head, *page;
int refs;
mask = _PAGE_PRESENT|_PAGE_USER;
if (write)
mask |= _PAGE_RW;
if ((pte_flags(pte) & mask) != mask)
if ((pmd_flags(pmd) & mask) != mask)
return 0;
/* hugepages are never "special" */
VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL);
VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
refs = 0;
head = pte_page(pte);
head = pmd_page(pmd);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
......@@ -195,21 +194,20 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask;
pte_t pte = *(pte_t *)&pud;
struct page *head, *page;
int refs;
mask = _PAGE_PRESENT|_PAGE_USER;
if (write)
mask |= _PAGE_RW;
if ((pte_flags(pte) & mask) != mask)
if ((pud_flags(pud) & mask) != mask)
return 0;
/* hugepages are never "special" */
VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
refs = 0;
head = pte_page(pte);
head = pud_page(pud);
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
......
......@@ -957,6 +957,8 @@ void mark_rodata_ro(void)
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
#endif
mark_nxdata_nx();
if (__supported_pte_mask & _PAGE_NX)
debug_checkwx();
}
#endif
......@@ -1150,6 +1150,8 @@ void mark_rodata_ro(void)
free_init_pages("unused kernel",
(unsigned long) __va(__pa_symbol(rodata_end)),
(unsigned long) __va(__pa_symbol(_sdata)));
debug_checkwx();
}
#endif
......
......@@ -414,18 +414,28 @@ pmd_t *lookup_pmd_address(unsigned long address)
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
unsigned long virt_addr = (unsigned long)__virt_addr;
phys_addr_t phys_addr;
unsigned long offset;
unsigned long phys_addr, offset;
enum pg_level level;
unsigned long pmask;
pte_t *pte;
pte = lookup_address(virt_addr, &level);
BUG_ON(!pte);
pmask = page_level_mask(level);
offset = virt_addr & ~pmask;
phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
return (phys_addr | offset);
switch (level) {
case PG_LEVEL_1G:
phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
offset = virt_addr & ~PUD_PAGE_MASK;
break;
case PG_LEVEL_2M:
phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
offset = virt_addr & ~PMD_PAGE_MASK;
break;
default:
phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
offset = virt_addr & ~PAGE_MASK;
}
return (phys_addr_t)(phys_addr | offset);
}
EXPORT_SYMBOL_GPL(slow_virt_to_phys);
......@@ -458,7 +468,7 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot, req_prot;
int i, do_split = 1;
......@@ -478,17 +488,21 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
switch (level) {
case PG_LEVEL_2M:
#ifdef CONFIG_X86_64
old_prot = pmd_pgprot(*(pmd_t *)kpte);
old_pfn = pmd_pfn(*(pmd_t *)kpte);
break;
case PG_LEVEL_1G:
#endif
psize = page_level_size(level);
pmask = page_level_mask(level);
old_prot = pud_pgprot(*(pud_t *)kpte);
old_pfn = pud_pfn(*(pud_t *)kpte);
break;
default:
do_split = -EINVAL;
goto out_unlock;
}
psize = page_level_size(level);
pmask = page_level_mask(level);
/*
* Calculate the number of pages, which fit into this large
* page starting at address:
......@@ -504,7 +518,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* up accordingly.
*/
old_pte = *kpte;
old_prot = req_prot = pgprot_large_2_4k(pte_pgprot(old_pte));
req_prot = pgprot_large_2_4k(old_prot);
pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
......@@ -530,10 +544,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
req_prot = canon_pgprot(req_prot);
/*
* old_pte points to the large page base address. So we need
* old_pfn points to the large page base pfn. So we need
* to add the offset of the virtual address:
*/
pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
cpa->pfn = pfn;
new_prot = static_protections(req_prot, address, pfn);
......@@ -544,7 +558,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* the pages in the range we try to preserve:
*/
addr = address & pmask;
pfn = pte_pfn(old_pte);
pfn = old_pfn;
for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
......@@ -574,7 +588,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* The address is aligned and the number of pages
* covers the full page.
*/
new_pte = pfn_pte(pte_pfn(old_pte), new_prot);
new_pte = pfn_pte(old_pfn, new_prot);
__set_pmd_pte(kpte, address, new_pte);
cpa->flags |= CPA_FLUSHTLB;
do_split = 0;
......@@ -591,7 +605,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
struct page *base)
{
pte_t *pbase = (pte_t *)page_address(base);
unsigned long pfn, pfninc = 1;
unsigned long ref_pfn, pfn, pfninc = 1;
unsigned int i, level;
pte_t *tmp;
pgprot_t ref_prot;
......@@ -608,26 +622,33 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
}
paravirt_alloc_pte(&init_mm, page_to_pfn(base));
ref_prot = pte_pgprot(pte_clrhuge(*kpte));
/* promote PAT bit to correct position */
if (level == PG_LEVEL_2M)
switch (level) {
case PG_LEVEL_2M:
ref_prot = pmd_pgprot(*(pmd_t *)kpte);
/* clear PSE and promote PAT bit to correct position */
ref_prot = pgprot_large_2_4k(ref_prot);
ref_pfn = pmd_pfn(*(pmd_t *)kpte);
break;
#ifdef CONFIG_X86_64
if (level == PG_LEVEL_1G) {
case PG_LEVEL_1G:
ref_prot = pud_pgprot(*(pud_t *)kpte);
ref_pfn = pud_pfn(*(pud_t *)kpte);
pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
/*
* Set the PSE flags only if the PRESENT flag is set
* Clear the PSE flags if the PRESENT flag is not set
* otherwise pmd_present/pmd_huge will return true
* even on a non present pmd.
*/
if (pgprot_val(ref_prot) & _PAGE_PRESENT)
pgprot_val(ref_prot) |= _PAGE_PSE;
else
if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
pgprot_val(ref_prot) &= ~_PAGE_PSE;
break;
default:
spin_unlock(&pgd_lock);
return 1;
}
#endif
/*
* Set the GLOBAL flags only if the PRESENT flag is set
......@@ -643,7 +664,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
/*
* Get the target pfn from the original entry:
*/
pfn = pte_pfn(*kpte);
pfn = ref_pfn;
for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册