提交 13c76ad8 编写于 作者: L Linus Torvalds

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - Enable full ASLR randomization for 32-bit programs (Hector
     Marco-Gisbert)

   - Add initial minimal INVPCID support, to flush global mappings (Andy
     Lutomirski)

   - Add KASAN enhancements (Andrey Ryabinin)

   - Fix mmiotrace for huge pages (Karol Herbst)

   - ... misc cleanups and small enhancements"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm/32: Enable full randomization on i386 and X86_32
  x86/mm/kmmio: Fix mmiotrace for hugepages
  x86/mm: Avoid premature success when changing page attributes
  x86/mm/ptdump: Remove paravirt_enabled()
  x86/mm: Fix INVPCID asm constraint
  x86/dmi: Switch dmi_remap() from ioremap() [uncached] to ioremap_cache()
  x86/mm: If INVPCID is available, use it to flush global mappings
  x86/mm: Add a 'noinvpcid' boot option to turn off INVPCID
  x86/mm: Add INVPCID helpers
  x86/kasan: Write protect kasan zero shadow
  x86/kasan: Clear kasan_zero_page after TLB flush
  x86/mm/numa: Check for failures in numa_clear_kernel_node_hotplug()
  x86/mm/numa: Clean up numa_clear_kernel_node_hotplug()
  x86/mm: Make kmap_prot into a #define
  x86/mm/32: Set NX in __supported_pte_mask before enabling paging
  x86/mm: Streamline and restore probe_memory_block_size()
...@@ -2566,6 +2566,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ...@@ -2566,6 +2566,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nointroute [IA-64] nointroute [IA-64]
noinvpcid [X86] Disable the INVPCID cpu feature.
nojitter [IA-64] Disables jitter checking for ITC timers. nojitter [IA-64] Disables jitter checking for ITC timers.
no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
......
...@@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len) ...@@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len)
/* Use early IO mappings for DMI because it's initialized early */ /* Use early IO mappings for DMI because it's initialized early */
#define dmi_early_remap early_ioremap #define dmi_early_remap early_ioremap
#define dmi_early_unmap early_iounmap #define dmi_early_unmap early_iounmap
#define dmi_remap ioremap #define dmi_remap ioremap_cache
#define dmi_unmap iounmap #define dmi_unmap iounmap
#endif /* _ASM_X86_DMI_H */ #endif /* _ASM_X86_DMI_H */
...@@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve); ...@@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve);
extern int fixmaps_set; extern int fixmaps_set;
extern pte_t *kmap_pte; extern pte_t *kmap_pte;
extern pgprot_t kmap_prot; #define kmap_prot PAGE_KERNEL
extern pte_t *pkmap_page_table; extern pte_t *pkmap_page_table;
void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
......
...@@ -8,6 +8,54 @@ ...@@ -8,6 +8,54 @@
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/special_insns.h> #include <asm/special_insns.h>
/*
 * Issue a single INVPCID instruction.
 *
 * @pcid and @addr are packed, in that order, into a two-element u64
 * descriptor that lives on the stack; @type selects the kind of
 * invalidation (see the INVPCID_TYPE_* constants).
 *
 * The instruction is emitted as raw opcode bytes rather than by mnemonic
 * (presumably so that assemblers without INVPCID support can still build
 * this - TODO confirm).
 */
static inline void __invpcid(unsigned long pcid, unsigned long addr,
unsigned long type)
{
struct { u64 d[2]; } desc = { { pcid, addr } };
/*
 * The memory clobber is because the whole point is to invalidate
 * stale TLB entries and, especially if we're flushing global
 * mappings, we don't want the compiler to reorder any subsequent
 * memory accesses before the TLB flush.
 *
 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
 * invpcid (%rcx), %rax in long mode.
 *
 * Operand mapping: "a" carries the invalidation type and "c" carries
 * the address of the descriptor, matching the registers hard-coded in
 * the opcode bytes above. The "m" (desc) input tells the compiler that
 * the asm reads the descriptor's memory, so the initialisation of desc
 * must be kept and completed before the instruction.
 */
asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
: : "m" (desc), "a" (type), "c" (&desc) : "memory");
}
/* Invalidation types passed as the 'type' argument of __invpcid(). */
/* One (PCID, address) pair, excluding globals - see invpcid_flush_one(). */
#define INVPCID_TYPE_INDIV_ADDR 0
/* Everything for one PCID, excluding globals - see invpcid_flush_single_context(). */
#define INVPCID_TYPE_SINGLE_CTXT 1
/* Everything for all PCIDs, including globals - see invpcid_flush_all(). */
#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
/* Everything for all PCIDs, excluding globals - see invpcid_flush_all_nonglobals(). */
#define INVPCID_TYPE_ALL_NON_GLOBAL 3
/*
 * invpcid_flush_one - invalidate the mappings matching one PCID/address pair.
 * @pcid: PCID the mapping belongs to
 * @addr: address whose mapping is to be invalidated
 *
 * Global mappings are not affected.
 */
static inline void invpcid_flush_one(unsigned long pcid,
				     unsigned long addr)
{
	const unsigned long mode = INVPCID_TYPE_INDIV_ADDR;

	__invpcid(pcid, addr, mode);
}
/*
 * invpcid_flush_single_context - invalidate every mapping tagged with @pcid.
 * @pcid: PCID whose mappings are to be invalidated
 *
 * Global mappings are not affected. The address field of the descriptor
 * is passed as zero for this invalidation type.
 */
static inline void invpcid_flush_single_context(unsigned long pcid)
{
	const unsigned long no_addr = 0;

	__invpcid(pcid, no_addr, INVPCID_TYPE_SINGLE_CTXT);
}
/*
 * invpcid_flush_all - invalidate all mappings for every PCID.
 *
 * Unlike the other helpers, this one also drops global mappings. Both
 * descriptor fields are passed as zero.
 */
static inline void invpcid_flush_all(void)
{
	const unsigned long unused = 0;

	__invpcid(unused, unused, INVPCID_TYPE_ALL_INCL_GLOBAL);
}
/*
 * invpcid_flush_all_nonglobals - invalidate the non-global mappings of
 * every PCID.
 *
 * Global mappings are left in place. Both descriptor fields are passed
 * as zero.
 */
static inline void invpcid_flush_all_nonglobals(void)
{
	const unsigned long unused = 0;

	__invpcid(unused, unused, INVPCID_TYPE_ALL_NON_GLOBAL);
}
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h> #include <asm/paravirt.h>
#else #else
...@@ -105,6 +153,15 @@ static inline void __native_flush_tlb_global(void) ...@@ -105,6 +153,15 @@ static inline void __native_flush_tlb_global(void)
{ {
unsigned long flags; unsigned long flags;
if (static_cpu_has(X86_FEATURE_INVPCID)) {
/*
* Using INVPCID is considerably faster than a pair of writes
* to CR4 sandwiched inside an IRQ flag save/restore.
*/
invpcid_flush_all();
return;
}
/* /*
* Read-modify-write to CR4 - protect it from preemption and * Read-modify-write to CR4 - protect it from preemption and
* from interrupts. (Use the raw variant because this code can * from interrupts. (Use the raw variant because this code can
......
...@@ -162,6 +162,22 @@ static int __init x86_mpx_setup(char *s) ...@@ -162,6 +162,22 @@ static int __init x86_mpx_setup(char *s)
} }
__setup("nompx", x86_mpx_setup); __setup("nompx", x86_mpx_setup);
/*
 * Handler for the "noinvpcid" early boot parameter.
 *
 * @s: argument string attached to the option, if any.
 *
 * Returns -EINVAL when an argument was supplied (the option takes none),
 * otherwise 0. When the boot CPU reports INVPCID, the capability is
 * cleared and an informational message is logged; when it does not,
 * nothing is changed and nothing is printed.
 */
static int __init x86_noinvpcid_setup(char *s)
{
	/* The option is a bare flag: reject "noinvpcid=<anything>". */
	if (s)
		return -EINVAL;

	/* Only act (and only log) when there is a feature to turn off. */
	if (boot_cpu_has(X86_FEATURE_INVPCID)) {
		setup_clear_cpu_cap(X86_FEATURE_INVPCID);
		pr_info("noinvpcid: INVPCID feature disabled\n");
	}

	return 0;
}
early_param("noinvpcid", x86_noinvpcid_setup);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
static int cachesize_override = -1; static int cachesize_override = -1;
static int disable_x86_serial_nr = 1; static int disable_x86_serial_nr = 1;
......
...@@ -389,6 +389,12 @@ default_entry: ...@@ -389,6 +389,12 @@ default_entry:
/* Make changes effective */ /* Make changes effective */
wrmsr wrmsr
/*
* And make sure that all the mappings we set up have NX set from
* the beginning.
*/
orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4)
enable_paging: enable_paging:
/* /*
......
...@@ -358,20 +358,19 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, ...@@ -358,20 +358,19 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
#define pgd_none(a) pud_none(__pud(pgd_val(a))) #define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif #endif
#ifdef CONFIG_X86_64
static inline bool is_hypervisor_range(int idx) static inline bool is_hypervisor_range(int idx)
{ {
#ifdef CONFIG_X86_64
/* /*
* ffff800000000000 - ffff87ffffffffff is reserved for * ffff800000000000 - ffff87ffffffffff is reserved for
* the hypervisor. * the hypervisor.
*/ */
return paravirt_enabled() && return (idx >= pgd_index(__PAGE_OFFSET) - 16) &&
(idx >= pgd_index(__PAGE_OFFSET) - 16) &&
(idx < pgd_index(__PAGE_OFFSET)); (idx < pgd_index(__PAGE_OFFSET));
}
#else #else
static inline bool is_hypervisor_range(int idx) { return false; } return false;
#endif #endif
}
static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
bool checkwx) bool checkwx)
......
...@@ -388,7 +388,6 @@ kernel_physical_mapping_init(unsigned long start, ...@@ -388,7 +388,6 @@ kernel_physical_mapping_init(unsigned long start,
} }
pte_t *kmap_pte; pte_t *kmap_pte;
pgprot_t kmap_prot;
static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
{ {
...@@ -405,8 +404,6 @@ static void __init kmap_init(void) ...@@ -405,8 +404,6 @@ static void __init kmap_init(void)
*/ */
kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
kmap_pte = kmap_get_fixmap_pte(kmap_vstart); kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
kmap_prot = PAGE_KERNEL;
} }
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
#include <asm/numa.h> #include <asm/numa.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/init.h> #include <asm/init.h>
#include <asm/uv/uv.h>
#include <asm/setup.h> #include <asm/setup.h>
#include "mm_internal.h" #include "mm_internal.h"
...@@ -1203,26 +1204,13 @@ int kern_addr_valid(unsigned long addr) ...@@ -1203,26 +1204,13 @@ int kern_addr_valid(unsigned long addr)
static unsigned long probe_memory_block_size(void) static unsigned long probe_memory_block_size(void)
{ {
/* start from 2g */ unsigned long bz = MIN_MEMORY_BLOCK_SIZE;
unsigned long bz = 1UL<<31;
if (totalram_pages >= (64ULL << (30 - PAGE_SHIFT))) { /* if system is UV or has 64GB of RAM or more, use large blocks */
pr_info("Using 2GB memory block size for large-memory system\n"); if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30)))
return 2UL * 1024 * 1024 * 1024; bz = 2UL << 30; /* 2GB */
}
/* less than 64g installed */
if ((max_pfn << PAGE_SHIFT) < (16UL << 32))
return MIN_MEMORY_BLOCK_SIZE;
/* get the tail size */
while (bz > MIN_MEMORY_BLOCK_SIZE) {
if (!((max_pfn << PAGE_SHIFT) & (bz - 1)))
break;
bz >>= 1;
}
printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20); pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);
return bz; return bz;
} }
......
...@@ -120,11 +120,22 @@ void __init kasan_init(void) ...@@ -120,11 +120,22 @@ void __init kasan_init(void)
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
(void *)KASAN_SHADOW_END); (void *)KASAN_SHADOW_END);
memset(kasan_zero_page, 0, PAGE_SIZE);
load_cr3(init_level4_pgt); load_cr3(init_level4_pgt);
__flush_tlb_all(); __flush_tlb_all();
init_task.kasan_depth = 0;
/*
* kasan_zero_page has been used as early shadow memory, thus it may
* contain some garbage. Now we can clear and write protect it, since
* after the TLB flush no one should write to it.
*/
memset(kasan_zero_page, 0, PAGE_SIZE);
for (i = 0; i < PTRS_PER_PTE; i++) {
pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO);
set_pte(&kasan_zero_pte[i], pte);
}
/* Flush TLBs again to be sure that write protection applied. */
__flush_tlb_all();
init_task.kasan_depth = 0;
pr_info("KernelAddressSanitizer initialized\n"); pr_info("KernelAddressSanitizer initialized\n");
} }
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
struct kmmio_fault_page { struct kmmio_fault_page {
struct list_head list; struct list_head list;
struct kmmio_fault_page *release_next; struct kmmio_fault_page *release_next;
unsigned long page; /* location of the fault page */ unsigned long addr; /* the requested address */
pteval_t old_presence; /* page presence prior to arming */ pteval_t old_presence; /* page presence prior to arming */
bool armed; bool armed;
...@@ -70,9 +70,16 @@ unsigned int kmmio_count; ...@@ -70,9 +70,16 @@ unsigned int kmmio_count;
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes); static LIST_HEAD(kmmio_probes);
static struct list_head *kmmio_page_list(unsigned long page) static struct list_head *kmmio_page_list(unsigned long addr)
{ {
return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; unsigned int l;
pte_t *pte = lookup_address(addr, &l);
if (!pte)
return NULL;
addr &= page_level_mask(l);
return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
} }
/* Accessed per-cpu */ /* Accessed per-cpu */
...@@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) ...@@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
} }
/* You must be holding RCU read lock. */ /* You must be holding RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
{ {
struct list_head *head; struct list_head *head;
struct kmmio_fault_page *f; struct kmmio_fault_page *f;
unsigned int l;
pte_t *pte = lookup_address(addr, &l);
page &= PAGE_MASK; if (!pte)
head = kmmio_page_list(page); return NULL;
addr &= page_level_mask(l);
head = kmmio_page_list(addr);
list_for_each_entry_rcu(f, head, list) { list_for_each_entry_rcu(f, head, list) {
if (f->page == page) if (f->addr == addr)
return f; return f;
} }
return NULL; return NULL;
...@@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) ...@@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
static int clear_page_presence(struct kmmio_fault_page *f, bool clear) static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
{ {
unsigned int level; unsigned int level;
pte_t *pte = lookup_address(f->page, &level); pte_t *pte = lookup_address(f->addr, &level);
if (!pte) { if (!pte) {
pr_err("no pte for page 0x%08lx\n", f->page); pr_err("no pte for addr 0x%08lx\n", f->addr);
return -1; return -1;
} }
...@@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) ...@@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
return -1; return -1;
} }
__flush_tlb_one(f->page); __flush_tlb_one(f->addr);
return 0; return 0;
} }
...@@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) ...@@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
int ret; int ret;
WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
if (f->armed) { if (f->armed) {
pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
f->page, f->count, !!f->old_presence); f->addr, f->count, !!f->old_presence);
} }
ret = clear_page_presence(f, true); ret = clear_page_presence(f, true);
WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
f->page); f->addr);
f->armed = true; f->armed = true;
return ret; return ret;
} }
...@@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) ...@@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{ {
int ret = clear_page_presence(f, false); int ret = clear_page_presence(f, false);
WARN_ONCE(ret < 0, WARN_ONCE(ret < 0,
KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
f->armed = false; f->armed = false;
} }
...@@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ...@@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
struct kmmio_context *ctx; struct kmmio_context *ctx;
struct kmmio_fault_page *faultpage; struct kmmio_fault_page *faultpage;
int ret = 0; /* default to fault not handled */ int ret = 0; /* default to fault not handled */
unsigned long page_base = addr;
unsigned int l;
pte_t *pte = lookup_address(addr, &l);
if (!pte)
return -EINVAL;
page_base &= page_level_mask(l);
/* /*
* Preemption is now disabled to prevent process switch during * Preemption is now disabled to prevent process switch during
...@@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ...@@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
preempt_disable(); preempt_disable();
rcu_read_lock(); rcu_read_lock();
faultpage = get_kmmio_fault_page(addr); faultpage = get_kmmio_fault_page(page_base);
if (!faultpage) { if (!faultpage) {
/* /*
* Either this page fault is not caused by kmmio, or * Either this page fault is not caused by kmmio, or
...@@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ...@@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
ctx = &get_cpu_var(kmmio_ctx); ctx = &get_cpu_var(kmmio_ctx);
if (ctx->active) { if (ctx->active) {
if (addr == ctx->addr) { if (page_base == ctx->addr) {
/* /*
* A second fault on the same page means some other * A second fault on the same page means some other
* condition needs handling by do_page_fault(), the * condition needs handling by do_page_fault(), the
...@@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ...@@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
ctx->active++; ctx->active++;
ctx->fpage = faultpage; ctx->fpage = faultpage;
ctx->probe = get_kmmio_probe(addr); ctx->probe = get_kmmio_probe(page_base);
ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
ctx->addr = addr; ctx->addr = page_base;
if (ctx->probe && ctx->probe->pre_handler) if (ctx->probe && ctx->probe->pre_handler)
ctx->probe->pre_handler(ctx->probe, regs, addr); ctx->probe->pre_handler(ctx->probe, regs, addr);
...@@ -354,12 +371,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) ...@@ -354,12 +371,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
} }
/* You must be holding kmmio_lock. */ /* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long page) static int add_kmmio_fault_page(unsigned long addr)
{ {
struct kmmio_fault_page *f; struct kmmio_fault_page *f;
page &= PAGE_MASK; f = get_kmmio_fault_page(addr);
f = get_kmmio_fault_page(page);
if (f) { if (f) {
if (!f->count) if (!f->count)
arm_kmmio_fault_page(f); arm_kmmio_fault_page(f);
...@@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page) ...@@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page)
return -1; return -1;
f->count = 1; f->count = 1;
f->page = page; f->addr = addr;
if (arm_kmmio_fault_page(f)) { if (arm_kmmio_fault_page(f)) {
kfree(f); kfree(f);
return -1; return -1;
} }
list_add_rcu(&f->list, kmmio_page_list(f->page)); list_add_rcu(&f->list, kmmio_page_list(f->addr));
return 0; return 0;
} }
/* You must be holding kmmio_lock. */ /* You must be holding kmmio_lock. */
static void release_kmmio_fault_page(unsigned long page, static void release_kmmio_fault_page(unsigned long addr,
struct kmmio_fault_page **release_list) struct kmmio_fault_page **release_list)
{ {
struct kmmio_fault_page *f; struct kmmio_fault_page *f;
page &= PAGE_MASK; f = get_kmmio_fault_page(addr);
f = get_kmmio_fault_page(page);
if (!f) if (!f)
return; return;
...@@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p) ...@@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p)
int ret = 0; int ret = 0;
unsigned long size = 0; unsigned long size = 0;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
unsigned int l;
pte_t *pte;
spin_lock_irqsave(&kmmio_lock, flags); spin_lock_irqsave(&kmmio_lock, flags);
if (get_kmmio_probe(p->addr)) { if (get_kmmio_probe(p->addr)) {
ret = -EEXIST; ret = -EEXIST;
goto out; goto out;
} }
pte = lookup_address(p->addr, &l);
if (!pte) {
ret = -EINVAL;
goto out;
}
kmmio_count++; kmmio_count++;
list_add_rcu(&p->list, &kmmio_probes); list_add_rcu(&p->list, &kmmio_probes);
while (size < size_lim) { while (size < size_lim) {
if (add_kmmio_fault_page(p->addr + size)) if (add_kmmio_fault_page(p->addr + size))
pr_err("Unable to set page fault.\n"); pr_err("Unable to set page fault.\n");
size += PAGE_SIZE; size += page_level_size(l);
} }
out: out:
spin_unlock_irqrestore(&kmmio_lock, flags); spin_unlock_irqrestore(&kmmio_lock, flags);
...@@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p) ...@@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
struct kmmio_fault_page *release_list = NULL; struct kmmio_fault_page *release_list = NULL;
struct kmmio_delayed_release *drelease; struct kmmio_delayed_release *drelease;
unsigned int l;
pte_t *pte;
pte = lookup_address(p->addr, &l);
if (!pte)
return;
spin_lock_irqsave(&kmmio_lock, flags); spin_lock_irqsave(&kmmio_lock, flags);
while (size < size_lim) { while (size < size_lim) {
release_kmmio_fault_page(p->addr + size, &release_list); release_kmmio_fault_page(p->addr + size, &release_list);
size += PAGE_SIZE; size += page_level_size(l);
} }
list_del_rcu(&p->list); list_del_rcu(&p->list);
kmmio_count--; kmmio_count--;
......
...@@ -93,18 +93,6 @@ static unsigned long mmap_base(unsigned long rnd) ...@@ -93,18 +93,6 @@ static unsigned long mmap_base(unsigned long rnd)
return PAGE_ALIGN(TASK_SIZE - gap - rnd); return PAGE_ALIGN(TASK_SIZE - gap - rnd);
} }
/*
* Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
* does, but not when emulating X86_32
*/
static unsigned long mmap_legacy_base(unsigned long rnd)
{
if (mmap_is_ia32())
return TASK_UNMAPPED_BASE;
else
return TASK_UNMAPPED_BASE + rnd;
}
/* /*
* This function, called very early during the creation of a new * This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use: * process VM image, sets up which VM layout function to use:
...@@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) ...@@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (current->flags & PF_RANDOMIZE) if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd(); random_factor = arch_mmap_rnd();
mm->mmap_legacy_base = mmap_legacy_base(random_factor); mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
if (mmap_is_legacy()) { if (mmap_is_legacy()) {
mm->mmap_base = mm->mmap_legacy_base; mm->mmap_base = mm->mmap_legacy_base;
......
...@@ -465,46 +465,67 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) ...@@ -465,46 +465,67 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
return true; return true;
} }
/*
 * Clear the MEMBLOCK_HOTPLUG flag on every NUMA node that contains
 * memblock-reserved physical memory (which covers the kernel's own
 * memory ranges), so that memory on those nodes is no longer marked
 * as hotpluggable.
 */
static void __init numa_clear_kernel_node_hotplug(void) static void __init numa_clear_kernel_node_hotplug(void)
{ {
int i, nid; nodemask_t reserved_nodemask = NODE_MASK_NONE;
nodemask_t numa_kernel_nodes = NODE_MASK_NONE; struct memblock_region *mb_region;
phys_addr_t start, end; int i;
struct memblock_region *r;
/* /*
* We have to do some preprocessing of memblock regions, to
* make them suitable for reservation.
*
* At this time, all memory regions reserved by memblock are * At this time, all memory regions reserved by memblock are
* used by the kernel. Set the nid in memblock.reserved will * used by the kernel, but those regions are not split up
* mark out all the nodes the kernel resides in. * along node boundaries yet, and don't necessarily have their
* node ID set yet either.
*
* So iterate over all memory known to the x86 architecture,
* and use those ranges to set the nid in memblock.reserved.
* This will split up the memblock regions along node
* boundaries and will set the node IDs as well.
*/ */
for (i = 0; i < numa_meminfo.nr_blks; i++) { for (i = 0; i < numa_meminfo.nr_blks; i++) {
struct numa_memblk *mb = &numa_meminfo.blk[i]; struct numa_memblk *mb = numa_meminfo.blk + i;
int ret;
memblock_set_node(mb->start, mb->end - mb->start, ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid);
&memblock.reserved, mb->nid); WARN_ON_ONCE(ret);
} }
/* /*
* Mark all kernel nodes. * Now go over all reserved memblock regions, to construct a
* node mask of all kernel reserved memory areas.
* *
* When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo * [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
* may not include all the memblock.reserved memory ranges because * numa_meminfo might not include all memblock.reserved
* trim_snb_memory() reserves specific pages for Sandy Bridge graphics. * memory ranges, because quirks such as trim_snb_memory()
* reserve specific pages for Sandy Bridge graphics. ]
*/ */
for_each_memblock(reserved, r) for_each_memblock(reserved, mb_region) {
if (r->nid != MAX_NUMNODES) if (mb_region->nid != MAX_NUMNODES)
node_set(r->nid, numa_kernel_nodes); node_set(mb_region->nid, reserved_nodemask);
}
/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ /*
* Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
* belonging to the reserved node mask.
*
* Note that this will include memory regions that reside
* on nodes that contain kernel memory - entire nodes
* become hot-unpluggable:
*/
for (i = 0; i < numa_meminfo.nr_blks; i++) { for (i = 0; i < numa_meminfo.nr_blks; i++) {
nid = numa_meminfo.blk[i].nid; struct numa_memblk *mb = numa_meminfo.blk + i;
if (!node_isset(nid, numa_kernel_nodes))
continue;
start = numa_meminfo.blk[i].start; if (!node_isset(mb->nid, reserved_nodemask))
end = numa_meminfo.blk[i].end; continue;
memblock_clear_hotplug(start, end - start); memblock_clear_hotplug(mb->start, mb->end - mb->start);
} }
} }
......
...@@ -1128,8 +1128,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, ...@@ -1128,8 +1128,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
/* /*
* Ignore all non primary paths. * Ignore all non primary paths.
*/ */
if (!primary) if (!primary) {
cpa->numpages = 1;
return 0; return 0;
}
/* /*
* Ignore the NULL PTE for kernel identity mapping, as it is expected * Ignore the NULL PTE for kernel identity mapping, as it is expected
......
...@@ -32,9 +32,8 @@ early_param("noexec", noexec_setup); ...@@ -32,9 +32,8 @@ early_param("noexec", noexec_setup);
void x86_configure_nx(void) void x86_configure_nx(void)
{ {
if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx) /* If disable_nx is set, clear NX on all new mappings going forward. */
__supported_pte_mask |= _PAGE_NX; if (disable_nx)
else
__supported_pte_mask &= ~_PAGE_NX; __supported_pte_mask &= ~_PAGE_NX;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册