提交 86c65a78 编写于 作者: L Linus Torvalds

Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc

* 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc:
  vmlinux.lds: fix .data..init_task output section (fix powerpc boot)
  powerpc: Fix erroneous lmb->memblock conversions
  powerpc/mm: Add some debug output when hash insertion fails
  powerpc/mm: Fix bugs in huge page hashing
  powerpc/mm: Move around testing of _PAGE_PRESENT in hash code
  powerpc/mm: Handle hypervisor pte insert failure in __hash_page_huge
  powerpc/kexec: Fix boundary case for book-e kexec memory limits
...@@ -8,9 +8,9 @@ ...@@ -8,9 +8,9 @@
* On FSL-BookE we setup a 1:1 mapping which covers the first 2GiB of memory * On FSL-BookE we setup a 1:1 mapping which covers the first 2GiB of memory
* and therefore we can only deal with memory within this range * and therefore we can only deal with memory within this range
*/ */
#define KEXEC_SOURCE_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL) #define KEXEC_SOURCE_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL - 1)
#define KEXEC_DESTINATION_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL) #define KEXEC_DESTINATION_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL - 1)
#define KEXEC_CONTROL_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL) #define KEXEC_CONTROL_MEMORY_LIMIT (2 * 1024 * 1024 * 1024UL - 1)
#else #else
......
...@@ -250,7 +250,9 @@ extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap) ...@@ -250,7 +250,9 @@ extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, int local, int ssize, pte_t *ptep, unsigned long trap, int local, int ssize,
unsigned int shift, unsigned int mmu_psize); unsigned int shift, unsigned int mmu_psize);
extern void hash_failure_debug(unsigned long ea, unsigned long access,
unsigned long vsid, unsigned long trap,
int ssize, int psize, unsigned long pte);
extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
unsigned long pstart, unsigned long prot, unsigned long pstart, unsigned long prot,
int psize, int ssize); int psize, int ssize);
......
...@@ -414,7 +414,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) ...@@ -414,7 +414,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
u64 base, size, memblock_size; u64 base, size, memblock_size;
unsigned int is_kexec_kdump = 0, rngs; unsigned int is_kexec_kdump = 0, rngs;
ls = of_get_flat_dt_prop(node, "ibm,memblock-size", &l); ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
if (ls == NULL || l < dt_root_size_cells * sizeof(__be32)) if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
return 0; return 0;
memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls); memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
......
...@@ -68,9 +68,6 @@ _GLOBAL(__hash_page_4K) ...@@ -68,9 +68,6 @@ _GLOBAL(__hash_page_4K)
std r8,STK_PARM(r8)(r1) std r8,STK_PARM(r8)(r1)
std r9,STK_PARM(r9)(r1) std r9,STK_PARM(r9)(r1)
/* Add _PAGE_PRESENT to access */
ori r4,r4,_PAGE_PRESENT
/* Save non-volatile registers. /* Save non-volatile registers.
* r31 will hold "old PTE" * r31 will hold "old PTE"
* r30 is "new PTE" * r30 is "new PTE"
...@@ -347,9 +344,6 @@ _GLOBAL(__hash_page_4K) ...@@ -347,9 +344,6 @@ _GLOBAL(__hash_page_4K)
std r8,STK_PARM(r8)(r1) std r8,STK_PARM(r8)(r1)
std r9,STK_PARM(r9)(r1) std r9,STK_PARM(r9)(r1)
/* Add _PAGE_PRESENT to access */
ori r4,r4,_PAGE_PRESENT
/* Save non-volatile registers. /* Save non-volatile registers.
* r31 will hold "old PTE" * r31 will hold "old PTE"
* r30 is "new PTE" * r30 is "new PTE"
...@@ -687,9 +681,6 @@ _GLOBAL(__hash_page_64K) ...@@ -687,9 +681,6 @@ _GLOBAL(__hash_page_64K)
std r8,STK_PARM(r8)(r1) std r8,STK_PARM(r8)(r1)
std r9,STK_PARM(r9)(r1) std r9,STK_PARM(r9)(r1)
/* Add _PAGE_PRESENT to access */
ori r4,r4,_PAGE_PRESENT
/* Save non-volatile registers. /* Save non-volatile registers.
* r31 will hold "old PTE" * r31 will hold "old PTE"
* r30 is "new PTE" * r30 is "new PTE"
......
...@@ -871,6 +871,18 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea) ...@@ -871,6 +871,18 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
} }
#endif #endif
/*
 * hash_failure_debug - log the context of a failed hash insertion.
 *
 * Emits two pr_info() lines: the first with the effective address (ea),
 * the requested access bits and the name of the current task; the second
 * with the trap, vsid, ssize, psize and pte values exactly as passed in.
 * The function only reports; it does not modify any of its arguments and
 * returns nothing.
 */
void hash_failure_debug(unsigned long ea, unsigned long access,
unsigned long vsid, unsigned long trap,
int ssize, int psize, unsigned long pte)
{
/* Skip the report entirely when printk_ratelimit() returns zero. */
if (!printk_ratelimit())
return;
pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
ea, access, current->comm);
pr_info(" trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n",
trap, vsid, ssize, psize, pte);
}
/* Result code is: /* Result code is:
* 0 - handled * 0 - handled
* 1 - normal page fault * 1 - normal page fault
...@@ -955,6 +967,17 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) ...@@ -955,6 +967,17 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
return 1; return 1;
} }
/* Add _PAGE_PRESENT to the required access perm */
access |= _PAGE_PRESENT;
/* Pre-check access permissions (will be re-checked atomically
* in __hash_page_XX but this pre-check is a fast path
*/
if (access & ~pte_val(*ptep)) {
DBG_LOW(" no access !\n");
return 1;
}
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
if (hugeshift) if (hugeshift)
return __hash_page_huge(ea, access, vsid, ptep, trap, local, return __hash_page_huge(ea, access, vsid, ptep, trap, local,
...@@ -967,14 +990,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) ...@@ -967,14 +990,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep), DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
pte_val(*(ptep + PTRS_PER_PTE))); pte_val(*(ptep + PTRS_PER_PTE)));
#endif #endif
/* Pre-check access permissions (will be re-checked atomically
* in __hash_page_XX but this pre-check is a fast path
*/
if (access & ~pte_val(*ptep)) {
DBG_LOW(" no access !\n");
return 1;
}
/* Do actual hashing */ /* Do actual hashing */
#ifdef CONFIG_PPC_64K_PAGES #ifdef CONFIG_PPC_64K_PAGES
/* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
...@@ -1033,6 +1048,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) ...@@ -1033,6 +1048,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
local, ssize, spp); local, ssize, spp);
} }
/* Dump some info in case of hash insertion failure, they should
* never happen so it is really useful to know if/when they do
*/
if (rc == -1)
hash_failure_debug(ea, access, vsid, trap, ssize, psize,
pte_val(*ptep));
#ifndef CONFIG_PPC_64K_PAGES #ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
#else #else
...@@ -1051,8 +1072,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, ...@@ -1051,8 +1072,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
void *pgdir; void *pgdir;
pte_t *ptep; pte_t *ptep;
unsigned long flags; unsigned long flags;
int local = 0; int rc, ssize, local = 0;
int ssize;
BUG_ON(REGION_ID(ea) != USER_REGION_ID); BUG_ON(REGION_ID(ea) != USER_REGION_ID);
...@@ -1098,12 +1118,19 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, ...@@ -1098,12 +1118,19 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Hash it in */ /* Hash it in */
#ifdef CONFIG_PPC_HAS_HASH_64K #ifdef CONFIG_PPC_HAS_HASH_64K
if (mm->context.user_psize == MMU_PAGE_64K) if (mm->context.user_psize == MMU_PAGE_64K)
__hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
else else
#endif /* CONFIG_PPC_HAS_HASH_64K */ #endif /* CONFIG_PPC_HAS_HASH_64K */
__hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
subpage_protection(pgdir, ea)); subpage_protection(pgdir, ea));
/* Dump some info in case of hash insertion failure, they should
* never happen so it is really useful to know if/when they do
*/
if (rc == -1)
hash_failure_debug(ea, access, vsid, trap, ssize,
mm->context.user_psize, pte_val(*ptep));
local_irq_restore(flags); local_irq_restore(flags);
} }
......
...@@ -21,21 +21,13 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, ...@@ -21,21 +21,13 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
unsigned long old_pte, new_pte; unsigned long old_pte, new_pte;
unsigned long va, rflags, pa, sz; unsigned long va, rflags, pa, sz;
long slot; long slot;
int err = 1;
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
/* Search the Linux page table for a match with va */ /* Search the Linux page table for a match with va */
va = hpt_va(ea, vsid, ssize); va = hpt_va(ea, vsid, ssize);
/* /* At this point, we have a pte (old_pte) which can be used to build
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
*/
if (unlikely(access & ~pte_val(*ptep)))
goto out;
/*
* At this point, we have a pte (old_pte) which can be used to build
* or update an HPTE. There are 2 cases: * or update an HPTE. There are 2 cases:
* *
* 1. There is a valid (present) pte with no associated HPTE (this is * 1. There is a valid (present) pte with no associated HPTE (this is
...@@ -49,9 +41,17 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, ...@@ -49,9 +41,17 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
do { do {
old_pte = pte_val(*ptep); old_pte = pte_val(*ptep);
if (old_pte & _PAGE_BUSY) /* If PTE busy, retry the access */
goto out; if (unlikely(old_pte & _PAGE_BUSY))
return 0;
/* If PTE permissions don't match, take page fault */
if (unlikely(access & ~old_pte))
return 1;
/* Try to lock the PTE, add ACCESSED and DIRTY if it was
* a write access */
new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
if (access & _PAGE_RW)
new_pte |= _PAGE_DIRTY;
} while(old_pte != __cmpxchg_u64((unsigned long *)ptep, } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
old_pte, new_pte)); old_pte, new_pte));
...@@ -121,8 +121,16 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, ...@@ -121,8 +121,16 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
} }
} }
if (unlikely(slot == -2)) /*
panic("hash_huge_page: pte_insert failed\n"); * Hypervisor failure. Restore old pte and return -1
* similar to __hash_page_*
*/
if (unlikely(slot == -2)) {
*ptep = __pte(old_pte);
hash_failure_debug(ea, access, vsid, trap, ssize,
mmu_psize, old_pte);
return -1;
}
new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
} }
...@@ -131,9 +139,5 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, ...@@ -131,9 +139,5 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
* No need to use ldarx/stdcx here * No need to use ldarx/stdcx here
*/ */
*ptep = __pte(new_pte & ~_PAGE_BUSY); *ptep = __pte(new_pte & ~_PAGE_BUSY);
return 0;
err = 0;
out:
return err;
} }
...@@ -398,15 +398,15 @@ static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) ...@@ -398,15 +398,15 @@ static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
} }
/* /*
* Retrieve and validate the ibm,memblock-size property for drconf memory * Retrieve and validate the ibm,lmb-size property for drconf memory
* from the device tree. * from the device tree.
*/ */
static u64 of_get_memblock_size(struct device_node *memory) static u64 of_get_lmb_size(struct device_node *memory)
{ {
const u32 *prop; const u32 *prop;
u32 len; u32 len;
prop = of_get_property(memory, "ibm,memblock-size", &len); prop = of_get_property(memory, "ibm,lmb-size", &len);
if (!prop || len < sizeof(unsigned int)) if (!prop || len < sizeof(unsigned int))
return 0; return 0;
...@@ -562,7 +562,7 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start, ...@@ -562,7 +562,7 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start,
static inline int __init read_usm_ranges(const u32 **usm) static inline int __init read_usm_ranges(const u32 **usm)
{ {
/* /*
* For each memblock in ibm,dynamic-memory a corresponding * For each lmb in ibm,dynamic-memory a corresponding
* entry in linux,drconf-usable-memory property contains * entry in linux,drconf-usable-memory property contains
* a counter followed by that many (base, size) duple. * a counter followed by that many (base, size) duple.
* read the counter from linux,drconf-usable-memory * read the counter from linux,drconf-usable-memory
...@@ -578,7 +578,7 @@ static void __init parse_drconf_memory(struct device_node *memory) ...@@ -578,7 +578,7 @@ static void __init parse_drconf_memory(struct device_node *memory)
{ {
const u32 *dm, *usm; const u32 *dm, *usm;
unsigned int n, rc, ranges, is_kexec_kdump = 0; unsigned int n, rc, ranges, is_kexec_kdump = 0;
unsigned long memblock_size, base, size, sz; unsigned long lmb_size, base, size, sz;
int nid; int nid;
struct assoc_arrays aa; struct assoc_arrays aa;
...@@ -586,8 +586,8 @@ static void __init parse_drconf_memory(struct device_node *memory) ...@@ -586,8 +586,8 @@ static void __init parse_drconf_memory(struct device_node *memory)
if (!n) if (!n)
return; return;
memblock_size = of_get_memblock_size(memory); lmb_size = of_get_lmb_size(memory);
if (!memblock_size) if (!lmb_size)
return; return;
rc = of_get_assoc_arrays(memory, &aa); rc = of_get_assoc_arrays(memory, &aa);
...@@ -611,7 +611,7 @@ static void __init parse_drconf_memory(struct device_node *memory) ...@@ -611,7 +611,7 @@ static void __init parse_drconf_memory(struct device_node *memory)
continue; continue;
base = drmem.base_addr; base = drmem.base_addr;
size = memblock_size; size = lmb_size;
ranges = 1; ranges = 1;
if (is_kexec_kdump) { if (is_kexec_kdump) {
...@@ -1072,7 +1072,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, ...@@ -1072,7 +1072,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
{ {
const u32 *dm; const u32 *dm;
unsigned int drconf_cell_cnt, rc; unsigned int drconf_cell_cnt, rc;
unsigned long memblock_size; unsigned long lmb_size;
struct assoc_arrays aa; struct assoc_arrays aa;
int nid = -1; int nid = -1;
...@@ -1080,8 +1080,8 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, ...@@ -1080,8 +1080,8 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
if (!drconf_cell_cnt) if (!drconf_cell_cnt)
return -1; return -1;
memblock_size = of_get_memblock_size(memory); lmb_size = of_get_lmb_size(memory);
if (!memblock_size) if (!lmb_size)
return -1; return -1;
rc = of_get_assoc_arrays(memory, &aa); rc = of_get_assoc_arrays(memory, &aa);
...@@ -1100,7 +1100,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, ...@@ -1100,7 +1100,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
continue; continue;
if ((scn_addr < drmem.base_addr) if ((scn_addr < drmem.base_addr)
|| (scn_addr >= (drmem.base_addr + memblock_size))) || (scn_addr >= (drmem.base_addr + lmb_size)))
continue; continue;
nid = of_drconf_to_nid_single(&drmem, &aa); nid = of_drconf_to_nid_single(&drmem, &aa);
......
...@@ -69,7 +69,7 @@ static int pseries_remove_memory(struct device_node *np) ...@@ -69,7 +69,7 @@ static int pseries_remove_memory(struct device_node *np)
const char *type; const char *type;
const unsigned int *regs; const unsigned int *regs;
unsigned long base; unsigned long base;
unsigned int memblock_size; unsigned int lmb_size;
int ret = -EINVAL; int ret = -EINVAL;
/* /*
...@@ -87,9 +87,9 @@ static int pseries_remove_memory(struct device_node *np) ...@@ -87,9 +87,9 @@ static int pseries_remove_memory(struct device_node *np)
return ret; return ret;
base = *(unsigned long *)regs; base = *(unsigned long *)regs;
memblock_size = regs[3]; lmb_size = regs[3];
ret = pseries_remove_memblock(base, memblock_size); ret = pseries_remove_memblock(base, lmb_size);
return ret; return ret;
} }
...@@ -98,7 +98,7 @@ static int pseries_add_memory(struct device_node *np) ...@@ -98,7 +98,7 @@ static int pseries_add_memory(struct device_node *np)
const char *type; const char *type;
const unsigned int *regs; const unsigned int *regs;
unsigned long base; unsigned long base;
unsigned int memblock_size; unsigned int lmb_size;
int ret = -EINVAL; int ret = -EINVAL;
/* /*
...@@ -116,36 +116,36 @@ static int pseries_add_memory(struct device_node *np) ...@@ -116,36 +116,36 @@ static int pseries_add_memory(struct device_node *np)
return ret; return ret;
base = *(unsigned long *)regs; base = *(unsigned long *)regs;
memblock_size = regs[3]; lmb_size = regs[3];
/* /*
* Update memory region to represent the memory add * Update memory region to represent the memory add
*/ */
ret = memblock_add(base, memblock_size); ret = memblock_add(base, lmb_size);
return (ret < 0) ? -EINVAL : 0; return (ret < 0) ? -EINVAL : 0;
} }
static int pseries_drconf_memory(unsigned long *base, unsigned int action) static int pseries_drconf_memory(unsigned long *base, unsigned int action)
{ {
struct device_node *np; struct device_node *np;
const unsigned long *memblock_size; const unsigned long *lmb_size;
int rc; int rc;
np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (!np) if (!np)
return -EINVAL; return -EINVAL;
memblock_size = of_get_property(np, "ibm,memblock-size", NULL); lmb_size = of_get_property(np, "ibm,lmb-size", NULL);
if (!memblock_size) { if (!lmb_size) {
of_node_put(np); of_node_put(np);
return -EINVAL; return -EINVAL;
} }
if (action == PSERIES_DRCONF_MEM_ADD) { if (action == PSERIES_DRCONF_MEM_ADD) {
rc = memblock_add(*base, *memblock_size); rc = memblock_add(*base, *lmb_size);
rc = (rc < 0) ? -EINVAL : 0; rc = (rc < 0) ? -EINVAL : 0;
} else if (action == PSERIES_DRCONF_MEM_REMOVE) { } else if (action == PSERIES_DRCONF_MEM_REMOVE) {
rc = pseries_remove_memblock(*base, *memblock_size); rc = pseries_remove_memblock(*base, *lmb_size);
} else { } else {
rc = -EINVAL; rc = -EINVAL;
} }
......
...@@ -445,7 +445,7 @@ ...@@ -445,7 +445,7 @@
*/ */
#define INIT_TASK_DATA_SECTION(align) \ #define INIT_TASK_DATA_SECTION(align) \
. = ALIGN(align); \ . = ALIGN(align); \
.data..init_task : { \ .data..init_task : AT(ADDR(.data..init_task) - LOAD_OFFSET) { \
INIT_TASK_DATA(align) \ INIT_TASK_DATA(align) \
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册