diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 4afd3b3b658fa09a7dc931f0416b66a0757533b9..e985d6bf7d3a25d849a7db87c32824e68d70226e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -129,7 +129,8 @@ extern void efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); -extern void efi_setup_page_tables(void); +extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); +extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_mkexec(void); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 37f20d71ec4bd6a310559adaeaf755939a97639b..576bb126593a6a011dca6a27e906174728358f71 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -926,14 +926,36 @@ static void __init efi_map_regions_fixed(void) } +static void *realloc_pages(void *old_memmap, int old_shift) +{ + void *ret; + + ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1); + if (!ret) + goto out; + + /* + * A first-time allocation doesn't have anything to copy. + */ + if (!old_memmap) + return ret; + + memcpy(ret, old_memmap, PAGE_SIZE << old_shift); + +out: + free_pages((unsigned long)old_memmap, old_shift); + return ret; +} + /* - * Map efi memory ranges for runtime serivce and update new_memmap with virtual - * addresses. + * Map the efi memory ranges of the runtime services and update new_mmap with + * virtual addresses. */ -static void * __init efi_map_regions(int *count) +static void * __init efi_map_regions(int *count, int *pg_shift) { + void *p, *new_memmap = NULL; + unsigned long left = 0; efi_memory_desc_t *md; - void *p, *tmp, *new_memmap = NULL; for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; @@ -948,20 +970,23 @@ static void * __init efi_map_regions(int *count) efi_map_region(md); get_systab_virt_addr(md); - tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size, - GFP_KERNEL); - if (!tmp) - goto out; - new_memmap = tmp; + if (left < memmap.desc_size) { + new_memmap = realloc_pages(new_memmap, *pg_shift); + if (!new_memmap) + return NULL; + + left += PAGE_SIZE << *pg_shift; + (*pg_shift)++; + } + memcpy(new_memmap + (*count * memmap.desc_size), md, memmap.desc_size); + + left -= memmap.desc_size; (*count)++; } return new_memmap; -out: - kfree(new_memmap); - return NULL; } /* @@ -987,9 +1012,9 @@ static void * __init efi_map_regions(int *count) */ void __init efi_enter_virtual_mode(void) { - efi_status_t status; + int err, count = 0, pg_shift = 0; void *new_memmap = NULL; - int err, count = 0; + efi_status_t status; efi.systab = NULL; @@ -1006,20 +1031,24 @@ void __init efi_enter_virtual_mode(void) efi_map_regions_fixed(); } else { efi_merge_regions(); - new_memmap = efi_map_regions(&count); + new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); return; } - } - err = save_runtime_map(); - if (err) - pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); + err = save_runtime_map(); + if (err) + pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); + } BUG_ON(!efi.systab); - efi_setup_page_tables(); + if (!efi_setup) { + if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) + return; + } + efi_sync_low_kernel_mappings(); efi_dump_pagetable(); @@ -1060,7 +1089,35 @@ void __init efi_enter_virtual_mode(void) efi_runtime_mkexec(); - kfree(new_memmap); + + /* + * We mapped the descriptor array into the EFI pagetable above but we're + * not unmapping it here. Here's why: + * + * We're copying select PGDs from the kernel page table to the EFI page + * table and when we do so and make changes to those PGDs like unmapping + * stuff from them, those changes appear in the kernel page table and we + * go boom. + * + * From setup_real_mode(): + * + * ... + * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; + * + * In this particular case, our allocation is in PGD 0 of the EFI page + * table but we've copied that PGD from PGD[272] of the EFI page table: + * + * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 + * + * where the direct memory mapping in kernel space is. + * + * new_memmap's VA comes from that direct mapping and thus clearing it, + * it would get cleared in the kernel page table too. + * + * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); + */ + if (!efi_setup) + free_pages((unsigned long)new_memmap, pg_shift); /* clean DUMMY object */ efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 39496ae3928a964199f142ec66cee29d4b53caa8..9ee3491e31fbab7f6643462395d31c6f9f36f1b4 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -40,8 +40,12 @@ static unsigned long efi_rt_eflags; void efi_sync_low_kernel_mappings(void) {} -void efi_setup_page_tables(void) {} void __init efi_dump_pagetable(void) {} +int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + return 0; +} +void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {} void __init efi_map_region(efi_memory_desc_t *md) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index e05c69b46f05af55c92a5e560dba7ad83b2c1d0b..19280900ec25e63b3a02b9c90e3371f81858bdfe 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -137,12 +137,38 @@ void efi_sync_low_kernel_mappings(void) sizeof(pgd_t) * num_pgds); } -void efi_setup_page_tables(void) +int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { + pgd_t *pgd; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; + pgd = __va(efi_scratch.efi_pgt); - if (!efi_enabled(EFI_OLD_MEMMAP)) - efi_scratch.use_pgd = true; + /* + * It can happen that the physical address of new_memmap lands in memory + * which is not mapped in the EFI page table. Therefore we need to go + * and ident-map those pages containing the map before calling + * phys_efi_set_virtual_address_map(). + */ + if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) { + pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); + return 1; + } + + efi_scratch.use_pgd = true; + + + return 0; +} + +void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + + kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); } static void __init __map_region(efi_memory_desc_t *md, u64 va)