/*
 * Common EFI (Extensible Firmware Interface) support functions
 * Based on Extensible Firmware Interface Specification version 1.0
 *
 * Copyright (C) 1999 VA Linux Systems
 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
 * Copyright (C) 1999-2002 Hewlett-Packard Co.
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *	Stephane Eranian <eranian@hpl.hp.com>
 * Copyright (C) 2005-2008 Intel Co.
 *	Fenghua Yu <fenghua.yu@intel.com>
 *	Bibo Mao <bibo.mao@intel.com>
 *	Chandramouli Narayanan <mouli@linux.intel.com>
 *	Huang Ying <ying.huang@intel.com>
 * Copyright (C) 2013 SuSE Labs
 *	Borislav Petkov <bp@suse.de> - runtime services VA mapping
 *
 * Copied from efi_32.c to eliminate the duplicated code between EFI
 * 32/64 support code. --ying 2007-10-26
 *
 * EFI Runtime Services are not implemented yet, as EFI only
 * supports physical mode addressing on SoftSDV. This is to be fixed
 * in a future version.  --drummond 1999-07-20
 *
 * Implemented EFI runtime services and virtual mode calls.  --davidm
 *
 * Goutham Rao: <goutham.rao@intel.com>
 *	Skip non-WB memory and ignore empty memory ranges.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/efi.h>
#include <linux/efi-bgrt.h>
#include <linux/export.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/memblock.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/time.h>
#include <linux/io.h>
#include <linux/reboot.h>
#include <linux/bcd.h>

#include <asm/setup.h>
#include <asm/efi.h>
#include <asm/time.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
#include <asm/uv/uv.h>

static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;

static efi_config_table_type_t arch_tables[] __initdata = {
#ifdef CONFIG_X86_UV
	{UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab},
#endif
	{NULL_GUID, NULL, NULL},
};

u64 efi_setup;		/* efi setup_data physical address */

static int add_efi_memmap __initdata;
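/*
 * "add_efi_memmap" boot parameter: when set, the full EFI memory map is
 * also fed into the e820 table (see do_add_efi_memmap() below), which is
 * useful when the firmware map holds more entries than the legacy e820
 * map can carry.
 */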
static int __init setup_add_efi_memmap(char *arg)
{
	add_efi_memmap = 1;
	return 0;
}
early_param("add_efi_memmap", setup_add_efi_memmap);

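/*
 * Call the firmware's SetVirtualAddressMap() while EFI is still running in
 * physical mode; efi_call_phys_prolog()/epilog() switch the page tables
 * around the call and interrupts are disabled for its duration.
 */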
static efi_status_t __init phys_efi_set_virtual_address_map(
	unsigned long memory_map_size,
	unsigned long descriptor_size,
	u32 descriptor_version,
	efi_memory_desc_t *virtual_map)
{
	efi_status_t status;
	unsigned long flags;
	pgd_t *save_pgd;

	save_pgd = efi_call_phys_prolog();

	/* Disable interrupts around EFI calls: */
	local_irq_save(flags);
	status = efi_call_phys(efi_phys.set_virtual_address_map,
			       memory_map_size, descriptor_size,
			       descriptor_version, virtual_map);
	local_irq_restore(flags);

	efi_call_phys_epilog(save_pgd);

	return status;
}

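/*
 * Scan the EFI memory map for ranges advertised as EFI_MEMORY_MORE_RELIABLE
 * and mark them as mirrored in memblock, so the kernel can steer its own
 * allocations towards the more reliable memory.
 */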
void __init efi_find_mirror(void)
{
	efi_memory_desc_t *md;
	u64 mirror_size = 0, total_size = 0;

	for_each_efi_memory_desc(md) {
		unsigned long long start = md->phys_addr;
		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;

		total_size += size;
		if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
			memblock_mark_mirror(start, size);
			mirror_size += size;
		}
	}
	if (mirror_size)
		pr_info("Memory: %lldM/%lldM mirrored memory\n",
			mirror_size>>20, total_size>>20);
}

/*
 * Tell the kernel about the EFI memory map.  This might include
 * more than the max 128 entries that can fit in the e820 legacy
 * (zeropage) memory map.
 */

static void __init do_add_efi_memmap(void)
{
	efi_memory_desc_t *md;

	for_each_efi_memory_desc(md) {
		unsigned long long start = md->phys_addr;
		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
		int e820_type;

		switch (md->type) {
		case EFI_LOADER_CODE:
		case EFI_LOADER_DATA:
		case EFI_BOOT_SERVICES_CODE:
		case EFI_BOOT_SERVICES_DATA:
		case EFI_CONVENTIONAL_MEMORY:
			if (md->attribute & EFI_MEMORY_WB)
				e820_type = E820_RAM;
			else
				e820_type = E820_RESERVED;
			break;
		case EFI_ACPI_RECLAIM_MEMORY:
			e820_type = E820_ACPI;
			break;
		case EFI_ACPI_MEMORY_NVS:
			e820_type = E820_NVS;
			break;
		case EFI_UNUSABLE_MEMORY:
			e820_type = E820_UNUSABLE;
			break;
		case EFI_PERSISTENT_MEMORY:
			e820_type = E820_PMEM;
			break;
		default:
			/*
			 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
			 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
			 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
			 */
			e820_type = E820_RESERVED;
			break;
		}
		e820_add_region(start, size, e820_type);
	}
	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}

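/*
 * Pick up the EFI memory map parameters passed in boot_params.efi_info and
 * memblock_reserve() the map itself so it is not handed out as usable
 * memory before it has been parsed.
 */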
int __init efi_memblock_x86_reserve_range(void)
{
	struct efi_info *e = &boot_params.efi_info;
	phys_addr_t pmap;

	if (efi_enabled(EFI_PARAVIRT))
		return 0;

#ifdef CONFIG_X86_32
	/* Can't handle data above 4GB at this time */
	if (e->efi_memmap_hi) {
		pr_err("Memory map is above 4GB, disabling EFI.\n");
		return -EINVAL;
	}
	pmap =  e->efi_memmap;
#else
	pmap = (e->efi_memmap |	((__u64)e->efi_memmap_hi << 32));
#endif
	efi.memmap.phys_map	= pmap;
	efi.memmap.nr_map	= e->efi_memmap_size /
				  e->efi_memdesc_size;
	efi.memmap.desc_size	= e->efi_memdesc_size;
	efi.memmap.desc_version	= e->efi_memdesc_version;

	WARN(efi.memmap.desc_version != 1,
	     "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
	     efi.memmap.desc_version);

	memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size);

	return 0;
}

void __init efi_print_memmap(void)
{
	efi_memory_desc_t *md;
	int i = 0;

	for_each_efi_memory_desc(md) {
		char buf[64];

		pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
			i++, efi_md_typeattr_format(buf, sizeof(buf), md),
			md->phys_addr,
			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
	}
}

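/*
 * Tear down the early_memremap() mapping of the EFI memory map once it is
 * no longer needed and clear the EFI_MEMMAP flag.
 */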
void __init efi_unmap_memmap(void)
{
	unsigned long size;

	clear_bit(EFI_MEMMAP, &efi.flags);

	size = efi.memmap.nr_map * efi.memmap.desc_size;
	if (efi.memmap.map) {
		early_memunmap(efi.memmap.map, size);
		efi.memmap.map = NULL;
	}
}

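/*
 * Copy the firmware's system table (32- or 64-bit layout, with fields
 * possibly overridden by kexec setup_data) into the kernel's native
 * efi_systab and sanity-check its signature and revision.
 */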
static int __init efi_systab_init(void *phys)
{
	if (efi_enabled(EFI_64BIT)) {
		efi_system_table_64_t *systab64;
		struct efi_setup_data *data = NULL;
		u64 tmp = 0;

		if (efi_setup) {
			data = early_memremap(efi_setup, sizeof(*data));
			if (!data)
				return -ENOMEM;
		}
		systab64 = early_memremap((unsigned long)phys,
					 sizeof(*systab64));
		if (systab64 == NULL) {
			pr_err("Couldn't map the system table!\n");
			if (data)
				early_memunmap(data, sizeof(*data));
			return -ENOMEM;
		}

		efi_systab.hdr = systab64->hdr;
		efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor :
					      systab64->fw_vendor;
		tmp |= data ? data->fw_vendor : systab64->fw_vendor;
		efi_systab.fw_revision = systab64->fw_revision;
		efi_systab.con_in_handle = systab64->con_in_handle;
		tmp |= systab64->con_in_handle;
		efi_systab.con_in = systab64->con_in;
		tmp |= systab64->con_in;
		efi_systab.con_out_handle = systab64->con_out_handle;
		tmp |= systab64->con_out_handle;
		efi_systab.con_out = systab64->con_out;
		tmp |= systab64->con_out;
		efi_systab.stderr_handle = systab64->stderr_handle;
		tmp |= systab64->stderr_handle;
		efi_systab.stderr = systab64->stderr;
		tmp |= systab64->stderr;
		efi_systab.runtime = data ?
				     (void *)(unsigned long)data->runtime :
				     (void *)(unsigned long)systab64->runtime;
		tmp |= data ? data->runtime : systab64->runtime;
		efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
		tmp |= systab64->boottime;
		efi_systab.nr_tables = systab64->nr_tables;
		efi_systab.tables = data ? (unsigned long)data->tables :
					   systab64->tables;
		tmp |= data ? data->tables : systab64->tables;

		early_memunmap(systab64, sizeof(*systab64));
		if (data)
			early_memunmap(data, sizeof(*data));
#ifdef CONFIG_X86_32
		if (tmp >> 32) {
			pr_err("EFI data located above 4GB, disabling EFI.\n");
			return -EINVAL;
		}
#endif
	} else {
		efi_system_table_32_t *systab32;

		systab32 = early_memremap((unsigned long)phys,
					 sizeof(*systab32));
		if (systab32 == NULL) {
			pr_err("Couldn't map the system table!\n");
			return -ENOMEM;
		}

		efi_systab.hdr = systab32->hdr;
		efi_systab.fw_vendor = systab32->fw_vendor;
		efi_systab.fw_revision = systab32->fw_revision;
		efi_systab.con_in_handle = systab32->con_in_handle;
		efi_systab.con_in = systab32->con_in;
		efi_systab.con_out_handle = systab32->con_out_handle;
		efi_systab.con_out = systab32->con_out;
		efi_systab.stderr_handle = systab32->stderr_handle;
		efi_systab.stderr = systab32->stderr;
		efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
		efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
		efi_systab.nr_tables = systab32->nr_tables;
		efi_systab.tables = systab32->tables;

		early_memunmap(systab32, sizeof(*systab32));
	}

	efi.systab = &efi_systab;

	/*
	 * Verify the EFI Table
	 */
	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
		pr_err("System table signature incorrect!\n");
		return -EINVAL;
	}
	if ((efi.systab->hdr.revision >> 16) == 0)
		pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n",
		       efi.systab->hdr.revision >> 16,
		       efi.systab->hdr.revision & 0xffff);

	return 0;
}

static int __init efi_runtime_init32(void)
{
	efi_runtime_services_32_t *runtime;

	runtime = early_memremap((unsigned long)efi.systab->runtime,
			sizeof(efi_runtime_services_32_t));
	if (!runtime) {
		pr_err("Could not map the runtime service table!\n");
		return -ENOMEM;
	}

	/*
	 * We will only need *early* access to the SetVirtualAddressMap
	 * EFI runtime service. All other runtime services will be called
	 * via the virtual mapping.
	 */
	efi_phys.set_virtual_address_map =
			(efi_set_virtual_address_map_t *)
			(unsigned long)runtime->set_virtual_address_map;
	early_memunmap(runtime, sizeof(efi_runtime_services_32_t));

	return 0;
}

static int __init efi_runtime_init64(void)
{
	efi_runtime_services_64_t *runtime;

	runtime = early_memremap((unsigned long)efi.systab->runtime,
			sizeof(efi_runtime_services_64_t));
	if (!runtime) {
		pr_err("Could not map the runtime service table!\n");
		return -ENOMEM;
	}

	/*
	 * We will only need *early* access to the SetVirtualAddressMap
	 * EFI runtime service. All other runtime services will be called
	 * via the virtual mapping.
	 */
	efi_phys.set_virtual_address_map =
			(efi_set_virtual_address_map_t *)
			(unsigned long)runtime->set_virtual_address_map;
	early_memunmap(runtime, sizeof(efi_runtime_services_64_t));

	return 0;
}

static int __init efi_runtime_init(void)
{
	int rv;

	/*
	 * Check out the runtime services table. We need to map
	 * the runtime services table so that we can grab the physical
	 * address of several of the EFI runtime functions, needed to
	 * set the firmware into virtual mode.
	 *
	 * When EFI_PARAVIRT is in force we cannot map the runtime service
	 * memory region because we do not have direct access to it. However,
	 * runtime services are available through proxy functions (e.g. the
	 * Xen dom0 EFI implementation calls a special hypercall which executes
	 * the relevant EFI functions), and that is why they are always enabled.
	 */

	if (!efi_enabled(EFI_PARAVIRT)) {
		if (efi_enabled(EFI_64BIT))
			rv = efi_runtime_init64();
		else
			rv = efi_runtime_init32();

		if (rv)
			return rv;
	}

	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);

	return 0;
}

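/*
 * Map the EFI memory map passed by the boot stub and, if "add_efi_memmap"
 * was given on the command line, merge its entries into the e820 table.
 */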
static int __init efi_memmap_init(void)
{
	unsigned long addr, size;

	if (efi_enabled(EFI_PARAVIRT))
		return 0;

	/* Map the EFI memory map */
	size = efi.memmap.nr_map * efi.memmap.desc_size;
	addr = (unsigned long)efi.memmap.phys_map;

	efi.memmap.map = early_memremap(addr, size);
	if (efi.memmap.map == NULL) {
		pr_err("Could not map the memory map!\n");
		return -ENOMEM;
	}

	efi.memmap.map_end = efi.memmap.map + size;

	if (add_efi_memmap)
		do_add_efi_memmap();

	set_bit(EFI_MEMMAP, &efi.flags);

	return 0;
}

void __init efi_init(void)
{
	efi_char16_t *c16;
	char vendor[100] = "unknown";
	int i = 0;
	void *tmp;

#ifdef CONFIG_X86_32
	if (boot_params.efi_info.efi_systab_hi ||
	    boot_params.efi_info.efi_memmap_hi) {
		pr_info("Table located above 4GB, disabling EFI.\n");
		return;
	}
	efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
#else
	efi_phys.systab = (efi_system_table_t *)
			  (boot_params.efi_info.efi_systab |
			  ((__u64)boot_params.efi_info.efi_systab_hi<<32));
#endif

	if (efi_systab_init(efi_phys.systab))
		return;

	efi.config_table = (unsigned long)efi.systab->tables;
	efi.fw_vendor	 = (unsigned long)efi.systab->fw_vendor;
	efi.runtime	 = (unsigned long)efi.systab->runtime;

	/*
	 * Show what we know for posterity
	 */
	c16 = tmp = early_memremap(efi.systab->fw_vendor, 2);
	if (c16) {
		for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
			vendor[i] = *c16++;
		vendor[i] = '\0';
	} else
		pr_err("Could not map the firmware vendor!\n");
	early_memunmap(tmp, 2);

	pr_info("EFI v%u.%.02u by %s\n",
		efi.systab->hdr.revision >> 16,
		efi.systab->hdr.revision & 0xffff, vendor);

	if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables))
		return;

	if (efi_config_init(arch_tables))
		return;

	/*
	 * Note: We currently don't support runtime services on an EFI
	 * that doesn't match the kernel 32/64-bit mode.
	 */

	if (!efi_runtime_supported())
		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
	else {
		if (efi_runtime_disabled() || efi_runtime_init())
			return;
	}
	if (efi_memmap_init())
		return;

	if (efi_enabled(EFI_DBG))
		efi_print_memmap();

	efi_esrt_init();
}

void __init efi_late_init(void)
{
	efi_bgrt_init();
}

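/* Set or clear the NX protection for an already-mapped EFI region. */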
void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
{
	u64 addr, npages;

	addr = md->virt_addr;
	npages = md->num_pages;

	memrange_efi_to_native(&addr, &npages);

	if (executable)
		set_memory_x(addr, npages);
	else
		set_memory_nx(addr, npages);
}

void __init runtime_code_page_mkexec(void)
{
	efi_memory_desc_t *md;

	/* Make EFI runtime service code area executable */
	for_each_efi_memory_desc(md) {
		if (md->type != EFI_RUNTIME_SERVICES_CODE)
			continue;

		efi_set_executable(md, true);
	}
}

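/* Switch the given range to uncached (UC) in the kernel page tables. */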
void __init efi_memory_uc(u64 addr, unsigned long size)
{
	unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
	u64 npages;

	npages = round_up(size, page_shift) / page_shift;
	memrange_efi_to_native(&addr, &npages);
	set_memory_uc(addr, npages);
}

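/*
 * efi=old_map style mapping: reuse the kernel's direct mapping when the
 * region is already covered by it (downgrading to UC if the region is not
 * write-back capable), otherwise ioremap() it, and record the resulting
 * virtual address in the descriptor.
 */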
void __init old_map_region(efi_memory_desc_t *md)
{
	u64 start_pfn, end_pfn, end;
	unsigned long size;
	void *va;

	start_pfn = PFN_DOWN(md->phys_addr);
	size	  = md->num_pages << PAGE_SHIFT;
	end	  = md->phys_addr + size;
	end_pfn   = PFN_UP(end);

	if (pfn_range_is_mapped(start_pfn, end_pfn)) {
		va = __va(md->phys_addr);

		if (!(md->attribute & EFI_MEMORY_WB))
			efi_memory_uc((u64)(unsigned long)va, size);
	} else
		va = efi_ioremap(md->phys_addr, size,
				 md->type, md->attribute);

	md->virt_addr = (u64) (unsigned long) va;
	if (!va)
		pr_err("ioremap of 0x%llX failed!\n",
		       (unsigned long long)md->phys_addr);
}

/* Merge contiguous regions of the same type and attribute */
static void __init efi_merge_regions(void)
{
	efi_memory_desc_t *md, *prev_md = NULL;

	for_each_efi_memory_desc(md) {
		u64 prev_size;

		if (!prev_md) {
			prev_md = md;
			continue;
		}

		if (prev_md->type != md->type ||
		    prev_md->attribute != md->attribute) {
			prev_md = md;
			continue;
		}

		prev_size = prev_md->num_pages << EFI_PAGE_SHIFT;

		if (md->phys_addr == (prev_md->phys_addr + prev_size)) {
			prev_md->num_pages += md->num_pages;
			md->type = EFI_RESERVED_TYPE;
			md->attribute = 0;
			continue;
		}
		prev_md = md;
	}
}

static void __init get_systab_virt_addr(efi_memory_desc_t *md)
{
	unsigned long size;
	u64 end, systab;

	size = md->num_pages << EFI_PAGE_SHIFT;
	end = md->phys_addr + size;
	systab = (u64)(unsigned long)efi_phys.systab;
	if (md->phys_addr <= systab && systab < end) {
		systab += md->virt_addr - md->phys_addr;
		efi.systab = (efi_system_table_t *)(unsigned long)systab;
	}
}

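/*
 * Preserve the runtime-related descriptors (with their virtual addresses)
 * so they can be handed to a kexec'd kernel, which must map the regions at
 * the very same addresses.
 */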
static void __init save_runtime_map(void)
{
#ifdef CONFIG_KEXEC_CORE
	unsigned long desc_size;
	efi_memory_desc_t *md;
	void *tmp, *q = NULL;
	int count = 0;

	if (efi_enabled(EFI_OLD_MEMMAP))
		return;

	desc_size = efi.memmap.desc_size;

	for_each_efi_memory_desc(md) {
		if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
		    (md->type == EFI_BOOT_SERVICES_CODE) ||
		    (md->type == EFI_BOOT_SERVICES_DATA))
			continue;
		tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL);
		if (!tmp)
			goto out;
		q = tmp;

		memcpy(q + count * desc_size, md, desc_size);
		count++;
	}

	efi_runtime_map_setup(q, count, desc_size);
	return;

out:
	kfree(q);
	pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
#endif
}

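/*
 * Grow the buffer used to collect the new memory map: allocate twice as
 * many pages, copy the old contents across (if any) and free the old
 * buffer.
 */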
static void *realloc_pages(void *old_memmap, int old_shift)
{
	void *ret;

	ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
	if (!ret)
		goto out;

	/*
	 * A first-time allocation doesn't have anything to copy.
	 */
	if (!old_memmap)
		return ret;

	memcpy(ret, old_memmap, PAGE_SIZE << old_shift);

out:
	free_pages((unsigned long)old_memmap, old_shift);
	return ret;
}

/*
 * Iterate the EFI memory map in reverse order because the regions
 * will be mapped top-down. The end result is the same as if we had
 * mapped things forward, but doesn't require us to change the
 * existing implementation of efi_map_region().
 */
static inline void *efi_map_next_entry_reverse(void *entry)
{
	/* Initial call */
	if (!entry)
		return efi.memmap.map_end - efi.memmap.desc_size;

	entry -= efi.memmap.desc_size;
	if (entry < efi.memmap.map)
		return NULL;

	return entry;
}

/*
 * efi_map_next_entry - Return the next EFI memory map descriptor
 * @entry: Previous EFI memory map descriptor
 *
 * This is a helper function to iterate over the EFI memory map, which
 * we do in different orders depending on the current configuration.
 *
 * To begin traversing the memory map @entry must be %NULL.
 *
 * Returns %NULL when we reach the end of the memory map.
 */
static void *efi_map_next_entry(void *entry)
{
	if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) {
		/*
		 * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE
		 * config table feature requires us to map all entries
		 * in the same order as they appear in the EFI memory
		 * map. That is to say, entry N must have a lower
		 * virtual address than entry N+1. This is because the
		 * firmware toolchain leaves relative references in
		 * the code/data sections, which are split and become
		 * separate EFI memory regions. Mapping things
		 * out-of-order leads to the firmware accessing
		 * unmapped addresses.
		 *
		 * Since we need to map things this way whether or not
		 * the kernel actually makes use of
		 * EFI_PROPERTIES_TABLE, let's just switch to this
		 * scheme by default for 64-bit.
		 */
		return efi_map_next_entry_reverse(entry);
	}

	/* Initial call */
	if (!entry)
		return efi.memmap.map;

	entry += efi.memmap.desc_size;
	if (entry >= efi.memmap.map_end)
		return NULL;

	return entry;
}

/*
 * Map the EFI memory ranges of the runtime services and update new_memmap
 * with their virtual addresses.
 */
static void * __init efi_map_regions(int *count, int *pg_shift)
{
	void *p, *new_memmap = NULL;
	unsigned long left = 0;
	unsigned long desc_size;
	efi_memory_desc_t *md;

	desc_size = efi.memmap.desc_size;

	p = NULL;
	while ((p = efi_map_next_entry(p))) {
		md = p;
		if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
#ifdef CONFIG_X86_64
			if (md->type != EFI_BOOT_SERVICES_CODE &&
			    md->type != EFI_BOOT_SERVICES_DATA)
#endif
				continue;
		}

		efi_map_region(md);
		get_systab_virt_addr(md);

		if (left < desc_size) {
			new_memmap = realloc_pages(new_memmap, *pg_shift);
			if (!new_memmap)
				return NULL;

			left += PAGE_SIZE << *pg_shift;
			(*pg_shift)++;
		}

		memcpy(new_memmap + (*count * desc_size), md, desc_size);

		left -= desc_size;
		(*count)++;
	}

	return new_memmap;
}

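/*
 * kexec path: the regions already have virtual addresses assigned by the
 * first kernel (passed in via setup_data), so map them at those fixed
 * addresses instead of calling SetVirtualAddressMap() again.
 */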
static void __init kexec_enter_virtual_mode(void)
{
#ifdef CONFIG_KEXEC_CORE
	efi_memory_desc_t *md;
	unsigned int num_pages;

	efi.systab = NULL;

	/*
	 * We don't do virtual mode, since we don't do runtime services, on
	 * non-native EFI
	 */
	if (!efi_is_native()) {
		efi_unmap_memmap();
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	if (efi_alloc_page_tables()) {
		pr_err("Failed to allocate EFI page tables\n");
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	/*
	 * Map EFI regions which were passed via setup_data. The virt_addr is a
	 * fixed address which was used in the first kernel of a kexec boot.
	 */
	for_each_efi_memory_desc(md) {
		efi_map_region_fixed(md); /* FIXME: add error handling */
		get_systab_virt_addr(md);
	}

	save_runtime_map();

	BUG_ON(!efi.systab);

	num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE);
	num_pages >>= PAGE_SHIFT;

	if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) {
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	efi_sync_low_kernel_mappings();

	/*
	 * Now that EFI is in virtual mode, update the function
	 * pointers in the runtime service table to the new virtual addresses.
	 *
	 * Call EFI services through wrapper functions.
	 */
	efi.runtime_version = efi_systab.hdr.revision;

	efi_native_runtime_setup();

	efi.set_virtual_address_map = NULL;

	if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
		runtime_code_page_mkexec();

	/* clean DUMMY object */
	efi_delete_dummy_variable();
#endif
}

/*
 * This function will switch the EFI runtime services to virtual mode.
 * Essentially, we look through the EFI memmap and map every region that
 * has the runtime attribute bit set in its memory descriptor into the
 * efi_pgd page table.
 *
 * The old method which used to update that memory descriptor with the
 * virtual address obtained from ioremap() is still supported when the
 * kernel is booted with efi=old_map on its command line. That same
 * method enabled the runtime services to be called without having to
 * thunk back into physical mode for every invocation.
 *
 * The new method does a pagetable switch in a preemption-safe manner
 * so that we're in a different address space when calling a runtime
 * function. For passing function arguments we copy the PUDs of the
 * kernel page table into efi_pgd prior to each call.
 *
 * Specifically for a kexec boot, EFI runtime maps from the previous
 * kernel should be passed in via setup_data. In that case runtime
 * ranges will be mapped to the same virtual addresses as in the first
 * kernel, see kexec_enter_virtual_mode().
 */
static void __init __efi_enter_virtual_mode(void)
{
	int count = 0, pg_shift = 0;
	void *new_memmap = NULL;
	efi_status_t status;

	efi.systab = NULL;

	if (efi_alloc_page_tables()) {
		pr_err("Failed to allocate EFI page tables\n");
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	efi_merge_regions();
	new_memmap = efi_map_regions(&count, &pg_shift);
	if (!new_memmap) {
		pr_err("Error reallocating memory, EFI runtime non-functional!\n");
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	save_runtime_map();

	BUG_ON(!efi.systab);

	if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) {
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	efi_sync_low_kernel_mappings();

	if (efi_is_native()) {
		status = phys_efi_set_virtual_address_map(
				efi.memmap.desc_size * count,
				efi.memmap.desc_size,
				efi.memmap.desc_version,
				(efi_memory_desc_t *)__pa(new_memmap));
	} else {
		status = efi_thunk_set_virtual_address_map(
				efi_phys.set_virtual_address_map,
				efi.memmap.desc_size * count,
				efi.memmap.desc_size,
				efi.memmap.desc_version,
				(efi_memory_desc_t *)__pa(new_memmap));
	}

	if (status != EFI_SUCCESS) {
		pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
			 status);
		panic("EFI call to SetVirtualAddressMap() failed!");
	}

	/*
	 * Now that EFI is in virtual mode, update the function
	 * pointers in the runtime service table to the new virtual addresses.
	 *
	 * Call EFI services through wrapper functions.
	 */
	efi.runtime_version = efi_systab.hdr.revision;

	if (efi_is_native())
		efi_native_runtime_setup();
	else
		efi_thunk_runtime_setup();

	efi.set_virtual_address_map = NULL;

	/*
	 * Apply more restrictive page table mapping attributes now that
	 * SVAM() has been called and the firmware has performed all
	 * necessary relocation fixups for the new virtual addresses.
	 */
	efi_runtime_update_mappings();
	efi_dump_pagetable();

	/*
	 * We mapped the descriptor array into the EFI pagetable above
	 * but we're not unmapping it here because if we're running in
	 * EFI mixed mode we need all of memory to be accessible when
	 * we pass parameters to the EFI runtime services in the
	 * thunking code.
	 */
	free_pages((unsigned long)new_memmap, pg_shift);

	/* clean DUMMY object */
	efi_delete_dummy_variable();
}

void __init efi_enter_virtual_mode(void)
{
	if (efi_enabled(EFI_PARAVIRT))
		return;

	if (efi_setup)
		kexec_enter_virtual_mode();
	else
		__efi_enter_virtual_mode();
}

/*
 * Convenience functions to obtain memory types and attributes
 */
u32 efi_mem_type(unsigned long phys_addr)
{
	efi_memory_desc_t *md;

	if (!efi_enabled(EFI_MEMMAP))
		return 0;

	for_each_efi_memory_desc(md) {
		if ((md->phys_addr <= phys_addr) &&
		    (phys_addr < (md->phys_addr +
				  (md->num_pages << EFI_PAGE_SHIFT))))
			return md->type;
	}
	return 0;
}

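/* Parse the arch-specific "efi=" command line options, e.g. "efi=old_map". */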
static int __init arch_parse_efi_cmdline(char *str)
{
	if (!str) {
		pr_warn("need at least one option\n");
		return -EINVAL;
	}

	if (parse_option_str(str, "old_map"))
		set_bit(EFI_OLD_MEMMAP, &efi.flags);

	return 0;
}
early_param("efi", arch_parse_efi_cmdline);