/*
 * Common EFI (Extensible Firmware Interface) support functions
 * Based on Extensible Firmware Interface Specification version 1.0
 *
 * Copyright (C) 1999 VA Linux Systems
 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
 * Copyright (C) 1999-2002 Hewlett-Packard Co.
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *	Stephane Eranian <eranian@hpl.hp.com>
 * Copyright (C) 2005-2008 Intel Co.
 *	Fenghua Yu <fenghua.yu@intel.com>
 *	Bibo Mao <bibo.mao@intel.com>
 *	Chandramouli Narayanan <mouli@linux.intel.com>
 *	Huang Ying <ying.huang@intel.com>
 * Copyright (C) 2013 SuSE Labs
 *	Borislav Petkov <bp@suse.de> - runtime services VA mapping
 *
 * Copied from efi_32.c to eliminate the duplicated code between EFI
 * 32/64 support code. --ying 2007-10-26
 *
 * Not all EFI Runtime Services are implemented yet, as EFI only
 * supports physical mode addressing on SoftSDV. This is to be fixed
 * in a future version.  --drummond 1999-07-20
 *
 * Implemented EFI runtime services and virtual mode calls.  --davidm
 *
 * Goutham Rao: <goutham.rao@intel.com>
 *	Skip non-WB memory and ignore empty memory ranges.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/efi.h>
#include <linux/efi-bgrt.h>
#include <linux/export.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/memblock.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/time.h>
#include <linux/io.h>
#include <linux/reboot.h>
#include <linux/bcd.h>

#include <asm/setup.h>
#include <asm/efi.h>
#include <asm/time.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
#include <asm/rtc.h>
#include <asm/uv/uv.h>

static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;

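/* x86-specific EFI configuration tables, looked up via efi_config_init(). */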
static efi_config_table_type_t arch_tables[] __initdata = {
#ifdef CONFIG_X86_UV
	{UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab},
#endif
	{NULL_GUID, NULL, NULL},
};

u64 efi_setup;		/* efi setup_data physical address */

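/* Non-zero when "add_efi_memmap" is passed on the kernel command line. */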
static int add_efi_memmap __initdata;
static int __init setup_add_efi_memmap(char *arg)
{
	add_efi_memmap = 1;
	return 0;
}
early_param("add_efi_memmap", setup_add_efi_memmap);

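/*
 * Call the firmware's SetVirtualAddressMap service while it is still
 * running in physical mode: switch page tables via efi_call_phys_prolog(),
 * make the call with interrupts disabled, then restore them with
 * efi_call_phys_epilog().
 */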
static efi_status_t __init phys_efi_set_virtual_address_map(
	unsigned long memory_map_size,
	unsigned long descriptor_size,
	u32 descriptor_version,
	efi_memory_desc_t *virtual_map)
{
	efi_status_t status;
	unsigned long flags;
	pgd_t *save_pgd;

	save_pgd = efi_call_phys_prolog();

	/* Disable interrupts around EFI calls: */
	local_irq_save(flags);
	status = efi_call_phys(efi_phys.set_virtual_address_map,
			       memory_map_size, descriptor_size,
			       descriptor_version, virtual_map);
	local_irq_restore(flags);

	efi_call_phys_epilog(save_pgd);

	return status;
}

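/*
 * Walk the EFI memory map, mark every EFI_MEMORY_MORE_RELIABLE region as
 * mirrored in memblock and report how much of memory is mirrored.
 */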
void __init efi_find_mirror(void)
{
	efi_memory_desc_t *md;
	u64 mirror_size = 0, total_size = 0;

	for_each_efi_memory_desc(md) {
		unsigned long long start = md->phys_addr;
		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;

		total_size += size;
		if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
			memblock_mark_mirror(start, size);
			mirror_size += size;
		}
	}
	if (mirror_size)
		pr_info("Memory: %lldM/%lldM mirrored memory\n",
			mirror_size>>20, total_size>>20);
}

/*
 * Tell the kernel about the EFI memory map.  This might include
 * more than the max 128 entries that can fit in the e820 legacy
 * (zeropage) memory map.
 */

static void __init do_add_efi_memmap(void)
{
	efi_memory_desc_t *md;

	for_each_efi_memory_desc(md) {
		unsigned long long start = md->phys_addr;
		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
		int e820_type;

		switch (md->type) {
		case EFI_LOADER_CODE:
		case EFI_LOADER_DATA:
		case EFI_BOOT_SERVICES_CODE:
		case EFI_BOOT_SERVICES_DATA:
		case EFI_CONVENTIONAL_MEMORY:
			if (md->attribute & EFI_MEMORY_WB)
				e820_type = E820_RAM;
			else
				e820_type = E820_RESERVED;
			break;
		case EFI_ACPI_RECLAIM_MEMORY:
			e820_type = E820_ACPI;
			break;
		case EFI_ACPI_MEMORY_NVS:
			e820_type = E820_NVS;
			break;
		case EFI_UNUSABLE_MEMORY:
			e820_type = E820_UNUSABLE;
			break;
		case EFI_PERSISTENT_MEMORY:
			e820_type = E820_PMEM;
			break;
		default:
			/*
			 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
			 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
			 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
			 */
			e820_type = E820_RESERVED;
			break;
		}
		e820_add_region(start, size, e820_type);
	}
	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}

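/*
 * Pick up the EFI memory map details passed in boot_params.efi_info and
 * reserve the memory occupied by the map itself so it is not reused
 * before it has been parsed.
 */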
int __init efi_memblock_x86_reserve_range(void)
{
	struct efi_info *e = &boot_params.efi_info;
	phys_addr_t pmap;

	if (efi_enabled(EFI_PARAVIRT))
		return 0;

#ifdef CONFIG_X86_32
	/* Can't handle data above 4GB at this time */
	if (e->efi_memmap_hi) {
		pr_err("Memory map is above 4GB, disabling EFI.\n");
		return -EINVAL;
	}
	pmap =  e->efi_memmap;
#else
	pmap = (e->efi_memmap |	((__u64)e->efi_memmap_hi << 32));
#endif
	efi.memmap.phys_map	= pmap;
	efi.memmap.nr_map	= e->efi_memmap_size /
				  e->efi_memdesc_size;
	efi.memmap.desc_size	= e->efi_memdesc_size;
	efi.memmap.desc_version	= e->efi_memdesc_version;

	WARN(efi.memmap.desc_version != 1,
	     "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
	     efi.memmap.desc_version);

	memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size);

	return 0;
}

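/* Dump each EFI memory descriptor: type, attributes, range and size. */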
void __init efi_print_memmap(void)
{
	efi_memory_desc_t *md;
	int i = 0;

	for_each_efi_memory_desc(md) {
		char buf[64];

		pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
			i++, efi_md_typeattr_format(buf, sizeof(buf), md),
			md->phys_addr,
			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
	}
}

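/* Undo the early mapping of the EFI memory map and clear EFI_MEMMAP. */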
void __init efi_unmap_memmap(void)
{
	unsigned long size;

	clear_bit(EFI_MEMMAP, &efi.flags);

	size = efi.memmap.nr_map * efi.memmap.desc_size;
	if (efi.memmap.map) {
		early_memunmap(efi.memmap.map, size);
		efi.memmap.map = NULL;
	}
}

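/*
 * Copy the firmware's 32-bit or 64-bit system table into the common
 * efi_systab, preferring pointers handed over via setup_data (kexec)
 * when available, then verify the table signature and revision.
 */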
static int __init efi_systab_init(void *phys)
{
	if (efi_enabled(EFI_64BIT)) {
		efi_system_table_64_t *systab64;
		struct efi_setup_data *data = NULL;
		u64 tmp = 0;

		if (efi_setup) {
			data = early_memremap(efi_setup, sizeof(*data));
			if (!data)
				return -ENOMEM;
		}
		systab64 = early_memremap((unsigned long)phys,
					 sizeof(*systab64));
		if (systab64 == NULL) {
			pr_err("Couldn't map the system table!\n");
			if (data)
				early_memunmap(data, sizeof(*data));
			return -ENOMEM;
		}

		efi_systab.hdr = systab64->hdr;
		efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor :
					      systab64->fw_vendor;
		tmp |= data ? data->fw_vendor : systab64->fw_vendor;
		efi_systab.fw_revision = systab64->fw_revision;
		efi_systab.con_in_handle = systab64->con_in_handle;
		tmp |= systab64->con_in_handle;
		efi_systab.con_in = systab64->con_in;
		tmp |= systab64->con_in;
		efi_systab.con_out_handle = systab64->con_out_handle;
		tmp |= systab64->con_out_handle;
		efi_systab.con_out = systab64->con_out;
		tmp |= systab64->con_out;
		efi_systab.stderr_handle = systab64->stderr_handle;
		tmp |= systab64->stderr_handle;
		efi_systab.stderr = systab64->stderr;
		tmp |= systab64->stderr;
		efi_systab.runtime = data ?
				     (void *)(unsigned long)data->runtime :
				     (void *)(unsigned long)systab64->runtime;
		tmp |= data ? data->runtime : systab64->runtime;
		efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
		tmp |= systab64->boottime;
		efi_systab.nr_tables = systab64->nr_tables;
		efi_systab.tables = data ? (unsigned long)data->tables :
					   systab64->tables;
		tmp |= data ? data->tables : systab64->tables;

		early_memunmap(systab64, sizeof(*systab64));
		if (data)
			early_memunmap(data, sizeof(*data));
#ifdef CONFIG_X86_32
		if (tmp >> 32) {
			pr_err("EFI data located above 4GB, disabling EFI.\n");
			return -EINVAL;
		}
#endif
	} else {
		efi_system_table_32_t *systab32;

		systab32 = early_memremap((unsigned long)phys,
					 sizeof(*systab32));
		if (systab32 == NULL) {
			pr_err("Couldn't map the system table!\n");
			return -ENOMEM;
		}

		efi_systab.hdr = systab32->hdr;
		efi_systab.fw_vendor = systab32->fw_vendor;
		efi_systab.fw_revision = systab32->fw_revision;
		efi_systab.con_in_handle = systab32->con_in_handle;
		efi_systab.con_in = systab32->con_in;
		efi_systab.con_out_handle = systab32->con_out_handle;
		efi_systab.con_out = systab32->con_out;
		efi_systab.stderr_handle = systab32->stderr_handle;
		efi_systab.stderr = systab32->stderr;
		efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
		efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
		efi_systab.nr_tables = systab32->nr_tables;
		efi_systab.tables = systab32->tables;

		early_memunmap(systab32, sizeof(*systab32));
	}

	efi.systab = &efi_systab;

	/*
	 * Verify the EFI Table
	 */
	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
		pr_err("System table signature incorrect!\n");
		return -EINVAL;
	}
	if ((efi.systab->hdr.revision >> 16) == 0)
		pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n",
		       efi.systab->hdr.revision >> 16,
		       efi.systab->hdr.revision & 0xffff);

	return 0;
}

static int __init efi_runtime_init32(void)
{
	efi_runtime_services_32_t *runtime;

	runtime = early_memremap((unsigned long)efi.systab->runtime,
			sizeof(efi_runtime_services_32_t));
	if (!runtime) {
		pr_err("Could not map the runtime service table!\n");
		return -ENOMEM;
	}

	/*
	 * We will only need *early* access to the SetVirtualAddressMap
	 * EFI runtime service. All other runtime services will be called
	 * via the virtual mapping.
	 */
	efi_phys.set_virtual_address_map =
			(efi_set_virtual_address_map_t *)
			(unsigned long)runtime->set_virtual_address_map;
	early_memunmap(runtime, sizeof(efi_runtime_services_32_t));

	return 0;
}

static int __init efi_runtime_init64(void)
{
	efi_runtime_services_64_t *runtime;

	runtime = early_memremap((unsigned long)efi.systab->runtime,
			sizeof(efi_runtime_services_64_t));
	if (!runtime) {
		pr_err("Could not map the runtime service table!\n");
		return -ENOMEM;
	}

	/*
	 * We will only need *early* access to the SetVirtualAddressMap
	 * EFI runtime service. All other runtime services will be called
	 * via the virtual mapping.
	 */
	efi_phys.set_virtual_address_map =
			(efi_set_virtual_address_map_t *)
			(unsigned long)runtime->set_virtual_address_map;
	early_memunmap(runtime, sizeof(efi_runtime_services_64_t));

	return 0;
}

static int __init efi_runtime_init(void)
{
	int rv;

	/*
	 * Check out the runtime services table. We need to map
	 * the runtime services table so that we can grab the physical
	 * address of several of the EFI runtime functions, needed to
	 * set the firmware into virtual mode.
	 *
	 * When EFI_PARAVIRT is in force we cannot map the runtime service
	 * memory region because we do not have direct access to it. However,
	 * runtime services are still available through proxy functions (e.g.
	 * the Xen dom0 EFI implementation issues a special hypercall which
	 * executes the relevant EFI function), and that is why they are
	 * always enabled.
	 */

	if (!efi_enabled(EFI_PARAVIRT)) {
		if (efi_enabled(EFI_64BIT))
			rv = efi_runtime_init64();
		else
			rv = efi_runtime_init32();

		if (rv)
			return rv;
	}

	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);

	return 0;
}

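/*
 * Map the EFI memory map provided by the boot loader and, if
 * "add_efi_memmap" was requested, fold its entries into the e820 map.
 */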
static int __init efi_memmap_init(void)
{
	unsigned long addr, size;

	if (efi_enabled(EFI_PARAVIRT))
		return 0;

	/* Map the EFI memory map */
	size = efi.memmap.nr_map * efi.memmap.desc_size;
	addr = (unsigned long)efi.memmap.phys_map;

	efi.memmap.map = early_memremap(addr, size);
	if (efi.memmap.map == NULL) {
		pr_err("Could not map the memory map!\n");
		return -ENOMEM;
	}

	efi.memmap.map_end = efi.memmap.map + size;

	if (add_efi_memmap)
		do_add_efi_memmap();

	set_bit(EFI_MEMMAP, &efi.flags);

	return 0;
}

void __init efi_init(void)
{
	efi_char16_t *c16;
	char vendor[100] = "unknown";
	int i = 0;
	void *tmp;

#ifdef CONFIG_X86_32
	if (boot_params.efi_info.efi_systab_hi ||
	    boot_params.efi_info.efi_memmap_hi) {
		pr_info("Table located above 4GB, disabling EFI.\n");
		return;
	}
	efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
#else
	efi_phys.systab = (efi_system_table_t *)
			  (boot_params.efi_info.efi_systab |
			  ((__u64)boot_params.efi_info.efi_systab_hi<<32));
#endif

	if (efi_systab_init(efi_phys.systab))
		return;

	efi.config_table = (unsigned long)efi.systab->tables;
	efi.fw_vendor	 = (unsigned long)efi.systab->fw_vendor;
	efi.runtime	 = (unsigned long)efi.systab->runtime;

	/*
	 * Show what we know for posterity
	 */
	c16 = tmp = early_memremap(efi.systab->fw_vendor, 2);
	if (c16) {
		for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
			vendor[i] = *c16++;
		vendor[i] = '\0';
	} else
		pr_err("Could not map the firmware vendor!\n");
	early_memunmap(tmp, 2);

	pr_info("EFI v%u.%.02u by %s\n",
		efi.systab->hdr.revision >> 16,
		efi.systab->hdr.revision & 0xffff, vendor);

	if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables))
		return;

	if (efi_config_init(arch_tables))
		return;

	/*
	 * Note: We currently don't support runtime services on an EFI
	 * that doesn't match the kernel 32/64-bit mode.
	 */

	if (!efi_runtime_supported())
		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
	else {
		if (efi_runtime_disabled() || efi_runtime_init())
			return;
	}
	if (efi_memmap_init())
		return;

	if (efi_enabled(EFI_DBG))
		efi_print_memmap();

	efi_esrt_init();
}

void __init efi_late_init(void)
{
	efi_bgrt_init();
}

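/* Set or clear the NX attribute on the mapping covering @md. */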
void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
{
	u64 addr, npages;

	addr = md->virt_addr;
	npages = md->num_pages;

	memrange_efi_to_native(&addr, &npages);

	if (executable)
		set_memory_x(addr, npages);
	else
		set_memory_nx(addr, npages);
}

void __init runtime_code_page_mkexec(void)
{
	efi_memory_desc_t *md;

	/* Make EFI runtime service code area executable */
	for_each_efi_memory_desc(md) {
		if (md->type != EFI_RUNTIME_SERVICES_CODE)
			continue;

		efi_set_executable(md, true);
	}
}

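/* Remap [addr, addr + size) as uncached. */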
void __init efi_memory_uc(u64 addr, unsigned long size)
{
	unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
	u64 npages;

	npages = round_up(size, page_shift) / page_shift;
	memrange_efi_to_native(&addr, &npages);
	set_memory_uc(addr, npages);
}

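/*
 * Legacy (efi=old_map) mapping: reuse the kernel's direct mapping when the
 * region is already mapped (forcing it uncached if it is not WB), otherwise
 * ioremap it, and record the resulting virtual address in the descriptor.
 */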
void __init old_map_region(efi_memory_desc_t *md)
{
	u64 start_pfn, end_pfn, end;
	unsigned long size;
	void *va;

	start_pfn = PFN_DOWN(md->phys_addr);
	size	  = md->num_pages << PAGE_SHIFT;
	end	  = md->phys_addr + size;
	end_pfn   = PFN_UP(end);

	if (pfn_range_is_mapped(start_pfn, end_pfn)) {
		va = __va(md->phys_addr);

		if (!(md->attribute & EFI_MEMORY_WB))
			efi_memory_uc((u64)(unsigned long)va, size);
	} else
		va = efi_ioremap(md->phys_addr, size,
				 md->type, md->attribute);

	md->virt_addr = (u64) (unsigned long) va;
	if (!va)
		pr_err("ioremap of 0x%llX failed!\n",
		       (unsigned long long)md->phys_addr);
}

/* Merge contiguous regions of the same type and attribute */
static void __init efi_merge_regions(void)
{
	efi_memory_desc_t *md, *prev_md = NULL;

	for_each_efi_memory_desc(md) {
		u64 prev_size;

		if (!prev_md) {
			prev_md = md;
			continue;
		}

		if (prev_md->type != md->type ||
		    prev_md->attribute != md->attribute) {
			prev_md = md;
			continue;
		}

		prev_size = prev_md->num_pages << EFI_PAGE_SHIFT;

		if (md->phys_addr == (prev_md->phys_addr + prev_size)) {
			prev_md->num_pages += md->num_pages;
			md->type = EFI_RESERVED_TYPE;
			md->attribute = 0;
			continue;
		}
		prev_md = md;
	}
}

static void __init get_systab_virt_addr(efi_memory_desc_t *md)
{
	unsigned long size;
	u64 end, systab;

	size = md->num_pages << EFI_PAGE_SHIFT;
	end = md->phys_addr + size;
	systab = (u64)(unsigned long)efi_phys.systab;
	if (md->phys_addr <= systab && systab < end) {
		systab += md->virt_addr - md->phys_addr;
		efi.systab = (efi_system_table_t *)(unsigned long)systab;
	}
}

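/*
 * Keep a copy of the runtime descriptors (excluding boot services
 * regions) so they can be handed to a kexec'd kernel.
 */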
static void __init save_runtime_map(void)
{
#ifdef CONFIG_KEXEC_CORE
	unsigned long desc_size;
	efi_memory_desc_t *md;
	void *tmp, *q = NULL;
	int count = 0;

	if (efi_enabled(EFI_OLD_MEMMAP))
		return;

	desc_size = efi.memmap.desc_size;

	for_each_efi_memory_desc(md) {
		if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
		    (md->type == EFI_BOOT_SERVICES_CODE) ||
		    (md->type == EFI_BOOT_SERVICES_DATA))
			continue;
		tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL);
		if (!tmp)
			goto out;
		q = tmp;

		memcpy(q + count * desc_size, md, desc_size);
		count++;
	}

	efi_runtime_map_setup(q, count, desc_size);
	return;

out:
	kfree(q);
	pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
#endif
}

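/* Double the scratch memmap buffer, copying any existing contents over. */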
static void *realloc_pages(void *old_memmap, int old_shift)
{
	void *ret;

	ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
	if (!ret)
		goto out;

	/*
	 * A first-time allocation doesn't have anything to copy.
	 */
	if (!old_memmap)
		return ret;

	memcpy(ret, old_memmap, PAGE_SIZE << old_shift);

out:
	free_pages((unsigned long)old_memmap, old_shift);
	return ret;
}

/*
 * Iterate the EFI memory map in reverse order because the regions
 * will be mapped top-down. The end result is the same as if we had
 * mapped things forward, but doesn't require us to change the
 * existing implementation of efi_map_region().
 */
static inline void *efi_map_next_entry_reverse(void *entry)
{
	/* Initial call */
	if (!entry)
		return efi.memmap.map_end - efi.memmap.desc_size;

	entry -= efi.memmap.desc_size;
	if (entry < efi.memmap.map)
		return NULL;

	return entry;
}

/*
 * efi_map_next_entry - Return the next EFI memory map descriptor
 * @entry: Previous EFI memory map descriptor
 *
 * This is a helper function to iterate over the EFI memory map, which
 * we do in different orders depending on the current configuration.
 *
 * To begin traversing the memory map @entry must be %NULL.
 *
 * Returns %NULL when we reach the end of the memory map.
 */
static void *efi_map_next_entry(void *entry)
{
	if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) {
		/*
		 * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE
		 * config table feature requires us to map all entries
		 * in the same order as they appear in the EFI memory
		 * map. That is to say, entry N must have a lower
		 * virtual address than entry N+1. This is because the
		 * firmware toolchain leaves relative references in
		 * the code/data sections, which are split and become
		 * separate EFI memory regions. Mapping things
		 * out-of-order leads to the firmware accessing
		 * unmapped addresses.
		 *
		 * Since we need to map things this way whether or not
		 * the kernel actually makes use of
		 * EFI_PROPERTIES_TABLE, let's just switch to this
		 * scheme by default for 64-bit.
		 */
		return efi_map_next_entry_reverse(entry);
	}

	/* Initial call */
	if (!entry)
		return efi.memmap.map;

	entry += efi.memmap.desc_size;
	if (entry >= efi.memmap.map_end)
		return NULL;

	return entry;
}

/*
 * Map the EFI memory ranges of the runtime services and update new_memmap
 * with their virtual addresses.
 */
static void * __init efi_map_regions(int *count, int *pg_shift)
{
	void *p, *new_memmap = NULL;
	unsigned long left = 0;
	unsigned long desc_size;
	efi_memory_desc_t *md;

	desc_size = efi.memmap.desc_size;

	p = NULL;
	while ((p = efi_map_next_entry(p))) {
		md = p;
		if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
#ifdef CONFIG_X86_64
			if (md->type != EFI_BOOT_SERVICES_CODE &&
			    md->type != EFI_BOOT_SERVICES_DATA)
#endif
				continue;
		}

		efi_map_region(md);
		get_systab_virt_addr(md);

		if (left < desc_size) {
			new_memmap = realloc_pages(new_memmap, *pg_shift);
			if (!new_memmap)
				return NULL;

			left += PAGE_SIZE << *pg_shift;
			(*pg_shift)++;
		}

		memcpy(new_memmap + (*count * desc_size), md, desc_size);

		left -= desc_size;
		(*count)++;
	}

	return new_memmap;
}

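/*
 * In a kexec'd kernel the virtual addresses are inherited from the first
 * kernel via setup_data, so map the runtime regions at those fixed
 * addresses instead of calling SetVirtualAddressMap() again.
 */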
static void __init kexec_enter_virtual_mode(void)
{
#ifdef CONFIG_KEXEC_CORE
	efi_memory_desc_t *md;
	unsigned int num_pages;

	efi.systab = NULL;

	/*
	 * We don't do virtual mode, since we don't do runtime services, on
	 * non-native EFI
	 */
	if (!efi_is_native()) {
		efi_unmap_memmap();
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	if (efi_alloc_page_tables()) {
		pr_err("Failed to allocate EFI page tables\n");
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	/*
	 * Map the EFI regions which were passed in via setup_data. The
	 * virt_addr is a fixed address which was used in the first kernel
	 * of a kexec boot.
	 */
	for_each_efi_memory_desc(md) {
		efi_map_region_fixed(md); /* FIXME: add error handling */
		get_systab_virt_addr(md);
	}

	save_runtime_map();

	BUG_ON(!efi.systab);

	num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE);
	num_pages >>= PAGE_SHIFT;

	if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) {
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	efi_sync_low_kernel_mappings();

	/*
	 * Now that EFI is in virtual mode, update the function
	 * pointers in the runtime service table to the new virtual addresses.
	 *
	 * Call EFI services through wrapper functions.
	 */
	efi.runtime_version = efi_systab.hdr.revision;

	efi_native_runtime_setup();

	efi.set_virtual_address_map = NULL;

	if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
		runtime_code_page_mkexec();

	/* clean DUMMY object */
	efi_delete_dummy_variable();
#endif
}

/*
 * This function will switch the EFI runtime services to virtual mode.
 * Essentially, we look through the EFI memmap and map every region that
 * has the runtime attribute bit set in its memory descriptor into the
 * efi_pgd page table.
 *
 * The old method which used to update that memory descriptor with the
 * virtual address obtained from ioremap() is still supported when the
 * kernel is booted with efi=old_map on its command line. Same old
 * method enabled the runtime services to be called without having to
 * thunk back into physical mode for every invocation.
 *
 * The new method does a pagetable switch in a preemption-safe manner
 * so that we're in a different address space when calling a runtime
 * function. To pass function arguments we copy the PUDs of the kernel
 * page table into efi_pgd prior to each call.
 *
 * Specifically for a kexec boot, the EFI runtime maps of the previous
 * kernel should be passed in via setup_data. In that case the runtime
 * ranges will be mapped
 * to the same virtual addresses as the first kernel, see
 * kexec_enter_virtual_mode().
 */
static void __init __efi_enter_virtual_mode(void)
{
	int count = 0, pg_shift = 0;
	void *new_memmap = NULL;
	efi_status_t status;

	efi.systab = NULL;

	if (efi_alloc_page_tables()) {
		pr_err("Failed to allocate EFI page tables\n");
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	efi_merge_regions();
	new_memmap = efi_map_regions(&count, &pg_shift);
	if (!new_memmap) {
		pr_err("Error reallocating memory, EFI runtime non-functional!\n");
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	save_runtime_map();

	BUG_ON(!efi.systab);

	if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) {
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	efi_sync_low_kernel_mappings();

	if (efi_is_native()) {
		status = phys_efi_set_virtual_address_map(
				efi.memmap.desc_size * count,
				efi.memmap.desc_size,
				efi.memmap.desc_version,
				(efi_memory_desc_t *)__pa(new_memmap));
	} else {
		status = efi_thunk_set_virtual_address_map(
				efi_phys.set_virtual_address_map,
				efi.memmap.desc_size * count,
				efi.memmap.desc_size,
				efi.memmap.desc_version,
				(efi_memory_desc_t *)__pa(new_memmap));
	}

	if (status != EFI_SUCCESS) {
		pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
			 status);
		panic("EFI call to SetVirtualAddressMap() failed!");
	}

	/*
	 * Now that EFI is in virtual mode, update the function
	 * pointers in the runtime service table to the new virtual addresses.
	 *
	 * Call EFI services through wrapper functions.
	 */
	efi.runtime_version = efi_systab.hdr.revision;

	if (efi_is_native())
		efi_native_runtime_setup();
	else
		efi_thunk_runtime_setup();

	efi.set_virtual_address_map = NULL;

	/*
	 * Apply more restrictive page table mapping attributes now that
	 * SVAM() has been called and the firmware has performed all
	 * necessary relocation fixups for the new virtual addresses.
	 */
	efi_runtime_update_mappings();
	efi_dump_pagetable();

	/*
	 * We mapped the descriptor array into the EFI pagetable above
	 * but we're not unmapping it here because if we're running in
	 * EFI mixed mode we need all of memory to be accessible when
	 * we pass parameters to the EFI runtime services in the
	 * thunking code.
	 *
	 * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
	 */
	free_pages((unsigned long)new_memmap, pg_shift);

	/* clean DUMMY object */
	efi_delete_dummy_variable();
}

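/* Use the kexec path when setup_data was handed over, else the normal one. */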
void __init efi_enter_virtual_mode(void)
{
	if (efi_enabled(EFI_PARAVIRT))
		return;

	if (efi_setup)
		kexec_enter_virtual_mode();
	else
		__efi_enter_virtual_mode();
}

/*
 * Convenience functions to obtain memory types and attributes
 */
u32 efi_mem_type(unsigned long phys_addr)
{
	efi_memory_desc_t *md;

	if (!efi_enabled(EFI_MEMMAP))
		return 0;

	for_each_efi_memory_desc(md) {
		if ((md->phys_addr <= phys_addr) &&
		    (phys_addr < (md->phys_addr +
				  (md->num_pages << EFI_PAGE_SHIFT))))
			return md->type;
	}
	return 0;
}

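/* Handle the arch-specific "efi=" options; only "old_map" is recognized. */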
static int __init arch_parse_efi_cmdline(char *str)
{
	if (!str) {
		pr_warn("need at least one option\n");
		return -EINVAL;
	}

	if (parse_option_str(str, "old_map"))
		set_bit(EFI_OLD_MEMMAP, &efi.flags);

	return 0;
}
early_param("efi", arch_parse_efi_cmdline);