efi.c 23.1 KB
Newer Older
H
Huang, Ying 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 * Common EFI (Extensible Firmware Interface) support functions
 * Based on Extensible Firmware Interface Specification version 1.0
 *
 * Copyright (C) 1999 VA Linux Systems
 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
 * Copyright (C) 1999-2002 Hewlett-Packard Co.
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *	Stephane Eranian <eranian@hpl.hp.com>
 * Copyright (C) 2005-2008 Intel Co.
 *	Fenghua Yu <fenghua.yu@intel.com>
 *	Bibo Mao <bibo.mao@intel.com>
 *	Chandramouli Narayanan <mouli@linux.intel.com>
 *	Huang Ying <ying.huang@intel.com>
15 16
 * Copyright (C) 2013 SuSE Labs
 *	Borislav Petkov <bp@suse.de> - runtime services VA mapping
H
Huang, Ying 已提交
17 18 19 20 21 22 23 24 25 26 27 28 29 30
 *
 * Copied from efi_32.c to eliminate the duplicated code between EFI
 * 32/64 support code. --ying 2007-10-26
 *
 * All EFI Runtime Services are not implemented yet as EFI only
 * supports physical mode addressing on SoftSDV. This is to be fixed
 * in a future version.  --drummond 1999-07-20
 *
 * Implemented EFI runtime services and virtual mode calls.  --davidm
 *
 * Goutham Rao: <goutham.rao@intel.com>
 *	Skip non-WB memory and ignore empty memory ranges.
 */

31 32
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

H
Huang, Ying 已提交
33 34 35
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/efi.h>
36
#include <linux/efi-bgrt.h>
37
#include <linux/export.h>
H
Huang, Ying 已提交
38
#include <linux/bootmem.h>
39
#include <linux/slab.h>
40
#include <linux/memblock.h>
H
Huang, Ying 已提交
41 42 43 44 45 46 47 48 49 50
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/time.h>
#include <linux/io.h>
#include <linux/reboot.h>
#include <linux/bcd.h>

#include <asm/setup.h>
#include <asm/efi.h>
#include <asm/time.h>
51 52
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
53
#include <asm/x86_init.h>
54
#include <asm/rtc.h>
B
Borislav Petkov 已提交
55
#include <asm/uv/uv.h>
H
Huang, Ying 已提交
56

B
Borislav Petkov 已提交
57
#define EFI_DEBUG
H
Huang, Ying 已提交
58 59 60

/*
 * State of the firmware-provided EFI memory map.  The physical address,
 * entry count and descriptor size/version are filled in by
 * efi_memblock_x86_reserve_range(); ->map and ->map_end are set once
 * efi_memmap_init() has mapped the table.
 */
struct efi_memory_map memmap;

H
Harvey Harrison 已提交
61
/*
 * Physical-mode bookkeeping: ->systab holds the physical address of the
 * firmware system table (set in efi_init()) and ->set_virtual_address_map
 * the entry point recorded by efi_runtime_init32()/efi_runtime_init64().
 */
static struct efi efi_phys __initdata;
H
Huang, Ying 已提交
62 63
/* Local copy of the firmware system table, populated by efi_systab_init(). */
static efi_system_table_t efi_systab __initdata;

J
Joe Perches 已提交
64
/*
 * Architecture-specific configuration tables handed to efi_config_init()
 * from efi_init().  The list is terminated by the NULL_GUID entry.
 */
static efi_config_table_type_t arch_tables[] __initdata = {
#ifdef CONFIG_X86_UV
	{ UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab },
#endif
	{ NULL_GUID, NULL, NULL },
};

71
u64 efi_setup;		/* efi setup_data physical address */
72

73 74 75 76 77 78 79 80 81 82
/*
 * Non-zero when "add_efi_memmap" was given on the command line; checked by
 * efi_memmap_init() to decide whether to call do_add_efi_memmap().
 */
int add_efi_memmap;
EXPORT_SYMBOL(add_efi_memmap);

/*
 * Early-parameter handler for "add_efi_memmap": the mere presence of the
 * option enables the flag, @arg is not inspected.  Always returns 0.
 */
static int __init setup_add_efi_memmap(char *arg)
{
	add_efi_memmap = 1;
	return 0;
}
early_param("add_efi_memmap", setup_add_efi_memmap);

H
Huang, Ying 已提交
83 84 85 86 87 88 89 90 91
/*
 * Call the firmware's SetVirtualAddressMap() entry point recorded in
 * efi_phys.  The call is made through efi_call_phys() and is bracketed by
 * efi_call_phys_prelog()/efi_call_phys_epilog().
 *
 * Returns the status produced by the firmware call.
 */
static efi_status_t __init phys_efi_set_virtual_address_map(
	unsigned long memory_map_size,
	unsigned long descriptor_size,
	u32 descriptor_version,
	efi_memory_desc_t *virtual_map)
{
	efi_status_t ret;

	efi_call_phys_prelog();
	ret = efi_call_phys(efi_phys.set_virtual_address_map,
			    memory_map_size, descriptor_size,
			    descriptor_version, virtual_map);
	efi_call_phys_epilog();

	return ret;
}

99
void efi_get_time(struct timespec *now)
H
Huang, Ying 已提交
100 101 102 103 104 105 106
{
	efi_status_t status;
	efi_time_t eft;
	efi_time_cap_t cap;

	status = efi.get_time(&eft, &cap);
	if (status != EFI_SUCCESS)
107
		pr_err("Oops: efitime: can't read time!\n");
H
Huang, Ying 已提交
108

109 110 111
	now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour,
			     eft.minute, eft.second);
	now->tv_nsec = 0;
H
Huang, Ying 已提交
112 113
}

114 115 116 117 118 119
/*
 * Tell the kernel about the EFI memory map.  This might include
 * more than the max 128 entries that can fit in the e820 legacy
 * (zeropage) memory map.
 */

120
static void __init do_add_efi_memmap(void)
121 122 123 124 125 126 127 128 129
{
	void *p;

	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
		efi_memory_desc_t *md = p;
		unsigned long long start = md->phys_addr;
		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
		int e820_type;

130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
		switch (md->type) {
		case EFI_LOADER_CODE:
		case EFI_LOADER_DATA:
		case EFI_BOOT_SERVICES_CODE:
		case EFI_BOOT_SERVICES_DATA:
		case EFI_CONVENTIONAL_MEMORY:
			if (md->attribute & EFI_MEMORY_WB)
				e820_type = E820_RAM;
			else
				e820_type = E820_RESERVED;
			break;
		case EFI_ACPI_RECLAIM_MEMORY:
			e820_type = E820_ACPI;
			break;
		case EFI_ACPI_MEMORY_NVS:
			e820_type = E820_NVS;
			break;
		case EFI_UNUSABLE_MEMORY:
			e820_type = E820_UNUSABLE;
			break;
		default:
			/*
			 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
			 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
			 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
			 */
156
			e820_type = E820_RESERVED;
157 158
			break;
		}
159
		e820_add_region(start, size, e820_type);
160 161 162 163
	}
	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}

164
/*
 * Record where the boot loader left the EFI memory map (taken from
 * boot_params.efi_info) and reserve that range in memblock.
 *
 * Returns 0 on success or when EFI_PARAVIRT is enabled (nothing to do),
 * and -EINVAL on 32-bit kernels when the map lives above 4GB.
 */
int __init efi_memblock_x86_reserve_range(void)
{
	struct efi_info *info = &boot_params.efi_info;
	unsigned long map_pa;

	if (efi_enabled(EFI_PARAVIRT))
		return 0;

#ifdef CONFIG_X86_32
	/* Can't handle data above 4GB at this time */
	if (info->efi_memmap_hi) {
		pr_err("Memory map is above 4GB, disabling EFI.\n");
		return -EINVAL;
	}
	map_pa = info->efi_memmap;
#else
	map_pa = (info->efi_memmap | ((__u64)info->efi_memmap_hi << 32));
#endif

	memmap.desc_version	= info->efi_memdesc_version;
	memmap.desc_size	= info->efi_memdesc_size;
	memmap.nr_map		= info->efi_memmap_size /
				  info->efi_memdesc_size;
	memmap.phys_map		= (void *)map_pa;

	memblock_reserve(map_pa, memmap.nr_map * memmap.desc_size);

	efi.memmap = &memmap;

	return 0;
}

H
Huang, Ying 已提交
195 196
/*
 * Log one line per EFI memory descriptor (type, attributes, physical range
 * and size in MB).  Compiles to an empty function unless EFI_DEBUG is
 * defined.
 */
static void __init print_efi_memmap(void)
{
#ifdef EFI_DEBUG
	void *cur = memmap.map;
	int i = 0;

	while (cur < memmap.map_end) {
		efi_memory_desc_t *md = cur;

		pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n",
			i, md->type, md->attribute, md->phys_addr,
			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));

		cur += memmap.desc_size;
		i++;
	}
#endif  /*  EFI_DEBUG  */
}
H
Huang, Ying 已提交
213

214
/*
 * Drop the early mapping of the EFI memory map.  The EFI_MEMMAP flag is
 * cleared unconditionally; the unmap itself only happens when a mapping
 * exists.
 */
void __init efi_unmap_memmap(void)
{
	clear_bit(EFI_MEMMAP, &efi.flags);

	if (!memmap.map)
		return;

	early_memunmap(memmap.map, memmap.nr_map * memmap.desc_size);
	memmap.map = NULL;
}

223
/*
 * Copy the firmware system table at physical address @phys into the local
 * efi_systab and point efi.systab at that copy.
 *
 * With 64-bit firmware, the fw_vendor, runtime and tables fields are taken
 * from the setup_data blob at efi_setup when one is present, otherwise
 * from the system table itself.
 *
 * Returns 0 on success, -ENOMEM when a table cannot be mapped, and -EINVAL
 * when the signature is wrong or (32-bit kernels only) a 64-bit table
 * references data above 4GB.  EFI_SYSTEM_TABLES is set on success.
 */
static int __init efi_systab_init(void *phys)
{
	if (efi_enabled(EFI_64BIT)) {
		efi_system_table_64_t *systab64;
		struct efi_setup_data *data = NULL;
		u64 fw_vendor, runtime, tables;
		u64 tmp = 0;	/* OR of every address, for the 4GB check */

		if (efi_setup) {
			data = early_memremap(efi_setup, sizeof(*data));
			if (!data)
				return -ENOMEM;
		}

		systab64 = early_memremap((unsigned long)phys,
					  sizeof(*systab64));
		if (systab64 == NULL) {
			pr_err("Couldn't map the system table!\n");
			if (data)
				early_memunmap(data, sizeof(*data));
			return -ENOMEM;
		}

		/* Fields which setup_data overrides when it is available. */
		fw_vendor = data ? data->fw_vendor : systab64->fw_vendor;
		runtime   = data ? data->runtime   : systab64->runtime;
		tables    = data ? data->tables    : systab64->tables;

		efi_systab.hdr = systab64->hdr;
		efi_systab.fw_vendor = fw_vendor;
		tmp |= fw_vendor;
		efi_systab.fw_revision = systab64->fw_revision;
		efi_systab.con_in_handle = systab64->con_in_handle;
		tmp |= systab64->con_in_handle;
		efi_systab.con_in = systab64->con_in;
		tmp |= systab64->con_in;
		efi_systab.con_out_handle = systab64->con_out_handle;
		tmp |= systab64->con_out_handle;
		efi_systab.con_out = systab64->con_out;
		tmp |= systab64->con_out;
		efi_systab.stderr_handle = systab64->stderr_handle;
		tmp |= systab64->stderr_handle;
		efi_systab.stderr = systab64->stderr;
		tmp |= systab64->stderr;
		efi_systab.runtime = (void *)(unsigned long)runtime;
		tmp |= runtime;
		efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
		tmp |= systab64->boottime;
		efi_systab.nr_tables = systab64->nr_tables;
		efi_systab.tables = tables;
		tmp |= tables;

		early_memunmap(systab64, sizeof(*systab64));
		if (data)
			early_memunmap(data, sizeof(*data));
#ifdef CONFIG_X86_32
		if (tmp >> 32) {
			pr_err("EFI data located above 4GB, disabling EFI.\n");
			return -EINVAL;
		}
#endif
	} else {
		efi_system_table_32_t *systab32;

		systab32 = early_memremap((unsigned long)phys,
					  sizeof(*systab32));
		if (systab32 == NULL) {
			pr_err("Couldn't map the system table!\n");
			return -ENOMEM;
		}

		efi_systab.hdr = systab32->hdr;
		efi_systab.fw_vendor = systab32->fw_vendor;
		efi_systab.fw_revision = systab32->fw_revision;
		efi_systab.con_in_handle = systab32->con_in_handle;
		efi_systab.con_in = systab32->con_in;
		efi_systab.con_out_handle = systab32->con_out_handle;
		efi_systab.con_out = systab32->con_out;
		efi_systab.stderr_handle = systab32->stderr_handle;
		efi_systab.stderr = systab32->stderr;
		efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
		efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
		efi_systab.nr_tables = systab32->nr_tables;
		efi_systab.tables = systab32->tables;

		early_memunmap(systab32, sizeof(*systab32));
	}

	efi.systab = &efi_systab;

	/*
	 * Verify the EFI Table
	 */
	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
		pr_err("System table signature incorrect!\n");
		return -EINVAL;
	}

	/* A major revision of 0 is only reported, it is not fatal. */
	if ((efi.systab->hdr.revision >> 16) == 0)
		pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n",
		       efi.systab->hdr.revision >> 16,
		       efi.systab->hdr.revision & 0xffff);

	set_bit(EFI_SYSTEM_TABLES, &efi.flags);

	return 0;
}
H
Huang, Ying 已提交
326

327
/*
 * Temporarily map the 32-bit runtime services table and record the
 * firmware's SetVirtualAddressMap() entry point in efi_phys.
 *
 * Returns 0 on success or -ENOMEM when the table cannot be mapped.
 */
static int __init efi_runtime_init32(void)
{
	efi_runtime_services_32_t *rt;

	rt = early_memremap((unsigned long)efi.systab->runtime, sizeof(*rt));
	if (!rt) {
		pr_err("Could not map the runtime service table!\n");
		return -ENOMEM;
	}

	/*
	 * set_virtual_address_map is the only runtime service we need
	 * *early* access to, i.e. before set_virtual_address_map itself
	 * has been invoked.
	 */
	efi_phys.set_virtual_address_map =
			(efi_set_virtual_address_map_t *)
			(unsigned long)rt->set_virtual_address_map;

	early_memunmap(rt, sizeof(*rt));

	return 0;
}

/*
 * Temporarily map the 64-bit runtime services table and record the
 * firmware's SetVirtualAddressMap() entry point in efi_phys.
 *
 * Returns 0 on success or -ENOMEM when the table cannot be mapped.
 */
static int __init efi_runtime_init64(void)
{
	efi_runtime_services_64_t *rt;

	rt = early_memremap((unsigned long)efi.systab->runtime, sizeof(*rt));
	if (!rt) {
		pr_err("Could not map the runtime service table!\n");
		return -ENOMEM;
	}

	/*
	 * set_virtual_address_map is the only runtime service we need
	 * *early* access to, i.e. before set_virtual_address_map itself
	 * has been invoked.
	 */
	efi_phys.set_virtual_address_map =
			(efi_set_virtual_address_map_t *)
			(unsigned long)rt->set_virtual_address_map;

	early_memunmap(rt, sizeof(*rt));

	return 0;
}

/*
 * Look up what is needed from the runtime services table and mark runtime
 * services as available.
 *
 * Returns 0 on success or the error from efi_runtime_init32()/
 * efi_runtime_init64(); EFI_RUNTIME_SERVICES is only set on success.
 */
static int __init efi_runtime_init(void)
{
	/*
	 * Check out the runtime services table. We need to map
	 * the runtime services table so that we can grab the physical
	 * address of several of the EFI runtime functions, needed to
	 * set the firmware into virtual mode.
	 *
	 * When EFI_PARAVIRT is in force then we could not map runtime
	 * service memory region because we do not have direct access to it.
	 * However, runtime services are available through proxy functions
	 * (e.g. in case of Xen dom0 EFI implementation they call special
	 * hypercall which executes relevant EFI functions) and that is why
	 * they are always enabled.
	 */
	if (!efi_enabled(EFI_PARAVIRT)) {
		int err = efi_enabled(EFI_64BIT) ? efi_runtime_init64()
						 : efi_runtime_init32();

		if (err)
			return err;
	}

	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);

	return 0;
}
H
Huang, Ying 已提交
407

408
/*
 * Map the EFI memory map recorded by efi_memblock_x86_reserve_range(),
 * optionally feed it into e820 (add_efi_memmap) and set EFI_MEMMAP.
 *
 * Returns 0 on success or when EFI_PARAVIRT is enabled, -ENOMEM when the
 * map cannot be mapped.
 */
static int __init efi_memmap_init(void)
{
	unsigned long map_bytes;

	if (efi_enabled(EFI_PARAVIRT))
		return 0;

	/* Map the EFI memory map */
	map_bytes = memmap.nr_map * memmap.desc_size;
	memmap.map = early_memremap((unsigned long)memmap.phys_map, map_bytes);
	if (memmap.map == NULL) {
		pr_err("Could not map the memory map!\n");
		return -ENOMEM;
	}
	memmap.map_end = memmap.map + map_bytes;

	if (add_efi_memmap)
		do_add_efi_memmap();

	set_bit(EFI_MEMMAP, &efi.flags);

	return 0;
}

/*
 * Early EFI bring-up: locate and copy the system table, report the
 * firmware vendor and revision, parse the configuration tables, look up
 * the runtime services and map the memory map.
 *
 * Any failure simply returns; the EFI feature flags set by the helpers
 * that already succeeded stay set.
 */
void __init efi_init(void)
{
	efi_char16_t *c16;
	char vendor[100] = "unknown";
	int i = 0;

#ifdef CONFIG_X86_32
	if (boot_params.efi_info.efi_systab_hi ||
	    boot_params.efi_info.efi_memmap_hi) {
		pr_info("Table located above 4GB, disabling EFI.\n");
		return;
	}
	efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
#else
	efi_phys.systab = (efi_system_table_t *)
			  (boot_params.efi_info.efi_systab |
			  ((__u64)boot_params.efi_info.efi_systab_hi<<32));
#endif

	if (efi_systab_init(efi_phys.systab))
		return;

	efi.config_table = (unsigned long)efi.systab->tables;
	efi.fw_vendor	 = (unsigned long)efi.systab->fw_vendor;
	efi.runtime	 = (unsigned long)efi.systab->runtime;

	/*
	 * Show what we know for posterity
	 *
	 * Map as many UTF-16 characters as the loop below may read, so the
	 * copy cannot run past the end of the mapping, and only unmap when
	 * the mapping actually succeeded.
	 */
	c16 = early_memremap(efi.systab->fw_vendor,
			     sizeof(vendor) * sizeof(efi_char16_t));
	if (c16) {
		for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i)
			vendor[i] = c16[i];
		vendor[i] = '\0';
		early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t));
	} else {
		pr_err("Could not map the firmware vendor!\n");
	}

	pr_info("EFI v%u.%.02u by %s\n",
		efi.systab->hdr.revision >> 16,
		efi.systab->hdr.revision & 0xffff, vendor);

	if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables))
		return;

	if (efi_config_init(arch_tables))
		return;

	/*
	 * Note: We currently don't support runtime services on an EFI
	 * that doesn't match the kernel 32/64-bit mode.
	 */

	if (!efi_runtime_supported())
		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
	else {
		if (efi_runtime_disabled() || efi_runtime_init())
			return;
	}
	if (efi_memmap_init())
		return;

	print_efi_memmap();
}

496 497 498 499 500
/* Late EFI initialisation hook; currently it only calls efi_bgrt_init(). */
void __init efi_late_init(void)
{
	efi_bgrt_init();
}

501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
/*
 * Make the virtual range described by @md executable (@executable true)
 * or non-executable (@executable false).  The descriptor's address and
 * page count are converted with memrange_efi_to_native() first.
 */
void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
{
	u64 addr = md->virt_addr;
	u64 npages = md->num_pages;

	memrange_efi_to_native(&addr, &npages);

	if (!executable) {
		set_memory_nx(addr, npages);
		return;
	}

	set_memory_x(addr, npages);
}

B
Borislav Petkov 已提交
516
/* Make every EFI_RUNTIME_SERVICES_CODE region of the memory map executable. */
void __init runtime_code_page_mkexec(void)
{
	void *cur;

	for (cur = memmap.map; cur < memmap.map_end; cur += memmap.desc_size) {
		efi_memory_desc_t *md = cur;

		if (md->type == EFI_RUNTIME_SERVICES_CODE)
			efi_set_executable(md, true);
	}
}

532 533 534 535 536 537 538 539 540 541
/*
 * Mark @size bytes starting at virtual address @addr as uncached.  The
 * size is rounded up to whole EFI pages, then the range is converted with
 * memrange_efi_to_native() before set_memory_uc() is applied.
 */
void efi_memory_uc(u64 addr, unsigned long size)
{
	unsigned long efi_page_size = 1UL << EFI_PAGE_SHIFT;
	u64 npages = round_up(size, efi_page_size) / efi_page_size;

	memrange_efi_to_native(&addr, &npages);
	set_memory_uc(addr, npages);
}

542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567
/*
 * Give the region described by @md a kernel virtual address and store it
 * in md->virt_addr.
 *
 * Ranges that are already mapped are addressed through __va() (and made
 * uncached unless the descriptor is marked write-back); anything else goes
 * through efi_ioremap().  A failed mapping is logged and leaves virt_addr
 * at 0.
 */
void __init old_map_region(efi_memory_desc_t *md)
{
	unsigned long size = md->num_pages << PAGE_SHIFT;
	u64 end = md->phys_addr + size;
	u64 first_pfn = PFN_DOWN(md->phys_addr);
	u64 last_pfn = PFN_UP(end);
	void *va;

	if (!pfn_range_is_mapped(first_pfn, last_pfn)) {
		va = efi_ioremap(md->phys_addr, size,
				 md->type, md->attribute);
	} else {
		va = __va(md->phys_addr);

		if (!(md->attribute & EFI_MEMORY_WB))
			efi_memory_uc((u64)(unsigned long)va, size);
	}

	md->virt_addr = (u64) (unsigned long) va;
	if (!va)
		pr_err("ioremap of 0x%llX failed!\n",
		       (unsigned long long)md->phys_addr);
}

568 569
/*
 * Merge contiguous regions of the same type and attribute.
 *
 * A descriptor that directly follows the previous kept one in physical
 * memory, with identical type and attributes, is folded into it: the
 * earlier descriptor grows by the later one's page count and the later
 * one is neutralised (EFI_RESERVED_TYPE, no attributes).
 */
static void __init efi_merge_regions(void)
{
	efi_memory_desc_t *prev_md = NULL;
	void *cur;

	for (cur = memmap.map; cur < memmap.map_end; cur += memmap.desc_size) {
		efi_memory_desc_t *md = cur;

		if (prev_md &&
		    prev_md->type == md->type &&
		    prev_md->attribute == md->attribute &&
		    md->phys_addr == prev_md->phys_addr +
				     (prev_md->num_pages << EFI_PAGE_SHIFT)) {
			prev_md->num_pages += md->num_pages;
			md->type = EFI_RESERVED_TYPE;
			md->attribute = 0;
			continue;
		}

		/* Not mergeable: this descriptor starts a new run. */
		prev_md = md;
	}
}

/*
 * If the physical system table address (efi_phys.systab) lies inside the
 * region described by @md, point efi.systab at the matching virtual
 * address, derived from the descriptor's virt_addr/phys_addr offset.
 */
static void __init get_systab_virt_addr(efi_memory_desc_t *md)
{
	unsigned long size = md->num_pages << EFI_PAGE_SHIFT;
	u64 end = md->phys_addr + size;
	u64 systab = (u64)(unsigned long)efi_phys.systab;

	if (systab < md->phys_addr || systab >= end)
		return;

	systab += md->virt_addr - md->phys_addr;
	efi.systab = (efi_system_table_t *)(unsigned long)systab;
}

615
/*
 * Collect a copy of every runtime descriptor (EFI_MEMORY_RUNTIME set and
 * not a boot-services region) and hand the array to
 * efi_runtime_map_setup().  Nothing is done with efi=old_map, and the
 * function is empty without CONFIG_KEXEC.
 *
 * On allocation failure the partial copy is freed and an error is logged.
 */
static void __init save_runtime_map(void)
{
#ifdef CONFIG_KEXEC
	void *cur, *entries = NULL;
	int nr = 0;

	if (efi_enabled(EFI_OLD_MEMMAP))
		return;

	for (cur = memmap.map; cur < memmap.map_end; cur += memmap.desc_size) {
		efi_memory_desc_t *md = cur;
		void *grown;

		if (!(md->attribute & EFI_MEMORY_RUNTIME))
			continue;
		if (md->type == EFI_BOOT_SERVICES_CODE ||
		    md->type == EFI_BOOT_SERVICES_DATA)
			continue;

		grown = krealloc(entries, (nr + 1) * memmap.desc_size,
				 GFP_KERNEL);
		if (!grown) {
			kfree(entries);
			pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
			return;
		}
		entries = grown;

		memcpy(entries + nr * memmap.desc_size, md, memmap.desc_size);
		nr++;
	}

	efi_runtime_map_setup(entries, nr, memmap.desc_size);
#endif
}

650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670
/*
 * Grow a page allocation by one order.
 *
 * @old_memmap: current buffer of order @old_shift, or NULL on first use
 * @old_shift:  order of @old_memmap
 *
 * Allocates a buffer of order @old_shift + 1, copies the old contents into
 * it and frees the old buffer.  The old buffer is freed even when the new
 * allocation fails.  Returns the new buffer or NULL.
 */
static void *realloc_pages(void *old_memmap, int old_shift)
{
	void *bigger = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);

	/*
	 * A first-time allocation doesn't have anything to copy.
	 */
	if (bigger && !old_memmap)
		return bigger;

	if (bigger)
		memcpy(bigger, old_memmap, PAGE_SIZE << old_shift);

	free_pages((unsigned long)old_memmap, old_shift);
	return bigger;
}

671
/*
672 673
 * Map the efi memory ranges of the runtime services and update new_mmap with
 * virtual addresses.
674
 */
675
static void * __init efi_map_regions(int *count, int *pg_shift)
676
{
677 678
	void *p, *new_memmap = NULL;
	unsigned long left = 0;
679
	efi_memory_desc_t *md;
680

H
Huang, Ying 已提交
681 682
	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
		md = p;
683 684 685 686 687 688 689
		if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
#ifdef CONFIG_X86_64
			if (md->type != EFI_BOOT_SERVICES_CODE &&
			    md->type != EFI_BOOT_SERVICES_DATA)
#endif
				continue;
		}
H
Huang, Ying 已提交
690

691
		efi_map_region(md);
692 693
		get_systab_virt_addr(md);

694 695 696 697 698 699 700 701 702
		if (left < memmap.desc_size) {
			new_memmap = realloc_pages(new_memmap, *pg_shift);
			if (!new_memmap)
				return NULL;

			left += PAGE_SIZE << *pg_shift;
			(*pg_shift)++;
		}

703 704
		memcpy(new_memmap + (*count * memmap.desc_size), md,
		       memmap.desc_size);
705 706

		left -= memmap.desc_size;
707 708
		(*count)++;
	}
709

710 711 712
	return new_memmap;
}

713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752
/*
 * Virtual-mode setup for a kexec boot (efi_setup is set): every region is
 * mapped at the virtual address already stored in its descriptor, and
 * SetVirtualAddressMap() is not called.  Empty without CONFIG_KEXEC.
 */
static void __init kexec_enter_virtual_mode(void)
{
#ifdef CONFIG_KEXEC
	void *cur;

	efi.systab = NULL;

	/*
	 * We don't do virtual mode, since we don't do runtime services, on
	 * non-native EFI
	 */
	if (!efi_is_native()) {
		efi_unmap_memmap();
		return;
	}

	/*
	 * Map efi regions which were passed via setup_data. The virt_addr is a
	 * fixed addr which was used in first kernel of a kexec boot.
	 */
	for (cur = memmap.map; cur < memmap.map_end; cur += memmap.desc_size) {
		efi_memory_desc_t *md = cur;

		efi_map_region_fixed(md); /* FIXME: add error handling */
		get_systab_virt_addr(md);
	}

	save_runtime_map();

	/* One of the regions mapped above must have held the system table. */
	BUG_ON(!efi.systab);

	efi_sync_low_kernel_mappings();

	/*
	 * Now that EFI is in virtual mode, update the function
	 * pointers in the runtime service table to the new virtual addresses.
	 *
	 * Call EFI services through wrapper functions.
	 */
	efi.runtime_version = efi_systab.hdr.revision;

	efi_native_runtime_setup();

	efi.set_virtual_address_map = NULL;

	if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
		runtime_code_page_mkexec();

	/* clean DUMMY object */
	efi_delete_dummy_variable();
#endif
}

766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781
/*
 * This function will switch the EFI runtime services to virtual mode.
 * Essentially, we look through the EFI memmap and map every region that
 * has the runtime attribute bit set in its memory descriptor into the
 * ->trampoline_pgd page table using a top-down VA allocation scheme.
 *
 * The old method which used to update that memory descriptor with the
 * virtual address obtained from ioremap() is still supported when the
 * kernel is booted with efi=old_map on its command line. Same old
 * method enabled the runtime services to be called without having to
 * thunk back into physical mode for every invocation.
 *
 * The new method does a pagetable switch in a preemption-safe manner
 * so that we're in a different address space when calling a runtime
 * function. For function arguments passing we do copy the PGDs of the
 * kernel page table into ->trampoline_pgd prior to each call.
782 783 784
 *
 * Specially for kexec boot, efi runtime maps in previous kernel should
 * be passed in via setup_data. In that case runtime ranges will be mapped
785 786
 * to the same virtual addresses as the first kernel, see
 * kexec_enter_virtual_mode().
787
 */
788
static void __init __efi_enter_virtual_mode(void)
789
{
790
	int count = 0, pg_shift = 0;
791
	void *new_memmap = NULL;
792
	efi_status_t status;
H
Huang, Ying 已提交
793

794
	efi.systab = NULL;
795

796 797 798 799 800
	efi_merge_regions();
	new_memmap = efi_map_regions(&count, &pg_shift);
	if (!new_memmap) {
		pr_err("Error reallocating memory, EFI runtime non-functional!\n");
		return;
801
	}
802

803 804
	save_runtime_map();

H
Huang, Ying 已提交
805 806
	BUG_ON(!efi.systab);

807 808
	if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
		return;
809

810
	efi_sync_low_kernel_mappings();
811
	efi_dump_pagetable();
812

813 814 815 816 817 818 819 820 821 822 823 824 825 826
	if (efi_is_native()) {
		status = phys_efi_set_virtual_address_map(
				memmap.desc_size * count,
				memmap.desc_size,
				memmap.desc_version,
				(efi_memory_desc_t *)__pa(new_memmap));
	} else {
		status = efi_thunk_set_virtual_address_map(
				efi_phys.set_virtual_address_map,
				memmap.desc_size * count,
				memmap.desc_size,
				memmap.desc_version,
				(efi_memory_desc_t *)__pa(new_memmap));
	}
827

828 829 830 831
	if (status != EFI_SUCCESS) {
		pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
			 status);
		panic("EFI call to SetVirtualAddressMap() failed!");
H
Huang, Ying 已提交
832 833 834 835 836 837 838 839
	}

	/*
	 * Now that EFI is in virtual mode, update the function
	 * pointers in the runtime service table to the new virtual addresses.
	 *
	 * Call EFI services through wrapper functions.
	 */
840
	efi.runtime_version = efi_systab.hdr.revision;
841 842

	if (efi_is_native())
843
		efi_native_runtime_setup();
844 845 846
	else
		efi_thunk_runtime_setup();

847
	efi.set_virtual_address_map = NULL;
848

B
Borislav Petkov 已提交
849
	efi_runtime_mkexec();
850

851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876
	/*
	 * We mapped the descriptor array into the EFI pagetable above but we're
	 * not unmapping it here. Here's why:
	 *
	 * We're copying select PGDs from the kernel page table to the EFI page
	 * table and when we do so and make changes to those PGDs like unmapping
	 * stuff from them, those changes appear in the kernel page table and we
	 * go boom.
	 *
	 * From setup_real_mode():
	 *
	 * ...
	 * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
	 *
	 * In this particular case, our allocation is in PGD 0 of the EFI page
	 * table but we've copied that PGD from PGD[272] of the EFI page table:
	 *
	 *	pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
	 *
	 * where the direct memory mapping in kernel space is.
	 *
	 * new_memmap's VA comes from that direct mapping and thus clearing it,
	 * it would get cleared in the kernel page table too.
	 *
	 * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
	 */
877
	free_pages((unsigned long)new_memmap, pg_shift);
M
Matthew Garrett 已提交
878 879

	/* clean DUMMY object */
880
	efi_delete_dummy_variable();
H
Huang, Ying 已提交
881 882
}

883 884
/*
 * Switch EFI to virtual mode.  Nothing is done under EFI_PARAVIRT; a
 * kexec boot (efi_setup set) takes kexec_enter_virtual_mode(), a regular
 * boot takes __efi_enter_virtual_mode().
 */
void __init efi_enter_virtual_mode(void)
{
	if (efi_enabled(EFI_PARAVIRT))
		return;

	if (!efi_setup) {
		__efi_enter_virtual_mode();
		return;
	}

	kexec_enter_virtual_mode();
}

H
Huang, Ying 已提交
894 895 896 897 898 899 900 901
/*
 * Convenience functions to obtain memory types and attributes
 */
u32 efi_mem_type(unsigned long phys_addr)
{
	efi_memory_desc_t *md;
	void *p;

902 903 904
	if (!efi_enabled(EFI_MEMMAP))
		return 0;

H
Huang, Ying 已提交
905 906 907 908 909 910 911 912 913 914 915 916 917 918 919
	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
		md = p;
		if ((md->phys_addr <= phys_addr) &&
		    (phys_addr < (md->phys_addr +
				  (md->num_pages << EFI_PAGE_SHIFT))))
			return md->type;
	}
	return 0;
}

u64 efi_mem_attributes(unsigned long phys_addr)
{
	efi_memory_desc_t *md;
	void *p;

920 921 922
	if (!efi_enabled(EFI_MEMMAP))
		return 0;

H
Huang, Ying 已提交
923 924 925 926 927 928 929 930 931
	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
		md = p;
		if ((md->phys_addr <= phys_addr) &&
		    (phys_addr < (md->phys_addr +
				  (md->num_pages << EFI_PAGE_SHIFT))))
			return md->attribute;
	}
	return 0;
}
932

D
Dave Young 已提交
933
static int __init arch_parse_efi_cmdline(char *str)
934
{
935 936
	if (parse_option_str(str, "old_map"))
		set_bit(EFI_OLD_MEMMAP, &efi.flags);
937 938 939

	return 0;
}
D
Dave Young 已提交
940
early_param("efi", arch_parse_efi_cmdline);