// SPDX-License-Identifier: GPL-2.0
/*
 * Common EFI (Extensible Firmware Interface) support functions
 * Based on Extensible Firmware Interface Specification version 1.0
 *
 * Copyright (C) 1999 VA Linux Systems
 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
 * Copyright (C) 1999-2002 Hewlett-Packard Co.
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *	Stephane Eranian <eranian@hpl.hp.com>
 * Copyright (C) 2005-2008 Intel Co.
 *	Fenghua Yu <fenghua.yu@intel.com>
 *	Bibo Mao <bibo.mao@intel.com>
 *	Chandramouli Narayanan <mouli@linux.intel.com>
 *	Huang Ying <ying.huang@intel.com>
 * Copyright (C) 2013 SuSE Labs
 *	Borislav Petkov <bp@suse.de> - runtime services VA mapping
 *
 * Copied from efi_32.c to eliminate the duplicated code between EFI
 * 32/64 support code. --ying 2007-10-26
 *
 * All EFI Runtime Services are not implemented yet as EFI only
 * supports physical mode addressing on SoftSDV. This is to be fixed
 * in a future version.  --drummond 1999-07-20
 *
 * Implemented EFI runtime services and virtual mode calls.  --davidm
 *
 * Goutham Rao: <goutham.rao@intel.com>
 *	Skip non-WB memory and ignore empty memory ranges.
 */

32 33
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

H
Huang, Ying 已提交
34 35 36
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/efi.h>
37
#include <linux/efi-bgrt.h>
38
#include <linux/export.h>
39
#include <linux/memblock.h>
M
Mike Rapoport 已提交
40
#include <linux/slab.h>
H
Huang, Ying 已提交
41 42 43 44 45 46 47 48 49
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/time.h>
#include <linux/io.h>
#include <linux/reboot.h>
#include <linux/bcd.h>

#include <asm/setup.h>
#include <asm/efi.h>
50
#include <asm/e820/api.h>
H
Huang, Ying 已提交
51
#include <asm/time.h>
L
Laura Abbott 已提交
52
#include <asm/set_memory.h>
53
#include <asm/tlbflush.h>
54
#include <asm/x86_init.h>
B
Borislav Petkov 已提交
55
#include <asm/uv/uv.h>
H
Huang, Ying 已提交
56

H
Harvey Harrison 已提交
57
static struct efi efi_phys __initdata;
H
Huang, Ying 已提交
58 59
static efi_system_table_t efi_systab __initdata;

J
Joe Perches 已提交
60
/*
 * x86-specific configuration tables passed to efi_config_init().
 * NOTE(review): the trailing all-NULL entry presumably terminates the
 * list - confirm against efi_config_init().
 */
static efi_config_table_type_t arch_tables[] __initdata = {
#ifdef CONFIG_X86_UV
	{ UV_SYSTEM_TABLE_GUID, "UVsystab", &uv_systab_phys },
#endif
	{ NULL_GUID, NULL, NULL },
};

67 68 69 70 71 72 73 74 75
/*
 * Locations holding the physical addresses of the known firmware
 * tables; efi_is_table_address() compares an address against each.
 */
static const unsigned long * const efi_tables[] = {
	&efi.mps,
	&efi.acpi,
	&efi.acpi20,
	&efi.smbios,
	&efi.smbios3,
	&efi.boot_info,
	&efi.hcdp,
	&efi.uga,
#ifdef CONFIG_X86_UV
	&uv_systab_phys,
#endif
	&efi.fw_vendor,
	&efi.runtime,
	&efi.config_table,
	&efi.esrt,
	&efi.properties_table,
	&efi.mem_attr_table,
};

87
u64 efi_setup;		/* efi setup_data physical address */
88

89
static int add_efi_memmap __initdata;
90 91 92 93 94 95 96
static int __init setup_add_efi_memmap(char *arg)
{
	add_efi_memmap = 1;
	return 0;
}
early_param("add_efi_memmap", setup_add_efi_memmap);

H
Huang, Ying 已提交
97 98 99 100 101 102 103
/*
 * Call the firmware's SetVirtualAddressMap() through its physical
 * entry point (efi_phys.set_virtual_address_map).
 *
 * The call is bracketed by efi_call_phys_prolog()/epilog() and runs
 * with local interrupts disabled.
 *
 * Returns EFI_ABORTED if the prolog yields no saved PGD, otherwise the
 * status reported by the firmware call.
 */
static efi_status_t __init phys_efi_set_virtual_address_map(
	unsigned long memory_map_size,
	unsigned long descriptor_size,
	u32 descriptor_version,
	efi_memory_desc_t *virtual_map)
{
	unsigned long irq_flags;
	efi_status_t ret;
	pgd_t *saved_pgd;

	saved_pgd = efi_call_phys_prolog();
	if (saved_pgd == NULL)
		return EFI_ABORTED;

	/* Disable interrupts around EFI calls: */
	local_irq_save(irq_flags);
	ret = efi_call_phys(efi_phys.set_virtual_address_map,
			    memory_map_size, descriptor_size,
			    descriptor_version, virtual_map);
	local_irq_restore(irq_flags);

	efi_call_phys_epilog(saved_pgd);

	return ret;
}

123 124
/*
 * Walk the EFI memory map, mark every range carrying the
 * EFI_MEMORY_MORE_RELIABLE attribute as mirrored in memblock, and
 * report the mirrored/total amounts if any mirrored memory was found.
 */
void __init efi_find_mirror(void)
{
	u64 total_bytes = 0, mirrored_bytes = 0;
	efi_memory_desc_t *md;

	for_each_efi_memory_desc(md) {
		unsigned long long base = md->phys_addr;
		unsigned long long bytes = md->num_pages << EFI_PAGE_SHIFT;

		total_bytes += bytes;

		if (!(md->attribute & EFI_MEMORY_MORE_RELIABLE))
			continue;

		memblock_mark_mirror(base, bytes);
		mirrored_bytes += bytes;
	}

	if (mirrored_bytes == 0)
		return;

	pr_info("Memory: %lldM/%lldM mirrored memory\n",
		mirrored_bytes >> 20, total_bytes >> 20);
}

143 144 145 146 147 148
/*
 * Tell the kernel about the EFI memory map.  This might include
 * more than the max 128 entries that can fit in the e820 legacy
 * (zeropage) memory map.
 */

149
static void __init do_add_efi_memmap(void)
150
{
151
	efi_memory_desc_t *md;
152

153
	for_each_efi_memory_desc(md) {
154 155 156 157
		unsigned long long start = md->phys_addr;
		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
		int e820_type;

158 159 160 161 162 163 164
		switch (md->type) {
		case EFI_LOADER_CODE:
		case EFI_LOADER_DATA:
		case EFI_BOOT_SERVICES_CODE:
		case EFI_BOOT_SERVICES_DATA:
		case EFI_CONVENTIONAL_MEMORY:
			if (md->attribute & EFI_MEMORY_WB)
165
				e820_type = E820_TYPE_RAM;
166
			else
167
				e820_type = E820_TYPE_RESERVED;
168 169
			break;
		case EFI_ACPI_RECLAIM_MEMORY:
170
			e820_type = E820_TYPE_ACPI;
171 172
			break;
		case EFI_ACPI_MEMORY_NVS:
173
			e820_type = E820_TYPE_NVS;
174 175
			break;
		case EFI_UNUSABLE_MEMORY:
176
			e820_type = E820_TYPE_UNUSABLE;
177
			break;
178
		case EFI_PERSISTENT_MEMORY:
179
			e820_type = E820_TYPE_PMEM;
180
			break;
181 182 183 184 185 186
		default:
			/*
			 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
			 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
			 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
			 */
187
			e820_type = E820_TYPE_RESERVED;
188 189
			break;
		}
190
		e820__range_add(start, size, e820_type);
191
	}
192
	e820__update_table(e820_table);
193 194
}

195
/*
 * Set up early access to the EFI memory map described in
 * boot_params.efi_info and reserve the memory the map occupies.
 *
 * Nothing is done when EFI_PARAVIRT is enabled. If "add_efi_memmap"
 * was requested, the map is also added to the e820 table.
 *
 * Returns 0 on success, -EINVAL when a 32-bit kernel is handed a map
 * above 4GB, or the error from efi_memmap_init_early().
 */
int __init efi_memblock_x86_reserve_range(void)
{
	struct efi_info *info = &boot_params.efi_info;
	struct efi_memory_map_data data;
	phys_addr_t map_pa;
	int err;

	if (efi_enabled(EFI_PARAVIRT))
		return 0;

#ifdef CONFIG_X86_32
	/* Can't handle data above 4GB at this time */
	if (info->efi_memmap_hi) {
		pr_err("Memory map is above 4GB, disabling EFI.\n");
		return -EINVAL;
	}
	map_pa = info->efi_memmap;
#else
	map_pa = (info->efi_memmap | ((__u64)info->efi_memmap_hi << 32));
#endif

	data.phys_map		= map_pa;
	data.size		= info->efi_memmap_size;
	data.desc_size		= info->efi_memdesc_size;
	data.desc_version	= info->efi_memdesc_version;

	err = efi_memmap_init_early(&data);
	if (err)
		return err;

	if (add_efi_memmap)
		do_add_efi_memmap();

	WARN(efi.memmap.desc_version != 1,
	     "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
	     efi.memmap.desc_version);

	/* Keep the memory holding the map itself from being handed out. */
	memblock_reserve(map_pa, efi.memmap.nr_map * efi.memmap.desc_size);

	return 0;
}

236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
#define OVERFLOW_ADDR_SHIFT	(64 - EFI_PAGE_SHIFT)
#define OVERFLOW_ADDR_MASK	(U64_MAX << OVERFLOW_ADDR_SHIFT)
#define U64_HIGH_BIT		(~(U64_MAX >> 1))

/*
 * Check a single memory map descriptor for sanity.
 * @md: descriptor to check
 * @i:  index of the descriptor, used only in the warning message
 *
 * An entry is rejected when it covers zero pages or when its page
 * count makes the range extend past the end of the 64-bit address
 * space. Rejected entries are logged; for overflowing ranges the bits
 * above bit 63 of the end address are printed as an extra prefix.
 *
 * Returns true if the entry is usable, false otherwise.
 */
static bool __init efi_memmap_entry_valid(const efi_memory_desc_t *md, int i)
{
	u64 last = (md->num_pages << EFI_PAGE_SHIFT) + md->phys_addr - 1;
	u64 last_hi = 0;
	char buf[64];
	bool wraps;

	wraps = md->num_pages > EFI_PAGES_MAX ||
		EFI_PAGES_MAX - md->num_pages <
		(md->phys_addr >> EFI_PAGE_SHIFT);

	if (md->num_pages != 0 && !wraps)
		return true;

	if (md->num_pages == 0) {
		last = 0;
	} else {
		/* Bits of the byte size that were shifted out of 64 bits. */
		last_hi = (md->num_pages & OVERFLOW_ADDR_MASK)
			>> OVERFLOW_ADDR_SHIFT;

		/* Account for the carry out of the 64-bit addition. */
		if ((md->phys_addr & U64_HIGH_BIT) && !(last & U64_HIGH_BIT))
			last_hi += 1;
	}

	pr_warn_once(FW_BUG "Invalid EFI memory map entries:\n");

	if (last_hi) {
		pr_warn("mem%02u: %s range=[0x%016llx-0x%llx%016llx] (invalid)\n",
			i, efi_md_typeattr_format(buf, sizeof(buf), md),
			md->phys_addr, last_hi, last);
	} else {
		pr_warn("mem%02u: %s range=[0x%016llx-0x%016llx] (invalid)\n",
			i, efi_md_typeattr_format(buf, sizeof(buf), md),
			md->phys_addr, last);
	}

	return false;
}

/*
 * Drop invalid descriptors from the EFI memory map by compacting the
 * valid ones towards the start of the map in place. If anything was
 * removed, the shortened map is installed via efi_memmap_install().
 */
static void __init efi_clean_memmap(void)
{
	const efi_memory_desc_t *limit = efi.memmap.map_end;
	const efi_memory_desc_t *src = efi.memmap.map;
	efi_memory_desc_t *dst = efi.memmap.map;
	int idx = 0, dropped = 0;
	u64 remaining;

	while (src < limit) {
		if (!efi_memmap_entry_valid(src, idx)) {
			dropped++;
		} else {
			/* Only copy once a gap has opened up. */
			if (dst != src)
				memcpy(dst, src, efi.memmap.desc_size);
			dst = (void *)dst + efi.memmap.desc_size;
		}

		src = (void *)src + efi.memmap.desc_size;
		idx++;
	}

	if (dropped <= 0)
		return;

	remaining = efi.memmap.nr_map - dropped;

	pr_warn("Removing %d invalid memory map entries.\n", dropped);
	efi_memmap_install(efi.memmap.phys_map, remaining);
}

300
/*
 * Log one line per EFI memory descriptor: index, type/attribute
 * string, physical range and size in MB.
 */
void __init efi_print_memmap(void)
{
	efi_memory_desc_t *md;
	int idx = 0;

	for_each_efi_memory_desc(md) {
		char buf[64];

		pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
			idx, efi_md_typeattr_format(buf, sizeof(buf), md),
			md->phys_addr,
			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
		idx++;
	}
}
H
Huang, Ying 已提交
315

316
/*
 * Copy the firmware system table at physical address @phys into the
 * local efi_systab, point efi.systab at the copy and verify it.
 *
 * With EFI_64BIT the 64-bit table layout is read. When efi_setup is
 * set, fw_vendor, runtime and tables are taken from the efi_setup_data
 * found there instead of from the table. On 32-bit kernels a 64-bit
 * table with any address above 4GB is rejected.
 *
 * Returns 0 on success, -ENOMEM if a mapping fails, -EINVAL for
 * addresses above 4GB (32-bit kernels) or a bad table signature.
 */
static int __init efi_systab_init(void *phys)
{
	if (efi_enabled(EFI_64BIT)) {
		struct efi_setup_data *setup = NULL;
		efi_system_table_64_t *st64;
		u64 fw_vendor, runtime, tables;
		u64 addr_bits = 0;

		if (efi_setup) {
			setup = early_memremap(efi_setup, sizeof(*setup));
			if (!setup)
				return -ENOMEM;
		}

		st64 = early_memremap((unsigned long)phys, sizeof(*st64));
		if (!st64) {
			pr_err("Couldn't map the system table!\n");
			if (setup)
				early_memunmap(setup, sizeof(*setup));
			return -ENOMEM;
		}

		/* These three prefer the setup_data values when present. */
		fw_vendor = setup ? setup->fw_vendor : st64->fw_vendor;
		runtime   = setup ? setup->runtime : st64->runtime;
		tables    = setup ? setup->tables : st64->tables;

		efi_systab.hdr = st64->hdr;
		efi_systab.fw_vendor = (unsigned long)fw_vendor;
		efi_systab.fw_revision = st64->fw_revision;
		efi_systab.con_in_handle = st64->con_in_handle;
		efi_systab.con_in = st64->con_in;
		efi_systab.con_out_handle = st64->con_out_handle;
		efi_systab.con_out = st64->con_out;
		efi_systab.stderr_handle = st64->stderr_handle;
		efi_systab.stderr = st64->stderr;
		efi_systab.runtime = (void *)(unsigned long)runtime;
		efi_systab.boottime = (void *)(unsigned long)st64->boottime;
		efi_systab.nr_tables = st64->nr_tables;
		efi_systab.tables = (unsigned long)tables;

		/* OR all addresses together for the above-4GB check below. */
		addr_bits |= fw_vendor;
		addr_bits |= st64->con_in_handle;
		addr_bits |= st64->con_in;
		addr_bits |= st64->con_out_handle;
		addr_bits |= st64->con_out;
		addr_bits |= st64->stderr_handle;
		addr_bits |= st64->stderr;
		addr_bits |= runtime;
		addr_bits |= st64->boottime;
		addr_bits |= tables;

		early_memunmap(st64, sizeof(*st64));
		if (setup)
			early_memunmap(setup, sizeof(*setup));
#ifdef CONFIG_X86_32
		if (addr_bits >> 32) {
			pr_err("EFI data located above 4GB, disabling EFI.\n");
			return -EINVAL;
		}
#endif
	} else {
		efi_system_table_32_t *st32;

		st32 = early_memremap((unsigned long)phys, sizeof(*st32));
		if (!st32) {
			pr_err("Couldn't map the system table!\n");
			return -ENOMEM;
		}

		efi_systab.hdr = st32->hdr;
		efi_systab.fw_vendor = st32->fw_vendor;
		efi_systab.fw_revision = st32->fw_revision;
		efi_systab.con_in_handle = st32->con_in_handle;
		efi_systab.con_in = st32->con_in;
		efi_systab.con_out_handle = st32->con_out_handle;
		efi_systab.con_out = st32->con_out;
		efi_systab.stderr_handle = st32->stderr_handle;
		efi_systab.stderr = st32->stderr;
		efi_systab.runtime = (void *)(unsigned long)st32->runtime;
		efi_systab.boottime = (void *)(unsigned long)st32->boottime;
		efi_systab.nr_tables = st32->nr_tables;
		efi_systab.tables = st32->tables;

		early_memunmap(st32, sizeof(*st32));
	}

	efi.systab = &efi_systab;

	/*
	 * Verify the EFI Table
	 */
	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
		pr_err("System table signature incorrect!\n");
		return -EINVAL;
	}

	/* A major revision of 0 is only warned about, not rejected. */
	if ((efi.systab->hdr.revision >> 16) == 0)
		pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n",
		       efi.systab->hdr.revision >> 16,
		       efi.systab->hdr.revision & 0xffff);

	return 0;
}
H
Huang, Ying 已提交
417

418
/*
 * Read the physical SetVirtualAddressMap() entry point out of a
 * 32-bit runtime services table and store it in efi_phys.
 *
 * Returns 0 on success, -ENOMEM if the table cannot be mapped.
 */
static int __init efi_runtime_init32(void)
{
	efi_runtime_services_32_t *rt;

	rt = early_memremap((unsigned long)efi.systab->runtime, sizeof(*rt));
	if (rt == NULL) {
		pr_err("Could not map the runtime service table!\n");
		return -ENOMEM;
	}

	/*
	 * SetVirtualAddressMap is the only runtime service that needs
	 * *early* access; every other service is reached later through
	 * the virtual mapping.
	 */
	efi_phys.set_virtual_address_map =
			(efi_set_virtual_address_map_t *)
			(unsigned long)rt->set_virtual_address_map;

	early_memunmap(rt, sizeof(*rt));

	return 0;
}

/*
 * Read the physical SetVirtualAddressMap() entry point out of a
 * 64-bit runtime services table and store it in efi_phys.
 *
 * Returns 0 on success, -ENOMEM if the table cannot be mapped.
 */
static int __init efi_runtime_init64(void)
{
	efi_runtime_services_64_t *rt;

	rt = early_memremap((unsigned long)efi.systab->runtime, sizeof(*rt));
	if (rt == NULL) {
		pr_err("Could not map the runtime service table!\n");
		return -ENOMEM;
	}

	/*
	 * SetVirtualAddressMap is the only runtime service that needs
	 * *early* access; every other service is reached later through
	 * the virtual mapping.
	 */
	efi_phys.set_virtual_address_map =
			(efi_set_virtual_address_map_t *)
			(unsigned long)rt->set_virtual_address_map;

	early_memunmap(rt, sizeof(*rt));

	return 0;
}

/*
 * Look up the runtime services table and flag runtime services as
 * available.
 *
 * The table has to be mapped so that the physical address of the EFI
 * function needed to switch the firmware into virtual mode can be
 * fetched.
 *
 * With EFI_PARAVIRT the runtime service memory cannot be mapped since
 * there is no direct access to it. Runtime services are still
 * available through proxy functions (e.g. on Xen dom0 they issue a
 * hypercall that runs the relevant EFI function), which is why they
 * are always enabled in that case.
 *
 * Returns 0 on success or the error from the 32/64-bit table lookup.
 */
static int __init efi_runtime_init(void)
{
	if (!efi_enabled(EFI_PARAVIRT)) {
		int err;

		err = efi_enabled(EFI_64BIT) ? efi_runtime_init64()
					     : efi_runtime_init32();
		if (err)
			return err;
	}

	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);

	return 0;
}
H
Huang, Ying 已提交
498

499 500 501 502 503 504 505 506
/*
 * Early EFI initialisation.
 *
 * Locates the system table from boot_params, copies and verifies it,
 * records the config table / vendor / runtime addresses, prints the
 * firmware revision and vendor, parses the configuration tables and,
 * where supported, prepares the runtime services. Finally invalid
 * memory map entries are removed and the map is optionally printed.
 *
 * Any failure simply ends initialisation early.
 */
void __init efi_init(void)
{
	efi_char16_t *c16;
	char vendor[100] = "unknown";
	int i = 0;

#ifdef CONFIG_X86_32
	if (boot_params.efi_info.efi_systab_hi ||
	    boot_params.efi_info.efi_memmap_hi) {
		pr_info("Table located above 4GB, disabling EFI.\n");
		return;
	}
	efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
#else
	efi_phys.systab = (efi_system_table_t *)
			  (boot_params.efi_info.efi_systab |
			  ((__u64)boot_params.efi_info.efi_systab_hi<<32));
#endif

	if (efi_systab_init(efi_phys.systab))
		return;

	efi.config_table = (unsigned long)efi.systab->tables;
	efi.fw_vendor	 = (unsigned long)efi.systab->fw_vendor;
	efi.runtime	 = (unsigned long)efi.systab->runtime;

	/*
	 * Show what we know for posterity.
	 *
	 * Map as many 16-bit characters as the copy loop below may read;
	 * mapping just the first character would let the loop run past
	 * the end of the mapping. The mapping is only torn down when it
	 * was actually established.
	 */
	c16 = early_memremap(efi.systab->fw_vendor,
			     sizeof(vendor) * sizeof(efi_char16_t));
	if (c16) {
		for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i)
			vendor[i] = c16[i];
		vendor[i] = '\0';
		early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t));
	} else {
		pr_err("Could not map the firmware vendor!\n");
	}

	pr_info("EFI v%u.%.02u by %s\n",
		efi.systab->hdr.revision >> 16,
		efi.systab->hdr.revision & 0xffff, vendor);

	if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables))
		return;

	if (efi_config_init(arch_tables))
		return;

	/*
	 * Note: We currently don't support runtime services on an EFI
	 * that doesn't match the kernel 32/64-bit mode.
	 */

	if (!efi_runtime_supported())
		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
	else {
		if (efi_runtime_disabled() || efi_runtime_init()) {
			efi_memmap_unmap();
			return;
		}
	}

	efi_clean_memmap();

	if (efi_enabled(EFI_DBG))
		efi_print_memmap();
}

568 569 570 571 572 573 574 575 576 577 578 579 580 581 582
/*
 * Make the virtual range described by @md executable or
 * non-executable, depending on @executable.
 */
void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
{
	u64 va = md->virt_addr;
	u64 pages = md->num_pages;

	memrange_efi_to_native(&va, &pages);

	if (!executable) {
		set_memory_nx(va, pages);
		return;
	}

	set_memory_x(va, pages);
}

B
Borislav Petkov 已提交
583
/* Make every EFI runtime services code region executable. */
void __init runtime_code_page_mkexec(void)
{
	efi_memory_desc_t *md;

	for_each_efi_memory_desc(md) {
		if (md->type == EFI_RUNTIME_SERVICES_CODE)
			efi_set_executable(md, true);
	}
}

596
/*
 * Mark @size bytes starting at @addr as uncached. The size is rounded
 * up to whole EFI pages before being converted to native pages.
 */
void __init efi_memory_uc(u64 addr, unsigned long size)
{
	unsigned long efi_page_size = 1UL << EFI_PAGE_SHIFT;
	u64 npages = round_up(size, efi_page_size) / efi_page_size;

	memrange_efi_to_native(&addr, &npages);
	set_memory_uc(addr, npages);
}

606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631
/*
 * Establish a virtual mapping for the region described by @md and
 * store the resulting address in md->virt_addr.
 *
 * If the whole physical range is already mapped, the existing __va()
 * address is reused (and made uncached unless the region is
 * write-back cacheable). Otherwise the range is mapped with
 * efi_ioremap(). A failed mapping leaves virt_addr at 0 and is logged.
 */
void __init old_map_region(efi_memory_desc_t *md)
{
	u64 start_pfn, end_pfn, end;
	unsigned long size;
	void *va;

	start_pfn = PFN_DOWN(md->phys_addr);
	/*
	 * num_pages counts EFI pages, so convert it with EFI_PAGE_SHIFT
	 * like the other descriptor size calculations in this file.
	 */
	size	  = md->num_pages << EFI_PAGE_SHIFT;
	end	  = md->phys_addr + size;
	end_pfn   = PFN_UP(end);

	if (pfn_range_is_mapped(start_pfn, end_pfn)) {
		va = __va(md->phys_addr);

		if (!(md->attribute & EFI_MEMORY_WB))
			efi_memory_uc((u64)(unsigned long)va, size);
	} else
		va = efi_ioremap(md->phys_addr, size,
				 md->type, md->attribute);

	md->virt_addr = (u64) (unsigned long) va;
	if (!va)
		pr_err("ioremap of 0x%llX failed!\n",
		       (unsigned long long)md->phys_addr);
}

632 633
/*
 * Merge physically contiguous regions that share type and attributes.
 *
 * The earlier descriptor absorbs the page count of the later one; the
 * absorbed descriptor is turned into an attribute-less
 * EFI_RESERVED_TYPE entry rather than being removed from the map.
 */
static void __init efi_merge_regions(void)
{
	efi_memory_desc_t *md, *prev_md = NULL;

	for_each_efi_memory_desc(md) {
		bool mergeable;

		mergeable = prev_md &&
			    prev_md->type == md->type &&
			    prev_md->attribute == md->attribute &&
			    md->phys_addr == prev_md->phys_addr +
				(prev_md->num_pages << EFI_PAGE_SHIFT);

		if (!mergeable) {
			/* Start a new candidate run at this descriptor. */
			prev_md = md;
			continue;
		}

		prev_md->num_pages += md->num_pages;
		md->type = EFI_RESERVED_TYPE;
		md->attribute = 0;
	}
}

/*
 * If the physical system table lies inside the region described by
 * @md, set efi.systab to the matching virtual address.
 */
static void __init get_systab_virt_addr(efi_memory_desc_t *md)
{
	unsigned long size = md->num_pages << EFI_PAGE_SHIFT;
	u64 end = md->phys_addr + size;
	u64 systab = (u64)(unsigned long)efi_phys.systab;

	if (systab < md->phys_addr || systab >= end)
		return;

	/* Apply the region's physical-to-virtual offset. */
	systab += md->virt_addr - md->phys_addr;
	efi.systab = (efi_system_table_t *)(unsigned long)systab;
}

677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697
/*
 * Grow a page-based buffer to twice its current order.
 * @old_memmap: current buffer of order @old_shift, or NULL
 * @old_shift:  allocation order of the current buffer
 *
 * Allocates a buffer of order @old_shift + 1, copies the old contents
 * over and frees the old buffer. The old buffer is freed on
 * allocation failure as well.
 *
 * Returns the new buffer, or NULL if the allocation failed.
 */
static void *realloc_pages(void *old_memmap, int old_shift)
{
	void *grown = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);

	/* A first-time allocation has nothing to copy or free. */
	if (grown && !old_memmap)
		return grown;

	if (grown)
		memcpy(grown, old_memmap, PAGE_SIZE << old_shift);

	free_pages((unsigned long)old_memmap, old_shift);

	return grown;
}

698 699 700 701 702 703 704 705 706 707
/*
 * Iterate the EFI memory map in reverse order because the regions
 * will be mapped top-down. The end result is the same as if we had
 * mapped things forward, but doesn't require us to change the
 * existing implementation of efi_map_region().
 */
static inline void *efi_map_next_entry_reverse(void *entry)
{
	/* Initial call */
	if (!entry)
708
		return efi.memmap.map_end - efi.memmap.desc_size;
709

710 711
	entry -= efi.memmap.desc_size;
	if (entry < efi.memmap.map)
712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752
		return NULL;

	return entry;
}

/*
 * efi_map_next_entry - Return the next EFI memory map descriptor
 * @entry: Previous EFI memory map descriptor
 *
 * This is a helper function to iterate over the EFI memory map, which
 * we do in different orders depending on the current configuration.
 *
 * To begin traversing the memory map @entry must be %NULL.
 *
 * Returns %NULL when we reach the end of the memory map.
 */
static void *efi_map_next_entry(void *entry)
{
	if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) {
		/*
		 * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE
		 * config table feature requires us to map all entries
		 * in the same order as they appear in the EFI memory
		 * map. That is to say, entry N must have a lower
		 * virtual address than entry N+1. This is because the
		 * firmware toolchain leaves relative references in
		 * the code/data sections, which are split and become
		 * separate EFI memory regions. Mapping things
		 * out-of-order leads to the firmware accessing
		 * unmapped addresses.
		 *
		 * Since we need to map things this way whether or not
		 * the kernel actually makes use of
		 * EFI_PROPERTIES_TABLE, let's just switch to this
		 * scheme by default for 64-bit.
		 */
		return efi_map_next_entry_reverse(entry);
	}

	/* Initial call */
	if (!entry)
753
		return efi.memmap.map;
754

755 756
	entry += efi.memmap.desc_size;
	if (entry >= efi.memmap.map_end)
757 758 759 760 761
		return NULL;

	return entry;
}

762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801
/*
 * Decide whether the region described by @md needs a runtime mapping.
 */
static bool should_map_region(efi_memory_desc_t *md)
{
	bool mixed_mode;

	/* Runtime regions always require runtime mappings (obviously). */
	if (md->attribute & EFI_MEMORY_RUNTIME)
		return true;

	/*
	 * 32-bit EFI doesn't suffer from the bug that requires us to
	 * reserve boot services regions, and mixed mode support
	 * doesn't exist for 32-bit kernels.
	 */
	if (IS_ENABLED(CONFIG_X86_32))
		return false;

	mixed_mode = IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native();

	switch (md->type) {
	case EFI_CONVENTIONAL_MEMORY:
	case EFI_LOADER_DATA:
	case EFI_LOADER_CODE:
		/*
		 * In mixed mode, map all of RAM so that we can access
		 * arguments in the 1:1 mapping when making EFI runtime
		 * calls.
		 */
		return mixed_mode;
	case EFI_BOOT_SERVICES_CODE:
	case EFI_BOOT_SERVICES_DATA:
		/*
		 * Map boot services regions as a workaround for buggy
		 * firmware that accesses them even when they shouldn't.
		 *
		 * See efi_{reserve,free}_boot_services().
		 */
		return true;
	default:
		return false;
	}
}

802
/*
803 804
 * Map the efi memory ranges of the runtime services and update new_mmap with
 * virtual addresses.
805
 */
806
static void * __init efi_map_regions(int *count, int *pg_shift)
807
{
808 809
	void *p, *new_memmap = NULL;
	unsigned long left = 0;
810
	unsigned long desc_size;
811
	efi_memory_desc_t *md;
812

813 814
	desc_size = efi.memmap.desc_size;

815 816
	p = NULL;
	while ((p = efi_map_next_entry(p))) {
H
Huang, Ying 已提交
817
		md = p;
818 819 820

		if (!should_map_region(md))
			continue;
H
Huang, Ying 已提交
821

822
		efi_map_region(md);
823 824
		get_systab_virt_addr(md);

825
		if (left < desc_size) {
826 827 828 829 830 831 832 833
			new_memmap = realloc_pages(new_memmap, *pg_shift);
			if (!new_memmap)
				return NULL;

			left += PAGE_SIZE << *pg_shift;
			(*pg_shift)++;
		}

834
		memcpy(new_memmap + (*count * desc_size), md, desc_size);
835

836
		left -= desc_size;
837 838
		(*count)++;
	}
839

840 841 842
	return new_memmap;
}

843 844
/*
 * Enter virtual mode in a kexec'ed kernel.
 *
 * The regions passed in via setup_data are mapped at the fixed virtual
 * addresses stored in their descriptors, the early memory map is
 * replaced by a late mapping, and the native runtime service wrappers
 * are installed. SetVirtualAddressMap() is not called on this path.
 *
 * On any failure EFI_RUNTIME_SERVICES is cleared and the function
 * returns. Without CONFIG_KEXEC_CORE this is a no-op.
 */
static void __init kexec_enter_virtual_mode(void)
{
#ifdef CONFIG_KEXEC_CORE
	efi_memory_desc_t *md;
	unsigned int num_pages;

	efi.systab = NULL;

	/*
	 * We don't do virtual mode, since we don't do runtime services, on
	 * non-native EFI. With efi=old_map, we don't do runtime services in
	 * kexec kernel because in the initial boot something else might
	 * have been mapped at these virtual addresses.
	 */
	if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) {
		efi_memmap_unmap();
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	if (efi_alloc_page_tables()) {
		pr_err("Failed to allocate EFI page tables\n");
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	/*
	* Map efi regions which were passed via setup_data. The virt_addr is a
	* fixed addr which was used in first kernel of a kexec boot.
	*/
	for_each_efi_memory_desc(md) {
		efi_map_region_fixed(md); /* FIXME: add error handling */
		get_systab_virt_addr(md);
	}

	/*
	 * Unregister the early EFI memmap from efi_init() and install
	 * the new EFI memory map.
	 */
	efi_memmap_unmap();

	if (efi_memmap_init_late(efi.memmap.phys_map,
				 efi.memmap.desc_size * efi.memmap.nr_map)) {
		pr_err("Failed to remap late EFI memory map\n");
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	BUG_ON(!efi.systab);

	/* Number of native pages spanned by the memory map itself. */
	num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE);
	num_pages >>= PAGE_SHIFT;

	if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) {
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

	efi_sync_low_kernel_mappings();

	/*
	 * Now that EFI is in virtual mode, update the function
	 * pointers in the runtime service table to the new virtual addresses.
	 *
	 * Call EFI services through wrapper functions.
	 */
	efi.runtime_version = efi_systab.hdr.revision;

	efi_native_runtime_setup();

	efi.set_virtual_address_map = NULL;

	/*
	 * No runtime_code_page_mkexec() here: that is only needed with
	 * EFI_OLD_MEMMAP, and that configuration already returned above.
	 */

	/* clean DUMMY object */
	efi_delete_dummy_variable();
#endif
}

923 924 925 926
/*
 * This function will switch the EFI runtime services to virtual mode.
 * Essentially, we look through the EFI memmap and map every region that
 * has the runtime attribute bit set in its memory descriptor into the
927
 * efi_pgd page table.
928 929 930 931 932 933 934 935 936
 *
 * The old method which used to update that memory descriptor with the
 * virtual address obtained from ioremap() is still supported when the
 * kernel is booted with efi=old_map on its command line. Same old
 * method enabled the runtime services to be called without having to
 * thunk back into physical mode for every invocation.
 *
 * The new method does a pagetable switch in a preemption-safe manner
 * so that we're in a different address space when calling a runtime
937 938
 * function. For function arguments passing we do copy the PUDs of the
 * kernel page table into efi_pgd prior to each call.
939 940 941
 *
 * Specially for kexec boot, efi runtime maps in previous kernel should
 * be passed in via setup_data. In that case runtime ranges will be mapped
942 943
 * to the same virtual addresses as the first kernel, see
 * kexec_enter_virtual_mode().
944
 */
945
static void __init __efi_enter_virtual_mode(void)
946
{
947
	int count = 0, pg_shift = 0;
948
	void *new_memmap = NULL;
949
	efi_status_t status;
950
	unsigned long pa;
H
Huang, Ying 已提交
951

952
	efi.systab = NULL;
953

954 955 956 957 958 959
	if (efi_alloc_page_tables()) {
		pr_err("Failed to allocate EFI page tables\n");
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

960 961 962 963
	efi_merge_regions();
	new_memmap = efi_map_regions(&count, &pg_shift);
	if (!new_memmap) {
		pr_err("Error reallocating memory, EFI runtime non-functional!\n");
964
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
965
		return;
966
	}
967

968 969 970 971 972 973 974 975 976 977 978 979 980 981 982
	pa = __pa(new_memmap);

	/*
	 * Unregister the early EFI memmap from efi_init() and install
	 * the new EFI memory map that we are about to pass to the
	 * firmware via SetVirtualAddressMap().
	 */
	efi_memmap_unmap();

	if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) {
		pr_err("Failed to remap late EFI memory map\n");
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return;
	}

983 984 985 986 987
	if (efi_enabled(EFI_DBG)) {
		pr_info("EFI runtime memory map:\n");
		efi_print_memmap();
	}

H
Huang, Ying 已提交
988 989
	BUG_ON(!efi.systab);

990
	if (efi_setup_page_tables(pa, 1 << pg_shift)) {
991
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
992
		return;
993
	}
994

995 996
	efi_sync_low_kernel_mappings();

997 998
	if (efi_is_native()) {
		status = phys_efi_set_virtual_address_map(
999 1000 1001
				efi.memmap.desc_size * count,
				efi.memmap.desc_size,
				efi.memmap.desc_version,
1002
				(efi_memory_desc_t *)pa);
1003 1004 1005
	} else {
		status = efi_thunk_set_virtual_address_map(
				efi_phys.set_virtual_address_map,
1006 1007 1008
				efi.memmap.desc_size * count,
				efi.memmap.desc_size,
				efi.memmap.desc_version,
1009
				(efi_memory_desc_t *)pa);
1010
	}
1011

1012 1013 1014 1015
	if (status != EFI_SUCCESS) {
		pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
			 status);
		panic("EFI call to SetVirtualAddressMap() failed!");
H
Huang, Ying 已提交
1016 1017
	}

1018 1019
	efi_free_boot_services();

H
Huang, Ying 已提交
1020 1021 1022 1023 1024 1025
	/*
	 * Now that EFI is in virtual mode, update the function
	 * pointers in the runtime service table to the new virtual addresses.
	 *
	 * Call EFI services through wrapper functions.
	 */
1026
	efi.runtime_version = efi_systab.hdr.revision;
1027 1028

	if (efi_is_native())
1029
		efi_native_runtime_setup();
1030 1031 1032
	else
		efi_thunk_runtime_setup();

1033
	efi.set_virtual_address_map = NULL;
1034

1035 1036 1037 1038 1039 1040
	/*
	 * Apply more restrictive page table mapping attributes now that
	 * SVAM() has been called and the firmware has performed all
	 * necessary relocation fixups for the new virtual addresses.
	 */
	efi_runtime_update_mappings();
1041

M
Matthew Garrett 已提交
1042
	/* clean DUMMY object */
1043
	efi_delete_dummy_variable();
H
Huang, Ying 已提交
1044 1045
}

1046 1047
/*
 * Enter EFI virtual mode. Does nothing under EFI_PARAVIRT. The kexec
 * variant is used when efi_setup is set, the regular one otherwise;
 * afterwards the page tables are dumped via efi_dump_pagetable().
 */
void __init efi_enter_virtual_mode(void)
{
	if (efi_enabled(EFI_PARAVIRT))
		return;

	if (!efi_setup)
		__efi_enter_virtual_mode();
	else
		kexec_enter_virtual_mode();

	efi_dump_pagetable();
}

D
Dave Young 已提交
1059
static int __init arch_parse_efi_cmdline(char *str)
1060
{
1061 1062 1063 1064 1065
	if (!str) {
		pr_warn("need at least one option\n");
		return -EINVAL;
	}

1066 1067
	if (parse_option_str(str, "old_map"))
		set_bit(EFI_OLD_MEMMAP, &efi.flags);
1068 1069 1070

	return 0;
}
D
Dave Young 已提交
1071
early_param("efi", arch_parse_efi_cmdline);
1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085

/*
 * Report whether @phys_addr equals the address of one of the firmware
 * tables listed in efi_tables[]. EFI_INVALID_TABLE_ADDR never matches.
 */
bool efi_is_table_address(unsigned long phys_addr)
{
	const unsigned long * const *slot;

	if (phys_addr == EFI_INVALID_TABLE_ADDR)
		return false;

	for (slot = efi_tables; slot < efi_tables + ARRAY_SIZE(efi_tables);
	     slot++) {
		if (**slot == phys_addr)
			return true;
	}

	return false;
}