/*
 * PowerNV OPAL high level interfaces
 *
 * Copyright 2011 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

12
#define pr_fmt(fmt)	"opal: " fmt
13

14
#include <linux/printk.h>
#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kobject.h>
#include <linux/delay.h>
#include <linux/memblock.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/err.h>

#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>

#include "powernv.h"

36 37 38
/*
 * kobject for /sys/firmware/opal. Created by opal_sysfs_init() and used as
 * the parent of the OPAL sysfs files.
 */
struct kobject *opal_kobj;

39 40 41
/*
 * Location of the OPAL firmware image. The fields are filled in by
 * early_init_dt_scan_opal() from properties of the "ibm,opal" node.
 */
struct opal {
	u64 base;	/* from "opal-base-address" */
	u64 entry;	/* from "opal-entry-address" */
	u64 size;	/* from "opal-runtime-size" */
};

struct opal opal;

45 46 47 48 49 50 51 52 53
/*
 * One machine-check recoverable range. find_recovery_address() returns
 * recover_addr for any nip with start_addr <= nip < end_addr.
 */
struct mcheck_recoverable_range {
	u64 start_addr;		/* first address of the range (inclusive) */
	u64 end_addr;		/* start_addr + length, i.e. exclusive end */
	u64 recover_addr;	/* address to resume at for this range */
};

/* Table built by early_init_dt_scan_recoverable_ranges() */
static struct mcheck_recoverable_range *mc_recoverable_range;
/* Number of entries in mc_recoverable_range */
static int mc_recoverable_range_len;

54
struct device_node *opal_node;
55
static DEFINE_SPINLOCK(opal_write_lock);
56
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
57
static uint32_t opal_heartbeat;
58
static struct task_struct *kopald_tsk;
59

60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
static void opal_reinit_cores(void)
{
	/* Do the actual re-init, This will clobber all FPRs, VRs, etc...
	 *
	 * It will preserve non volatile GPRs and HSPRG0/1. It will
	 * also restore HIDs and other SPRs to their original value
	 * but it might clobber a bunch.
	 */
#ifdef __BIG_ENDIAN__
	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
#else
	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
#endif
}

75 76 77
/*
 * Flat device-tree scan callback that locates the OPAL firmware.
 *
 * Only the depth-1 node named "ibm,opal" is considered. Its base, entry
 * and runtime-size properties are recorded in the global 'opal', the
 * firmware feature bit is set, and all cores are re-initialised with the
 * kernel's endianness.
 *
 * Returns 0 for any other node and 1 once the "ibm,opal" node has been
 * seen (even if its properties are incomplete). Presumably a non-zero
 * return stops the scan - confirm against the flat-DT scan helper.
 *
 * Panics if the node is not compatible with "ibm,opal-v3".
 */
int __init early_init_dt_scan_opal(unsigned long node,
				   const char *uname, int depth, void *data)
{
	const void *basep, *entryp, *sizep;
	int basesz, entrysz, runtimesz;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
	sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);

	if (!basep || !entryp || !sizep)
		return 1;

	/* Property lengths are in bytes, of_read_number() wants cells */
	opal.base = of_read_number(basep, basesz/4);
	opal.entry = of_read_number(entryp, entrysz/4);
	opal.size = of_read_number(sizep, runtimesz/4);

	/* Each line is labelled with the value and length it really prints */
	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
		 opal.base, basep, basesz);
	pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
		 opal.entry, entryp, entrysz);
	pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%d)\n",
		 opal.size, sizep, runtimesz);

	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
		powerpc_firmware_features |= FW_FEATURE_OPAL;
		pr_info("OPAL detected !\n");
	} else {
		panic("OPAL != V3 detected, no longer supported.\n");
	}

	/* Reinit all cores with the right endian */
	opal_reinit_cores();

	/* Restore some bits */
	if (cur_cpu_spec->cpu_restore)
		cur_cpu_spec->cpu_restore();

	return 1;
}

119 120 121
/*
 * Flat device-tree scan callback that builds the table of machine-check
 * recoverable ranges from the "mcheck-recoverable-ranges" property of the
 * depth-1 "ibm,opal" node.
 *
 * Returns 0 for any other node, 1 once the "ibm,opal" node has been seen.
 */
int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
				   const char *uname, int depth, void *data)
{
	struct mcheck_recoverable_range *range;
	const __be32 *prop, *cell;
	int i, psize, size;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
	if (!prop)
		return 1;

	pr_debug("Found machine check recoverable ranges.\n");

	/*
	 * Each entry is (start address, len, recovery address): 2 cells for
	 * each address and 1 cell for len, i.e. 5 cells per entry.
	 */
	mc_recoverable_range_len = psize / (sizeof(*prop) * 5);

	/* Nothing to do for a property too short to hold one entry */
	if (!mc_recoverable_range_len)
		return 1;

	size = mc_recoverable_range_len *
			sizeof(struct mcheck_recoverable_range);

	/*
	 * The table is accessed in real mode, so it has to be allocated
	 * within the RMO region.
	 */
	mc_recoverable_range = __va(memblock_alloc_base(size,
							__alignof__(u64),
							ppc64_rma_size));
	memset(mc_recoverable_range, 0, size);

	for (i = 0; i < mc_recoverable_range_len; i++) {
		cell = prop + (i * 5);
		range = &mc_recoverable_range[i];

		range->start_addr = of_read_number(cell + 0, 2);
		range->end_addr = range->start_addr +
					of_read_number(cell + 2, 1);
		range->recover_addr = of_read_number(cell + 3, 2);

		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
				range->start_addr,
				range->end_addr,
				range->recover_addr);
	}
	return 1;
}

178 179
/*
 * Register exception handler glue with OPAL. Only done on big-endian
 * builds; on other builds this is a no-op that returns 0.
 *
 * Returns -ENODEV (big-endian only) when OPAL was not detected, else 0.
 */
static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
	/*
	 * Hook up some exception handlers, except machine check. The fwnmi
	 * area at 0x7000 provides the glue space handed to OPAL.
	 */
	u64 glue = 0x7000;

	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
		return -ENODEV;

	/*
	 * Firmware that exports the OPAL_HANDLE_HMI token lets Linux catch
	 * HMIs directly, so OPAL is not asked to patch the HMI vector.
	 *
	 * Older firmware (released POWER8 System Firmware <= SV810_087 at
	 * the time of writing) lacks the token; fall back to letting OPAL
	 * patch the HMI vector and handle HMIs inside the firmware.
	 */
	if (!opal_check_token(OPAL_HANDLE_HMI)) {
		pr_info("Old firmware detected, OPAL handles HMIs.\n");
		opal_register_exception_handler(
				OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
				0, glue);
		glue += 128;
	}

	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif

	return 0;
}
machine_early_initcall(powernv, opal_register_exception_handlers);
217

218 219 220 221
/*
 * Opal message notifier based on message type. Allow subscribers to get
 * notified for specific messgae type.
 */
222
int opal_message_notifier_register(enum opal_msg_type msg_type,
223 224
					struct notifier_block *nb)
{
225 226
	if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
		pr_warning("%s: Invalid arguments, msg_type:%d\n",
227 228 229
			   __func__, msg_type);
		return -EINVAL;
	}
230

231 232 233
	return atomic_notifier_chain_register(
				&opal_msg_notifier_head[msg_type], nb);
}
234
EXPORT_SYMBOL_GPL(opal_message_notifier_register);
235

236
int opal_message_notifier_unregister(enum opal_msg_type msg_type,
237 238 239 240 241
				     struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(
			&opal_msg_notifier_head[msg_type], nb);
}
242
EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
243

244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
/*
 * Run the notifier chain of one message type. The caller must have
 * checked that msg_type is below OPAL_MSG_TYPE_MAX.
 */
static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
	struct atomic_notifier_head *head = &opal_msg_notifier_head[msg_type];

	atomic_notifier_call_chain(head, msg_type, msg);
}

static void opal_handle_message(void)
{
	s64 ret;
	/*
	 * TODO: pre-allocate a message buffer depending on opal-msg-size
	 * value in /proc/device-tree.
	 */
	static struct opal_msg msg;
259
	u32 type;
260 261 262 263 264 265 266 267

	ret = opal_get_msg(__pa(&msg), sizeof(msg));
	/* No opal message pending. */
	if (ret == OPAL_RESOURCE)
		return;

	/* check for errors. */
	if (ret) {
M
Masanari Iida 已提交
268
		pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
269 270 271 272
				__func__, ret);
		return;
	}

273 274
	type = be32_to_cpu(msg.msg_type);

275
	/* Sanity check */
276
	if (type >= OPAL_MSG_TYPE_MAX) {
277
		pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
278 279
		return;
	}
280
	opal_message_do_notify(type, (void *)&msg);
281 282
}

283
static irqreturn_t opal_message_notify(int irq, void *data)
284
{
285 286
	opal_handle_message();
	return IRQ_HANDLED;
287 288 289 290
}

/*
 * Set up OPAL message delivery: initialise one notifier chain per message
 * type and install opal_message_notify() on the OPAL_EVENT_MSG_PENDING
 * event interrupt.
 *
 * Returns 0 on success, -ENODEV when no irq could be obtained for the
 * event, or the error returned by request_irq().
 */
static int __init opal_message_init(void)
{
	int ret, i, irq;

	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);

	irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
	if (!irq) {
		pr_err("%s: Can't register OPAL event irq (%d)\n",
		       __func__, irq);
		/* irq is 0 here; returning it would report success */
		return -ENODEV;
	}

	ret = request_irq(irq, opal_message_notify,
			IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
	if (ret) {
		pr_err("%s: Can't request OPAL event irq (%d)\n",
		       __func__, ret);
		return ret;
	}

	return 0;
}

314 315
/*
 * Read up to 'count' characters from OPAL console 'vtermno' into 'buf'.
 *
 * Returns -ENODEV when no OPAL entry point is known, 0 when no console
 * input is flagged or the read does not succeed, otherwise the length
 * reported back by opal_console_read().
 */
int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
	__be64 pending, nread;

	if (!opal.entry)
		return -ENODEV;

	opal_poll_events(&pending);
	if (!(be64_to_cpu(pending) & OPAL_EVENT_CONSOLE_INPUT))
		return 0;

	/* Buffer size goes in and the length read comes out, both big-endian */
	nread = cpu_to_be64(count);
	if (opal_console_read(vtermno, &nread, buf) != OPAL_SUCCESS)
		return 0;

	return be64_to_cpu(nread);
}

/*
 * Write 'total_len' bytes from 'data' to OPAL console 'vtermno'.
 *
 * Returns -ENODEV when no OPAL entry point is known, -EAGAIN when the
 * console currently lacks room for the whole buffer, otherwise the number
 * of bytes accounted for. When the console reports an error (e.g. it is
 * closed) the data is dropped and reported as written.
 */
int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
	int written = 0;
	/*
	 * Zero-initialised so that a firmware call which fails without
	 * storing a result never makes us read an indeterminate value.
	 */
	__be64 olen = 0;
	__be64 evt = 0;
	s64 len, rc;
	unsigned long flags;

	if (!opal.entry)
		return -ENODEV;

	/* We want put_chars to be atomic to avoid mangling of hvsi
	 * packets. To do that, we first test for room and return
	 * -EAGAIN if there isn't enough.
	 */
	spin_lock_irqsave(&opal_write_lock, flags);
	rc = opal_console_write_buffer_space(vtermno, &olen);
	len = be64_to_cpu(olen);
	if (rc || len < total_len) {
		spin_unlock_irqrestore(&opal_write_lock, flags);
		/* Closed -> drop characters */
		if (rc)
			return total_len;
		opal_poll_events(NULL);
		return -EAGAIN;
	}

	/* We still try to handle partial completions, though they
	 * should no longer happen.
	 */
	rc = OPAL_BUSY;
	while(total_len > 0 && (rc == OPAL_BUSY ||
				rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
		olen = cpu_to_be64(total_len);
		rc = opal_console_write(vtermno, &olen, data);
		len = be64_to_cpu(olen);

		/* Closed or other error drop */
		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
		    rc != OPAL_BUSY_EVENT) {
			written = total_len;
			break;
		}
		if (rc == OPAL_SUCCESS) {
			total_len -= len;
			data += len;
			written += len;
		}
		/* This is a bit nasty but we need that for the console to
		 * flush when there aren't any interrupts. We will clean
		 * things a bit later to limit that to synchronous path
		 * such as the kernel console and xmon/udbg
		 */
		do
			opal_poll_events(&evt);
		while(rc == OPAL_SUCCESS &&
			(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
	}
	spin_unlock_irqrestore(&opal_write_lock, flags);
	return written;
}

397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434
/*
 * Decide whether a machine check can be recovered from.
 *
 * Returns 1 when the event was recovered (possibly by sending SIGBUS to
 * the current user task), 0 when it was not.
 */
static int opal_recover_mce(struct pt_regs *regs,
					struct machine_check_event *evt)
{
	uint64_t ea = get_mce_fault_addr(evt);

	/* If MSR_RI isn't set, we cannot recover */
	if (!(regs->msr & MSR_RI))
		return 0;

	/* Platform corrected itself */
	if (evt->disposition == MCE_DISPOSITION_RECOVERED)
		return 1;

	if (ea && !is_kernel_addr(ea)) {
		/*
		 * The faulting address is not a kernel address. We would
		 * need to find which process uses it; for now, kill the
		 * task if the exception was taken in userspace.
		 *
		 * TODO: Queue up this address for hwpoisoning later.
		 */
		if (user_mode(regs) && !is_global_init(current)) {
			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
			return 1;
		}
		return 0;
	}

	/*
	 * A synchronous error received while in userspace: kill the task.
	 */
	if (user_mode(regs) && !is_global_init(current) &&
	    evt->severity == MCE_SEV_ERROR_SYNC) {
		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		return 1;
	}

	return 0;
}

435 436
int opal_machine_check(struct pt_regs *regs)
{
437
	struct machine_check_event evt;
438
	int ret;
439

440 441
	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return 0;
442 443

	/* Print things out */
444
	if (evt.version != MCE_V1) {
445 446 447 448
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt.version);
		return 0;
	}
449
	machine_check_print_event_info(&evt);
450

451 452
	if (opal_recover_mce(regs, &evt))
		return 1;
453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483

	/*
	 * Unrecovered machine check, we are heading to panic path.
	 *
	 * We may have hit this MCE in very early stage of kernel
	 * initialization even before opal-prd has started running. If
	 * this is the case then this MCE error may go un-noticed or
	 * un-analyzed if we go down panic path. We need to inform
	 * BMC/OCC about this error so that they can collect relevant
	 * data for error analysis before rebooting.
	 * Use opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR) to do so.
	 * This function may not return on BMC based system.
	 */
	ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
			"Unrecoverable Machine Check exception");
	if (ret == OPAL_UNSUPPORTED) {
		pr_emerg("Reboot type %d not supported\n",
					OPAL_REBOOT_PLATFORM_ERROR);
	}

	/*
	 * We reached here. There can be three possibilities:
	 * 1. We are running on a firmware level that do not support
	 *    opal_cec_reboot2()
	 * 2. We are running on a firmware level that do not support
	 *    OPAL_REBOOT_PLATFORM_ERROR reboot type.
	 * 3. We are running on FSP based system that does not need opal
	 *    to trigger checkstop explicitly for error analysis. The FSP
	 *    PRD component would have already got notified about this
	 *    error through other channels.
	 *
484 485 486
	 * If hardware marked this as an unrecoverable MCE, we are
	 * going to panic anyway. Even if it didn't, it's not safe to
	 * continue at this point, so we should explicitly panic.
487
	 */
488 489

	panic("PowerNV Unrecovered Machine Check");
490
	return 0;
491 492
}

493 494 495
/*
 * Early HMI handler called in real mode.
 *
 * Calls the OPAL HMI handler. OPAL_SUCCESS indicates that an HMI event
 * has been generated and is waiting to be pulled by Linux; this is
 * recorded in the paca and 1 is returned. Any other status returns 0.
 */
int opal_hmi_exception_early(struct pt_regs *regs)
{
	if (opal_handle_hmi() != OPAL_SUCCESS)
		return 0;

	local_paca->hmi_event_available = 1;
	return 1;
}

/* HMI exception handler called in virtual mode during check_irq_replay. */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
514 515 516 517 518 519 520 521 522 523 524 525 526
	s64 rc;
	__be64 evt = 0;

	/*
	 * Check if HMI event is available.
	 * if Yes, then call opal_poll_events to pull opal messages and
	 * process them.
	 */
	if (!local_paca->hmi_event_available)
		return 0;

	local_paca->hmi_event_available = 0;
	rc = opal_poll_events(&evt);
A
Alistair Popple 已提交
527
	if (rc == OPAL_SUCCESS && evt)
528
		opal_handle_events(be64_to_cpu(evt));
529 530

	return 1;
531 532
}

533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551
/*
 * Look up 'nip' in the recoverable-range table. Returns the recovery
 * address of the first range containing it, or 0 when there is none.
 */
static uint64_t find_recovery_address(uint64_t nip)
{
	int i;

	for (i = 0; i < mc_recoverable_range_len; i++) {
		const struct mcheck_recoverable_range *r =
						&mc_recoverable_range[i];

		/* Ranges are [start_addr, end_addr) */
		if (nip < r->start_addr || nip >= r->end_addr)
			continue;

		return r->recover_addr;
	}
	return 0;
}

bool opal_mce_check_early_recovery(struct pt_regs *regs)
{
	uint64_t recover_addr = 0;

	if (!opal.base || !opal.size)
		goto out;

	if ((regs->nip >= opal.base) &&
552
			(regs->nip < (opal.base + opal.size)))
553 554 555 556 557 558 559 560 561 562 563 564
		recover_addr = find_recovery_address(regs->nip);

	/*
	 * Setup regs->nip to rfi into fixup address.
	 */
	if (recover_addr)
		regs->nip = recover_addr;

out:
	return !!recover_addr;
}

565 566 567 568 569 570 571 572 573 574 575
/*
 * Create the "opal" kobject under firmware_kobj.
 * Returns 0 on success, -ENOMEM when the kobject could not be created.
 */
static int opal_sysfs_init(void)
{
	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
	if (opal_kobj)
		return 0;

	pr_warn("kobject_create_and_add opal failed\n");
	return -ENOMEM;
}

576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608
/*
 * sysfs read handler for the "symbol_map" binary attribute. Copies from
 * the buffer described by the attribute's private pointer and size.
 */
static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
			       struct bin_attribute *bin_attr,
			       char *buf, loff_t off, size_t count)
{
	const void *map = bin_attr->private;
	size_t map_len = bin_attr->size;

	return memory_read_from_buffer(buf, count, &off, map, map_len);
}

static BIN_ATTR_RO(symbol_map, 0);

/*
 * Export the OPAL symbol map as a binary sysfs file under opal_kobj.
 *
 * The "symbol-map" property of /ibm,opal/firmware must hold exactly two
 * big-endian 64-bit values: the address and the size of the map. Nothing
 * is exported when the node or a well-formed property is missing.
 */
static void opal_export_symmap(void)
{
	const __be64 *syms;
	unsigned int size;
	struct device_node *fw;
	int rc;

	fw = of_find_node_by_path("/ibm,opal/firmware");
	if (!fw)
		return;
	syms = of_get_property(fw, "symbol-map", &size);
	if (!syms || size != 2 * sizeof(__be64))
		goto out;

	/* Setup attributes */
	bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
	bin_attr_symbol_map.size = be64_to_cpu(syms[1]);

	rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
	if (rc)
		pr_warn("Error %d creating OPAL symbols file\n", rc);

out:
	/*
	 * Drop the reference taken by of_find_node_by_path(). Only values
	 * copied out of the property are kept, not pointers into the node.
	 */
	of_node_put(fw);
}

609 610 611 612 613 614
/*
 * Register the kernel log buffer with OPAL as a dump region, when the
 * firmware provides the OPAL_REGISTER_DUMP_REGION call.
 */
static void __init opal_dump_region_init(void)
{
	void *addr;
	uint64_t size;
	int rc;

	if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
		return;

	/* Register kernel log buffer */
	addr = log_buf_addr_get();
	if (!addr)
		return;

	size = log_buf_len_get();
	if (!size)
		return;

	rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
				       __pa(addr), size);

	/*
	 * Don't warn if this is just an older OPAL that doesn't know about
	 * that call.
	 */
	if (rc == 0 || rc == OPAL_UNSUPPORTED)
		return;

	pr_warn("DUMP: Failed to register kernel log buffer. rc = %d\n", rc);
}
636

637 638
static void opal_pdev_init(struct device_node *opal_node,
		const char *compatible)
639 640 641 642
{
	struct device_node *np;

	for_each_child_of_node(opal_node, np)
643
		if (of_device_is_compatible(np, compatible))
644 645 646
			of_platform_device_create(np, NULL, NULL);
}

647 648 649 650 651 652 653 654
static void opal_i2c_create_devs(void)
{
	struct device_node *np;

	for_each_compatible_node(np, NULL, "ibm,opal-i2c")
		of_platform_device_create(np, NULL, NULL);
}

655 656
static int kopald(void *unused)
{
657
	unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;
658 659
	__be64 events;

660 661 662
	set_freezable();
	do {
		try_to_freeze();
663 664
		opal_poll_events(&events);
		opal_handle_events(be64_to_cpu(events));
665
		schedule_timeout_interruptible(timeout);
666 667 668 669 670
	} while (!kthread_should_stop());

	return 0;
}

671 672 673 674 675 676
/* Wake the kopald polling thread, if one has been started. */
void opal_wake_poller(void)
{
	if (!kopald_tsk)
		return;

	wake_up_process(kopald_tsk);
}

677 678 679 680 681 682 683 684
static void opal_init_heartbeat(void)
{
	/* Old firwmware, we assume the HVC heartbeat is sufficient */
	if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
				 &opal_heartbeat) != 0)
		opal_heartbeat = 0;

	if (opal_heartbeat)
685
		kopald_tsk = kthread_run(kopald, NULL, "kopald");
686 687
}

688 689
/*
 * Main OPAL initialisation: looks up the /ibm,opal node, creates the
 * console, i2c, LED and other platform devices, and brings up the OPAL
 * sub-interfaces (messages, async completion, sensors, HMI, sysfs, ...).
 *
 * Returns -ENODEV when the /ibm,opal node does not exist, otherwise 0.
 */
static int __init opal_init(void)
{
	struct device_node *np, *consoles, *leds;
	int rc;

	opal_node = of_find_node_by_path("/ibm,opal");
	if (!opal_node) {
		pr_warn("Device node not found\n");
		return -ENODEV;
	}

	/* Register OPAL consoles if any ports */
	consoles = of_find_node_by_path("/ibm,opal/consoles");
	if (consoles) {
		for_each_child_of_node(consoles, np) {
			/* Only children named "serial" become devices */
			if (!strcmp(np->name, "serial"))
				of_platform_device_create(np, NULL, NULL);
		}
		of_node_put(consoles);
	}

	/* Initialise OPAL messaging system */
	opal_message_init();

	/* Initialise OPAL asynchronous completion interface */
	opal_async_comp_init();

	/* Initialise OPAL sensor interface */
	opal_sensor_init();

	/* Initialise OPAL hypervisor maintenance interrupt handling */
	opal_hmi_handler_init();

	/* Create i2c platform devices */
	opal_i2c_create_devs();

	/* Setup a heartbeat thread if requested by OPAL */
	opal_init_heartbeat();

	/* Create leds platform devices */
	leds = of_find_node_by_path("/ibm,opal/leds");
	if (leds) {
		of_platform_device_create(leds, "opal_leds", NULL);
		of_node_put(leds);
	}

	/* Initialise OPAL message log interface */
	opal_msglog_init();

	/*
	 * Create the "opal" kobject under /sys/firmware. The interfaces
	 * below are only set up when that succeeded.
	 */
	rc = opal_sysfs_init();
	if (!rc) {
		/* Export symbol map to userspace */
		opal_export_symmap();
		/* Setup dump region interface */
		opal_dump_region_init();
		/* Setup error log interface */
		rc = opal_elog_init();
		/* Setup code update interface */
		opal_flash_update_init();
		/* Setup platform dump extract interface */
		opal_platform_dump_init();
		/* Setup system parameters interface */
		opal_sys_param_init();
		/* Setup message log sysfs interface. */
		opal_msglog_sysfs_init();
	}

	/* Initialize platform devices: IPMI backend, PRD & flash interface */
	opal_pdev_init(opal_node, "ibm,opal-ipmi");
	opal_pdev_init(opal_node, "ibm,opal-flash");
	opal_pdev_init(opal_node, "ibm,opal-prd");

	/* Initialise platform device: oppanel interface */
	opal_pdev_init(opal_node, "ibm,opal-oppanel");

	/* Initialise OPAL kmsg dumper for flushing console on panic */
	opal_kmsg_init();

	return 0;
}
machine_subsys_initcall(powernv, opal_init);
771 772 773

void opal_shutdown(void)
{
774
	long rc = OPAL_BUSY;
775

776
	opal_event_shutdown();
777 778 779 780 781 782 783 784 785 786 787 788 789

	/*
	 * Then sync with OPAL which ensure anything that can
	 * potentially write to our memory has completed such
	 * as an ongoing dump retrieval
	 */
	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
		rc = opal_sync_host_reboot();
		if (rc == OPAL_BUSY)
			opal_poll_events(NULL);
		else
			mdelay(10);
	}
790 791

	/* Unregister memory dump region */
792 793
	if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
		opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
794
}
795 796 797

/* Exported so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
/* OPAL calls exported for GPL modules */
EXPORT_SYMBOL_GPL(opal_xscom_read);
EXPORT_SYMBOL_GPL(opal_xscom_write);
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
EXPORT_SYMBOL_GPL(opal_flash_read);
EXPORT_SYMBOL_GPL(opal_flash_write);
EXPORT_SYMBOL_GPL(opal_flash_erase);
EXPORT_SYMBOL_GPL(opal_prd_msg);
806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868

/*
 * Convert a region of vmalloc memory to an opal sg list.
 *
 * Builds a chain of PAGE_SIZE list nodes, each holding up to
 * SG_ENTRIES_PER_NODE (address, length) entries that describe the region
 * in chunks of at most PAGE_SIZE bytes. All fields are stored big-endian.
 *
 * Returns the first node of the chain, or NULL when an allocation fails
 * (nodes allocated so far are freed). The chain is released with
 * opal_free_sg_list().
 */
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
					     unsigned long vmalloc_size)
{
	struct opal_sg_list *sg, *first = NULL;
	unsigned long i = 0;	/* next free entry in the current node */

	sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!sg)
		goto nomem;

	first = sg;

	while (vmalloc_size > 0) {
		/* Address derived from the page frame backing this chunk */
		uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
		uint64_t length = min(vmalloc_size, PAGE_SIZE);

		sg->entry[i].data = cpu_to_be64(data);
		sg->entry[i].length = cpu_to_be64(length);
		i++;

		/* Current node is full: chain a new one */
		if (i >= SG_ENTRIES_PER_NODE) {
			struct opal_sg_list *next;

			next = kzalloc(PAGE_SIZE, GFP_KERNEL);
			if (!next)
				goto nomem;

			/*
			 * NOTE(review): the 16 is presumably the size of the
			 * node header (length + next) - confirm against the
			 * struct opal_sg_list definition.
			 */
			sg->length = cpu_to_be64(
					i * sizeof(struct opal_sg_entry) + 16);
			i = 0;
			/* Nodes are linked by physical address */
			sg->next = cpu_to_be64(__pa(next));
			sg = next;
		}

		vmalloc_addr += length;
		vmalloc_size -= length;
	}

	/* Length of the last (possibly partially filled) node */
	sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);

	return first;

nomem:
	pr_err("%s : Failed to allocate memory\n", __func__);
	/* first is NULL when the very first allocation failed */
	opal_free_sg_list(first);
	return NULL;
}

/*
 * Free every node of an opal sg list. Nodes are linked through the
 * big-endian physical address in 'next'; a zero link ends the chain.
 * Passing NULL is a no-op.
 */
void opal_free_sg_list(struct opal_sg_list *sg)
{
	while (sg) {
		/* Read the link before the node is freed */
		const uint64_t next_pa = be64_to_cpu(sg->next);

		kfree(sg);
		sg = next_pa ? __va(next_pa) : NULL;
	}
}
869

870 871 872 873 874 875 876 877 878
int opal_error_code(int rc)
{
	switch (rc) {
	case OPAL_SUCCESS:		return 0;

	case OPAL_PARAMETER:		return -EINVAL;
	case OPAL_ASYNC_COMPLETION:	return -EINPROGRESS;
	case OPAL_BUSY_EVENT:		return -EBUSY;
	case OPAL_NO_MEM:		return -ENOMEM;
879
	case OPAL_PERMISSION:		return -EPERM;
880 881 882 883 884 885 886 887 888 889

	case OPAL_UNSUPPORTED:		return -EIO;
	case OPAL_HARDWARE:		return -EIO;
	case OPAL_INTERNAL_ERROR:	return -EIO;
	default:
		pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
		return -EIO;
	}
}

890 891 892 893 894
/* OPAL calls exported for GPL modules */
EXPORT_SYMBOL_GPL(opal_poll_events);
EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
EXPORT_SYMBOL_GPL(opal_i2c_request);
/* Export these symbols for PowerNV LED class driver */
EXPORT_SYMBOL_GPL(opal_leds_get_ind);
EXPORT_SYMBOL_GPL(opal_leds_set_ind);
/* Export this symbol for PowerNV Operator Panel class driver */
EXPORT_SYMBOL_GPL(opal_write_oppanel_async);