err.c 6.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
// SPDX-License-Identifier: GPL-2.0
/*
 * This file implements the error recovery as a core part of PCIe error
 * reporting. When a PCIe error is delivered, an error message will be
 * collected and printed to console, then, an error recovery procedure
 * will be executed by following the PCI error recovery rules.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 */

13 14
#define dev_fmt(fmt) "AER: " fmt

15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/aer.h>
#include "portdrv.h"
#include "../pci.h"

/*
 * Fold one device's vote (@new) into the running recovery verdict (@orig)
 * and return the combined result.
 *
 *  - A PCI_ERS_RESULT_NO_AER_DRIVER vote always wins.
 *  - A PCI_ERS_RESULT_NONE vote leaves the running verdict untouched.
 *  - While the verdict is CAN_RECOVER or RECOVERED, the new vote replaces it.
 *  - A DISCONNECT verdict is only changed by a NEED_RESET vote.
 *  - Any other running verdict is kept as is.
 */
static pci_ers_result_t merge_result(enum pci_ers_result orig,
				  enum pci_ers_result new)
{
	switch (new) {
	case PCI_ERS_RESULT_NO_AER_DRIVER:
		return new;
	case PCI_ERS_RESULT_NONE:
		return orig;
	default:
		break;
	}

	if (orig == PCI_ERS_RESULT_CAN_RECOVER ||
	    orig == PCI_ERS_RESULT_RECOVERED)
		return new;

	if (orig == PCI_ERS_RESULT_DISCONNECT &&
	    new == PCI_ERS_RESULT_NEED_RESET)
		return PCI_ERS_RESULT_NEED_RESET;

	return orig;
}

K
Keith Busch 已提交
48 49 50
static int report_error_detected(struct pci_dev *dev,
				 enum pci_channel_state state,
				 enum pci_ers_result *result)
51 52 53 54 55
{
	pci_ers_result_t vote;
	const struct pci_error_handlers *err_handler;

	device_lock(&dev->dev);
K
Keith Busch 已提交
56 57
	if (!pci_dev_set_io_state(dev, state) ||
		!dev->driver ||
58 59 60
		!dev->driver->err_handler ||
		!dev->driver->err_handler->error_detected) {
		/*
61 62 63 64
		 * If any device in the subtree does not have an error_detected
		 * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
		 * error callbacks of "any" device in the subtree, and will
		 * exit in the disconnected error state.
65
		 */
66
		if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
67
			vote = PCI_ERS_RESULT_NO_AER_DRIVER;
68
			pci_info(dev, "can't recover (no error_detected callback)\n");
69
		} else {
70
			vote = PCI_ERS_RESULT_NONE;
71
		}
72 73
	} else {
		err_handler = dev->driver->err_handler;
K
Keith Busch 已提交
74
		vote = err_handler->error_detected(dev, state);
75
	}
76
	pci_uevent_ers(dev, vote);
K
Keith Busch 已提交
77
	*result = merge_result(*result, vote);
78 79 80 81
	device_unlock(&dev->dev);
	return 0;
}

K
Keith Busch 已提交
82 83 84 85 86 87 88 89 90 91
/*
 * pci_walk_bus() callback: report a frozen channel to @dev.
 * @data points at the running pci_ers_result_t verdict.
 */
static int report_frozen_detected(struct pci_dev *dev, void *data)
{
	enum pci_ers_result *verdict = data;

	return report_error_detected(dev, pci_channel_io_frozen, verdict);
}

/*
 * pci_walk_bus() callback: report an error to @dev while the channel is
 * still in the normal I/O state. @data points at the running
 * pci_ers_result_t verdict.
 */
static int report_normal_detected(struct pci_dev *dev, void *data)
{
	enum pci_ers_result *verdict = data;

	return report_error_detected(dev, pci_channel_io_normal, verdict);
}

92 93
static int report_mmio_enabled(struct pci_dev *dev, void *data)
{
K
Keith Busch 已提交
94
	pci_ers_result_t vote, *result = data;
95 96 97 98 99 100 101 102 103 104
	const struct pci_error_handlers *err_handler;

	device_lock(&dev->dev);
	if (!dev->driver ||
		!dev->driver->err_handler ||
		!dev->driver->err_handler->mmio_enabled)
		goto out;

	err_handler = dev->driver->err_handler;
	vote = err_handler->mmio_enabled(dev);
K
Keith Busch 已提交
105
	*result = merge_result(*result, vote);
106 107 108 109 110 111 112
out:
	device_unlock(&dev->dev);
	return 0;
}

static int report_slot_reset(struct pci_dev *dev, void *data)
{
K
Keith Busch 已提交
113
	pci_ers_result_t vote, *result = data;
114 115 116 117 118 119 120 121 122 123
	const struct pci_error_handlers *err_handler;

	device_lock(&dev->dev);
	if (!dev->driver ||
		!dev->driver->err_handler ||
		!dev->driver->err_handler->slot_reset)
		goto out;

	err_handler = dev->driver->err_handler;
	vote = err_handler->slot_reset(dev);
K
Keith Busch 已提交
124
	*result = merge_result(*result, vote);
125 126 127 128 129 130 131 132 133 134
out:
	device_unlock(&dev->dev);
	return 0;
}

static int report_resume(struct pci_dev *dev, void *data)
{
	const struct pci_error_handlers *err_handler;

	device_lock(&dev->dev);
K
Keith Busch 已提交
135 136
	if (!pci_dev_set_io_state(dev, pci_channel_io_normal) ||
		!dev->driver ||
137 138 139 140 141 142 143
		!dev->driver->err_handler ||
		!dev->driver->err_handler->resume)
		goto out;

	err_handler = dev->driver->err_handler;
	err_handler->resume(dev);
out:
144
	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
145 146 147 148 149 150 151 152 153 154 155 156 157
	device_unlock(&dev->dev);
	return 0;
}

/**
 * default_reset_link - default reset function
 * @dev: pointer to pci_dev data structure
 *
 * Invoked when performing link reset on a Downstream Port or a
 * Root Port with no aer driver.
 */
static pci_ers_result_t default_reset_link(struct pci_dev *dev)
{
158 159
	int rc;

160
	rc = pci_bus_error_reset(dev);
161
	pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
162
	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
163 164
}

165
/*
 * Reset the link below @dev.
 *
 * The reset_link method of the port service driver matching @service is
 * preferred. Failing that, downstream ports fall back to
 * default_reset_link(). Any other device has no way to reset the link.
 *
 * Returns PCI_ERS_RESULT_RECOVERED on success; every failure, including a
 * missing reset method, is reported as PCI_ERS_RESULT_DISCONNECT.
 */
static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
{
	struct pcie_port_service_driver *svc_drv;
	pci_ers_result_t outcome;

	svc_drv = pcie_port_find_service(dev, service);

	if (svc_drv && svc_drv->reset_link) {
		outcome = svc_drv->reset_link(dev);
	} else if (pcie_downstream_port(dev)) {
		outcome = default_reset_link(dev);
	} else {
		pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
			pci_name(dev));
		return PCI_ERS_RESULT_DISCONNECT;
	}

	if (outcome == PCI_ERS_RESULT_RECOVERED)
		return outcome;

	/* Anything short of RECOVERED is collapsed into DISCONNECT. */
	pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
		pci_name(dev));
	return PCI_ERS_RESULT_DISCONNECT;
}

190 191
/*
 * Drive the error recovery sequence for the hierarchy affected by an error
 * reported on @dev.
 *
 * @dev:     device the error was reported for
 * @state:   channel state to announce (frozen or normal)
 * @service: port service used to look up a link reset method
 *
 * Steps, in order: broadcast error_detected; reset the link when the channel
 * is frozen; broadcast mmio_enabled if every vote was CAN_RECOVER; broadcast
 * slot_reset if a reset was requested; finally broadcast resume and clear
 * the AER status. Any failure along the way ends with a DISCONNECT uevent.
 */
void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
		      u32 service)
{
	int (*notify_detected)(struct pci_dev *, void *);
	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
	struct pci_bus *bus;

	/*
	 * Error recovery runs on all subordinates of the first downstream port.
	 * If the downstream port detected the error, it is cleared at the end.
	 * Anything that is not a Root Port or Downstream Port is therefore
	 * replaced by the bridge leading to its bus.
	 */
	switch (pci_pcie_type(dev)) {
	case PCI_EXP_TYPE_ROOT_PORT:
	case PCI_EXP_TYPE_DOWNSTREAM:
		break;
	default:
		dev = dev->bus->self;
		break;
	}
	bus = dev->subordinate;

	if (state == pci_channel_io_frozen)
		notify_detected = report_frozen_detected;
	else
		notify_detected = report_normal_detected;

	pci_dbg(dev, "broadcast error_detected message\n");
	pci_walk_bus(bus, notify_detected, &status);

	/* A frozen channel needs a successful link reset before continuing. */
	if (state == pci_channel_io_frozen &&
	    reset_link(dev, service) != PCI_ERS_RESULT_RECOVERED)
		goto failed;

	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
		/* Start from RECOVERED and let the drivers' votes adjust it. */
		status = PCI_ERS_RESULT_RECOVERED;
		pci_dbg(dev, "broadcast mmio_enabled message\n");
		pci_walk_bus(bus, report_mmio_enabled, &status);
	}

	if (status == PCI_ERS_RESULT_NEED_RESET) {
		/*
		 * TODO: Should call platform-specific
		 * functions to reset slot before calling
		 * drivers' slot_reset callbacks?
		 */
		status = PCI_ERS_RESULT_RECOVERED;
		pci_dbg(dev, "broadcast slot_reset message\n");
		pci_walk_bus(bus, report_slot_reset, &status);
	}

	if (status != PCI_ERS_RESULT_RECOVERED)
		goto failed;

	pci_dbg(dev, "broadcast resume message\n");
	pci_walk_bus(bus, report_resume, &status);

	/* Recovery succeeded: clear the error status on the port itself. */
	pci_aer_clear_device_status(dev);
	pci_cleanup_aer_uncorrect_error_status(dev);
	pci_info(dev, "device recovery successful\n");
	return;

failed:
	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);

	/* TODO: Should kernel panic here? */
	pci_info(dev, "device recovery failed\n");
}