err.c 5.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
// SPDX-License-Identifier: GPL-2.0
/*
 * This file implements the error recovery as a core part of PCIe error
 * reporting. When a PCIe error is delivered, an error message will be
 * collected and printed to console, then, an error recovery procedure
 * will be executed by following the PCI error recovery rules.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 */

13 14
#define dev_fmt(fmt) "AER: " fmt

15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/aer.h>
#include "portdrv.h"
#include "../pci.h"

/*
 * Fold one device's vote (@new) into the result accumulated so far (@orig)
 * and return the combined result.
 *
 *  - A NO_AER_DRIVER vote always wins, whatever was accumulated.
 *  - A NONE vote leaves the accumulated result untouched.
 *  - While the accumulated result is CAN_RECOVER or RECOVERED, the new
 *    vote replaces it.
 *  - An accumulated DISCONNECT is only changed by a NEED_RESET vote.
 *  - Any other accumulated result is kept as is.
 */
static pci_ers_result_t merge_result(enum pci_ers_result orig,
				  enum pci_ers_result new)
{
	if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
		return PCI_ERS_RESULT_NO_AER_DRIVER;

	if (new == PCI_ERS_RESULT_NONE)
		return orig;

	if (orig == PCI_ERS_RESULT_CAN_RECOVER ||
	    orig == PCI_ERS_RESULT_RECOVERED)
		return new;

	if (orig == PCI_ERS_RESULT_DISCONNECT &&
	    new == PCI_ERS_RESULT_NEED_RESET)
		return PCI_ERS_RESULT_NEED_RESET;

	return orig;
}

K
Keith Busch 已提交
48
static int report_error_detected(struct pci_dev *dev,
49
				 pci_channel_state_t state,
K
Keith Busch 已提交
50
				 enum pci_ers_result *result)
51 52 53 54 55
{
	pci_ers_result_t vote;
	const struct pci_error_handlers *err_handler;

	device_lock(&dev->dev);
K
Keith Busch 已提交
56 57
	if (!pci_dev_set_io_state(dev, state) ||
		!dev->driver ||
58 59 60
		!dev->driver->err_handler ||
		!dev->driver->err_handler->error_detected) {
		/*
61 62 63 64
		 * If any device in the subtree does not have an error_detected
		 * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
		 * error callbacks of "any" device in the subtree, and will
		 * exit in the disconnected error state.
65
		 */
66
		if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
67
			vote = PCI_ERS_RESULT_NO_AER_DRIVER;
68
			pci_info(dev, "can't recover (no error_detected callback)\n");
69
		} else {
70
			vote = PCI_ERS_RESULT_NONE;
71
		}
72 73
	} else {
		err_handler = dev->driver->err_handler;
K
Keith Busch 已提交
74
		vote = err_handler->error_detected(dev, state);
75
	}
76
	pci_uevent_ers(dev, vote);
K
Keith Busch 已提交
77
	*result = merge_result(*result, vote);
78 79 80 81
	device_unlock(&dev->dev);
	return 0;
}

K
Keith Busch 已提交
82 83 84 85 86 87 88 89 90 91
/*
 * Bus-walk callback: report an error to @dev with the channel state
 * pci_channel_io_frozen.  @data is the result accumulator handed on to
 * report_error_detected().
 */
static int report_frozen_detected(struct pci_dev *dev, void *data)
{
	enum pci_ers_result *result = data;

	return report_error_detected(dev, pci_channel_io_frozen, result);
}

/*
 * Bus-walk callback: report an error to @dev with the channel state
 * pci_channel_io_normal.  @data is the result accumulator handed on to
 * report_error_detected().
 */
static int report_normal_detected(struct pci_dev *dev, void *data)
{
	enum pci_ers_result *result = data;

	return report_error_detected(dev, pci_channel_io_normal, result);
}

92 93
static int report_mmio_enabled(struct pci_dev *dev, void *data)
{
K
Keith Busch 已提交
94
	pci_ers_result_t vote, *result = data;
95 96 97 98 99 100 101 102 103 104
	const struct pci_error_handlers *err_handler;

	device_lock(&dev->dev);
	if (!dev->driver ||
		!dev->driver->err_handler ||
		!dev->driver->err_handler->mmio_enabled)
		goto out;

	err_handler = dev->driver->err_handler;
	vote = err_handler->mmio_enabled(dev);
K
Keith Busch 已提交
105
	*result = merge_result(*result, vote);
106 107 108 109 110 111 112
out:
	device_unlock(&dev->dev);
	return 0;
}

static int report_slot_reset(struct pci_dev *dev, void *data)
{
K
Keith Busch 已提交
113
	pci_ers_result_t vote, *result = data;
114 115 116 117 118 119 120 121 122 123
	const struct pci_error_handlers *err_handler;

	device_lock(&dev->dev);
	if (!dev->driver ||
		!dev->driver->err_handler ||
		!dev->driver->err_handler->slot_reset)
		goto out;

	err_handler = dev->driver->err_handler;
	vote = err_handler->slot_reset(dev);
K
Keith Busch 已提交
124
	*result = merge_result(*result, vote);
125 126 127 128 129 130 131 132 133 134
out:
	device_unlock(&dev->dev);
	return 0;
}

static int report_resume(struct pci_dev *dev, void *data)
{
	const struct pci_error_handlers *err_handler;

	device_lock(&dev->dev);
K
Keith Busch 已提交
135 136
	if (!pci_dev_set_io_state(dev, pci_channel_io_normal) ||
		!dev->driver ||
137 138 139 140 141 142 143
		!dev->driver->err_handler ||
		!dev->driver->err_handler->resume)
		goto out;

	err_handler = dev->driver->err_handler;
	err_handler->resume(dev);
out:
144
	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
145 146 147 148
	device_unlock(&dev->dev);
	return 0;
}

149
pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
150 151
		pci_channel_state_t state,
		pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev))
152
{
153
	int type = pci_pcie_type(dev);
154
	struct pci_dev *bridge;
K
Keith Busch 已提交
155
	struct pci_bus *bus;
156
	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
157

158
	/*
159 160
	 * Error recovery runs on all subordinates of the bridge.  If the
	 * bridge detected the error, it is cleared at the end.
161
	 */
162 163
	if (type == PCI_EXP_TYPE_ROOT_PORT ||
	    type == PCI_EXP_TYPE_DOWNSTREAM)
164
		bridge = dev;
165 166
	else
		bridge = pci_upstream_bridge(dev);
167

168 169
	bus = bridge->subordinate;
	pci_dbg(bridge, "broadcast error_detected message\n");
170
	if (state == pci_channel_io_frozen) {
K
Keith Busch 已提交
171
		pci_walk_bus(bus, report_frozen_detected, &status);
172
		status = reset_subordinates(bridge);
173
		if (status != PCI_ERS_RESULT_RECOVERED) {
174
			pci_warn(bridge, "subordinate device reset failed\n");
175
			goto failed;
176
		}
177
	} else {
K
Keith Busch 已提交
178
		pci_walk_bus(bus, report_normal_detected, &status);
179
	}
180

K
Keith Busch 已提交
181 182
	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
		status = PCI_ERS_RESULT_RECOVERED;
183
		pci_dbg(bridge, "broadcast mmio_enabled message\n");
K
Keith Busch 已提交
184 185
		pci_walk_bus(bus, report_mmio_enabled, &status);
	}
186 187 188 189 190 191 192

	if (status == PCI_ERS_RESULT_NEED_RESET) {
		/*
		 * TODO: Should call platform-specific
		 * functions to reset slot before calling
		 * drivers' slot_reset callbacks?
		 */
K
Keith Busch 已提交
193
		status = PCI_ERS_RESULT_RECOVERED;
194
		pci_dbg(bridge, "broadcast slot_reset message\n");
K
Keith Busch 已提交
195
		pci_walk_bus(bus, report_slot_reset, &status);
196 197 198 199 200
	}

	if (status != PCI_ERS_RESULT_RECOVERED)
		goto failed;

201
	pci_dbg(bridge, "broadcast resume message\n");
K
Keith Busch 已提交
202
	pci_walk_bus(bus, report_resume, &status);
203

204 205 206 207
	if (pcie_aer_is_native(bridge))
		pcie_clear_device_status(bridge);
	pci_aer_clear_nonfatal_status(bridge);
	pci_info(bridge, "device recovery successful\n");
208
	return status;
209 210

failed:
211
	pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);
212 213

	/* TODO: Should kernel panic here? */
214
	pci_info(bridge, "device recovery failed\n");
215 216

	return status;
217
}