aerdrv_errprint.c 7.1 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Format error messages and print them to console.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/suspend.h>
16
#include <linux/cper.h>
17 18

#include "aerdrv.h"
19
#include <ras/ras_event.h>
L
Lance Ortiz 已提交
20

21 22 23 24 25
/*
 * Agent classification codes produced by AER_GET_AGENT(); they are also
 * the indices into aer_agent_string[].
 */
enum {
	AER_AGENT_RECEIVER	= 0,
	AER_AGENT_REQUESTER	= 1,
	AER_AGENT_COMPLETER	= 2,
	AER_AGENT_TRANSMITTER	= 3,
};

26 27 28 29 30 31
/*
 * Status-bit masks that attribute an error to a given agent.
 *
 * @t: error severity. AER_CORRECTABLE selects the bits of the correctable
 *     status word, anything else selects the bits of the uncorrectable one.
 *
 * Each macro evaluates to 0 when no bit of the selected word maps to that
 * agent.  The argument is parenthesized so that compound expressions
 * (e.g. "sev & 3") compare as a whole against AER_CORRECTABLE.
 */
#define AER_AGENT_REQUESTER_MASK(t)	(((t) == AER_CORRECTABLE) ?	\
	0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
#define AER_AGENT_COMPLETER_MASK(t)	(((t) == AER_CORRECTABLE) ?	\
	0 : PCI_ERR_UNC_COMP_ABORT)
#define AER_AGENT_TRANSMITTER_MASK(t)	(((t) == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)

/*
 * Classify status word @e of severity @t as one of the AER_AGENT_* codes.
 *
 * The masks are tested in the order completer, requester, transmitter;
 * AER_AGENT_RECEIVER is the fallback when none of them matches.
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.  @e is parenthesized so that an argument such as "a | b"
 * is masked as a whole instead of binding "b & mask" first.
 */
#define AER_GET_AGENT(t, e)						\
	(((e) & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER :	\
	((e) & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER :	\
	((e) & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER :	\
	AER_AGENT_RECEIVER)

/*
 * Layer classification codes produced by AER_GET_LAYER_ERROR(); they are
 * also the indices into aer_error_layer[].
 */
enum {
	AER_PHYSICAL_LAYER_ERROR	= 0,
	AER_DATA_LINK_LAYER_ERROR	= 1,
	AER_TRANSACTION_LAYER_ERROR	= 2,
};

43 44 45 46 47 48 49 50 51 52 53 54
/*
 * Status-bit masks that attribute an error to a protocol layer.
 *
 * @t: error severity. AER_CORRECTABLE selects the bits of the correctable
 *     status word, anything else selects the bits of the uncorrectable one.
 *
 * The argument is parenthesized so that compound expressions compare as a
 * whole against AER_CORRECTABLE.
 */
#define AER_PHYSICAL_LAYER_ERROR_MASK(t) (((t) == AER_CORRECTABLE) ?	\
	PCI_ERR_COR_RCVR : 0)
#define AER_DATA_LINK_LAYER_ERROR_MASK(t) (((t) == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_BAD_TLP|						\
	PCI_ERR_COR_BAD_DLLP|						\
	PCI_ERR_COR_REP_ROLL|						\
	PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)

/*
 * Classify status word @e of severity @t as one of the AER_*_LAYER_ERROR
 * codes.  The physical-layer mask is tested first, then the data-link
 * mask; anything else is reported as a transaction-layer error.
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.  @e is parenthesized so that an argument such as "a | b"
 * is masked as a whole.
 */
#define AER_GET_LAYER_ERROR(t, e)					\
	(((e) & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
	((e) & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
	AER_TRANSACTION_LAYER_ERROR)
55 56 57 58

/*
 * AER error strings
 */
59
/*
 * Human-readable severity names, indexed by the severity value stored in
 * struct aer_err_info.
 * NOTE(review): the order presumably follows AER_NONFATAL, AER_FATAL,
 * AER_CORRECTABLE = 0, 1, 2 - confirm against the header defining them.
 * The pointers themselves are const so the table cannot be modified.
 */
static const char * const aer_error_severity_string[] = {
	[0] = "Uncorrected (Non-Fatal)",
	[1] = "Uncorrected (Fatal)",
	[2] = "Corrected",
};

65
/*
 * Layer names, indexed by the AER_*_LAYER_ERROR code returned from
 * AER_GET_LAYER_ERROR() (physical = 0, data link = 1, transaction = 2).
 * The pointers themselves are const so the table cannot be modified.
 */
static const char * const aer_error_layer[] = {
	[0] = "Physical Layer",
	[1] = "Data Link Layer",
	[2] = "Transaction Layer",
};
70 71 72

/*
 * Names of the correctable error status bits, indexed by bit position.
 * Positions without a designator are NULL; __aer_print_error() reports
 * those as "Unknown Error Bit".
 */
static const char * const aer_correctable_error_string[] = {
	[0]  = "Receiver Error",
	[6]  = "Bad TLP",
	[7]  = "Bad DLLP",
	[8]  = "REPLAY_NUM Rollover",	/* was misspelled "RELAY_NUM" */
	[12] = "Replay Timer Timeout",
	[13] = "Advisory Non-Fatal",
	[14] = "Corrected Internal Error",
	[15] = "Header Log Overflow",
};

90
/*
 * Names of the uncorrectable error status bits, indexed by bit position.
 * Positions without a designator are NULL; __aer_print_error() reports
 * those as "Unknown Error Bit".
 */
static const char * const aer_uncorrectable_error_string[] = {
	[0]  = "Undefined",
	[4]  = "Data Link Protocol",
	[5]  = "Surprise Down Error",
	[12] = "Poisoned TLP",
	[13] = "Flow Control Protocol",
	[14] = "Completion Timeout",
	[15] = "Completer Abort",
	[16] = "Unexpected Completion",
	[17] = "Receiver Overflow",
	[18] = "Malformed TLP",
	[19] = "ECRC",
	[20] = "Unsupported Request",
	[21] = "ACS Violation",
	[22] = "Uncorrectable Internal Error",
	[23] = "MC Blocked TLP",
	[24] = "AtomicOp Egress Blocked",
	[25] = "TLP Prefix Blocked Error",
};

119
/*
 * Agent names, indexed by the AER_AGENT_* code returned from
 * AER_GET_AGENT() (receiver = 0, requester = 1, completer = 2,
 * transmitter = 3).
 * The pointers themselves are const so the table cannot be modified.
 */
static const char * const aer_agent_string[] = {
	[0] = "Receiver ID",
	[1] = "Requester ID",
	[2] = "Completer ID",
	[3] = "Transmitter ID",
};

126 127 128
static void __print_tlp_header(struct pci_dev *dev,
			       struct aer_header_log_regs *t)
{
129
	pci_err(dev, "  TLP Header: %08x %08x %08x %08x\n",
130
		t->dw0, t->dw1, t->dw2, t->dw3);
131 132
}

L
Lance Ortiz 已提交
133
static void __aer_print_error(struct pci_dev *dev,
134
			      struct aer_err_info *info)
135
{
136
	int i, status;
137
	const char *errmsg = NULL;
138 139
	status = (info->status & ~info->mask);

140
	for (i = 0; i < 32; i++) {
141
		if (!(status & (1 << i)))
142 143
			continue;

144
		if (info->severity == AER_CORRECTABLE)
145 146
			errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
				aer_correctable_error_string[i] : NULL;
147
		else
148 149
			errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
				aer_uncorrectable_error_string[i] : NULL;
150

151
		if (errmsg)
152
			pci_err(dev, "   [%2d] %-22s%s\n", i, errmsg,
153
				info->first_error == i ? " (First)" : "");
154
		else
155
			pci_err(dev, "   [%2d] Unknown Error Bit%s\n",
L
Lance Ortiz 已提交
156
				i, info->first_error == i ? " (First)" : "");
157 158 159 160 161
	}
}

void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
162
	int layer, agent;
163
	int id = ((dev->bus->number << 8) | dev->devfn);
164

165
	if (!info->status) {
166 167
		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
			aer_error_severity_string[info->severity]);
168 169
		goto out;
	}
170

171 172
	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
	agent = AER_GET_AGENT(info->severity, info->status);
173

174
	pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
175
		aer_error_severity_string[info->severity],
176
		aer_error_layer[layer], aer_agent_string[agent]);
177

178
	pci_err(dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
179 180
		dev->vendor, dev->device,
		info->status, info->mask);
181

182
	__aer_print_error(dev, info);
183

184 185
	if (info->tlp_header_valid)
		__print_tlp_header(dev, &info->tlp);
186

187
out:
188
	if (info->id && info->error_dev_num > 1 && info->id == id)
189
		pci_err(dev, "  Error of this Agent is reported first\n");
190

L
Lance Ortiz 已提交
191
	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
192
			info->severity, info->tlp_header_valid, &info->tlp);
193 194 195 196
}

void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
{
197 198 199 200
	u8 bus = info->id >> 8;
	u8 devfn = info->id & 0xff;

	pci_info(dev, "AER: %s%s error received: %04x:%02x:%02x.%d\n",
201
		info->multi_error_valid ? "Multiple " : "",
202 203
		aer_error_severity_string[info->severity],
		pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
204
}
205 206

#ifdef CONFIG_ACPI_APEI_PCIEAER
207
int cper_severity_to_aer(int cper_severity)
208 209 210 211 212 213 214 215 216 217
{
	switch (cper_severity) {
	case CPER_SEV_RECOVERABLE:
		return AER_NONFATAL;
	case CPER_SEV_FATAL:
		return AER_FATAL;
	default:
		return AER_CORRECTABLE;
	}
}
218
EXPORT_SYMBOL_GPL(cper_severity_to_aer);
219

220
void cper_print_aer(struct pci_dev *dev, int aer_severity,
221 222
		    struct aer_capability_regs *aer)
{
223
	int layer, agent, tlp_header_valid = 0;
224
	u32 status, mask;
225
	struct aer_err_info info;
226 227 228 229 230 231 232 233 234

	if (aer_severity == AER_CORRECTABLE) {
		status = aer->cor_status;
		mask = aer->cor_mask;
	} else {
		status = aer->uncor_status;
		mask = aer->uncor_mask;
		tlp_header_valid = status & AER_LOG_TLP_MASKS;
	}
235

236 237
	layer = AER_GET_LAYER_ERROR(aer_severity, status);
	agent = AER_GET_AGENT(aer_severity, status);
238

239 240 241 242 243 244
	memset(&info, 0, sizeof(info));
	info.severity = aer_severity;
	info.status = status;
	info.mask = mask;
	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);

245
	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
246
	__aer_print_error(dev, &info);
247
	pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
248 249
		aer_error_layer[layer], aer_agent_string[agent]);

250
	if (aer_severity != AER_CORRECTABLE)
251
		pci_err(dev, "aer_uncor_severity: 0x%08x\n",
252
			aer->uncor_severity);
253 254 255 256

	if (tlp_header_valid)
		__print_tlp_header(dev, &aer->header_log);

L
Lance Ortiz 已提交
257
	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
258
			aer_severity, tlp_header_valid, &aer->header_log);
259 260
}
#endif