aerdrv_errprint.c 7.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * drivers/pci/pcie/aer/aerdrv_errprint.c
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Format error messages and print them to console.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 *
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/suspend.h>
22
#include <linux/cper.h>
23 24 25

#include "aerdrv.h"

L
Lance Ortiz 已提交
26 27 28
#define CREATE_TRACE_POINTS
#include <trace/events/ras.h>

29 30 31 32 33
/*
 * Agent classification.
 *
 * The AER_AGENT_* values are the results of AER_GET_AGENT() and are used
 * as indexes into aer_agent_string[].
 */
#define AER_AGENT_RECEIVER		0
#define AER_AGENT_REQUESTER		1
#define AER_AGENT_COMPLETER		2
#define AER_AGENT_TRANSMITTER		3

/*
 * Status bits that identify each agent for severity @t.  A mask of 0
 * means no bit of that severity class selects the agent.  The macro
 * argument is parenthesized so that any expression may be passed.
 */
#define AER_AGENT_REQUESTER_MASK(t)	(((t) == AER_CORRECTABLE) ?	\
	0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
#define AER_AGENT_COMPLETER_MASK(t)	(((t) == AER_CORRECTABLE) ?	\
	0 : PCI_ERR_UNC_COMP_ABORT)
#define AER_AGENT_TRANSMITTER_MASK(t)	(((t) == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)

/*
 * Map severity @t and error status @e to an AER_AGENT_* value.  The masks
 * are tested in the order completer, requester, transmitter; when none
 * matches the result is AER_AGENT_RECEIVER.  @e is parenthesized so that
 * an argument such as "a | b" is masked as a whole.
 */
#define AER_GET_AGENT(t, e)						\
	(((e) & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER :	\
	((e) & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER :	\
	((e) & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER :	\
	AER_AGENT_RECEIVER)

/*
 * Layer classification.
 *
 * The AER_*_LAYER_ERROR values are the results of AER_GET_LAYER_ERROR()
 * and are used as indexes into aer_error_layer[].
 */
#define AER_PHYSICAL_LAYER_ERROR	0
#define AER_DATA_LINK_LAYER_ERROR	1
#define AER_TRANSACTION_LAYER_ERROR	2

/*
 * Status bits that identify each layer for severity @t.  A mask of 0
 * means no bit of that severity class selects the layer.  The macro
 * argument is parenthesized so that any expression may be passed.
 */
#define AER_PHYSICAL_LAYER_ERROR_MASK(t) (((t) == AER_CORRECTABLE) ?	\
	PCI_ERR_COR_RCVR : 0)
#define AER_DATA_LINK_LAYER_ERROR_MASK(t) (((t) == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_BAD_TLP|						\
	PCI_ERR_COR_BAD_DLLP|						\
	PCI_ERR_COR_REP_ROLL|						\
	PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)

/*
 * Map severity @t and error status @e to an AER_*_LAYER_ERROR value.
 * The physical layer mask is tested first, then the data link layer
 * mask; anything else is reported as a transaction layer error.  @e is
 * parenthesized so that an argument such as "a | b" is masked as a whole.
 */
#define AER_GET_LAYER_ERROR(t, e)					\
	(((e) & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
	((e) & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
	AER_TRANSACTION_LAYER_ERROR)
63 64 65 66

/*
 * AER error strings
 */
67
/* Severity names; callers in this file index the table by severity value. */
static const char *aer_error_severity_string[] = {
	[0] = "Uncorrected (Non-Fatal)",
	[1] = "Uncorrected (Fatal)",
	[2] = "Corrected",
};

73
/* Layer names, indexed by the value AER_GET_LAYER_ERROR() yields. */
static const char *aer_error_layer[] = {
	[0] = "Physical Layer",		/* AER_PHYSICAL_LAYER_ERROR	*/
	[1] = "Data Link Layer",	/* AER_DATA_LINK_LAYER_ERROR	*/
	[2] = "Transaction Layer",	/* AER_TRANSACTION_LAYER_ERROR	*/
};
78 79 80

/*
 * Names of the correctable error status bits, indexed by bit position.
 * Positions without an initializer are NULL; the printing code in this
 * file treats a NULL entry as a bit with no known name.
 */
static const char *aer_correctable_error_string[] = {
	[0]  = "Receiver Error",	/* Bit Position 0	*/
	[6]  = "Bad TLP",		/* Bit Position 6	*/
	[7]  = "Bad DLLP",		/* Bit Position 7	*/
	[8]  = "REPLAY_NUM Rollover",	/* Bit Position 8	*/
	[12] = "Replay Timer Timeout",	/* Bit Position 12	*/
	[13] = "Advisory Non-Fatal",	/* Bit Position 13	*/
};

96
/*
 * Names of the uncorrectable error status bits, indexed by bit position.
 * Positions without an initializer are NULL; the printing code in this
 * file treats a NULL entry as a bit with no known name.
 */
static const char *aer_uncorrectable_error_string[] = {
	[4]  = "Data Link Protocol",	/* Bit Position 4	*/
	[12] = "Poisoned TLP",		/* Bit Position 12	*/
	[13] = "Flow Control Protocol",	/* Bit Position 13	*/
	[14] = "Completion Timeout",	/* Bit Position 14	*/
	[15] = "Completer Abort",	/* Bit Position 15	*/
	[16] = "Unexpected Completion",	/* Bit Position 16	*/
	[17] = "Receiver Overflow",	/* Bit Position 17	*/
	[18] = "Malformed TLP",		/* Bit Position 18	*/
	[19] = "ECRC",			/* Bit Position 19	*/
	[20] = "Unsupported Request",	/* Bit Position 20	*/
};

120
/* Agent names, indexed by the value AER_GET_AGENT() yields. */
static const char *aer_agent_string[] = {
	[0] = "Receiver ID",		/* AER_AGENT_RECEIVER		*/
	[1] = "Requester ID",		/* AER_AGENT_REQUESTER		*/
	[2] = "Completer ID",		/* AER_AGENT_COMPLETER		*/
	[3] = "Transmitter ID",		/* AER_AGENT_TRANSMITTER	*/
};

127 128
static void __aer_print_error(const char *prefix,
			      struct aer_err_info *info)
129
{
130
	int i, status;
131
	const char *errmsg = NULL;
132

133 134
	status = (info->status & ~info->mask);

135
	for (i = 0; i < 32; i++) {
136
		if (!(status & (1 << i)))
137 138
			continue;

139
		if (info->severity == AER_CORRECTABLE)
140 141
			errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
				aer_correctable_error_string[i] : NULL;
142
		else
143 144
			errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
				aer_uncorrectable_error_string[i] : NULL;
145

146
		if (errmsg)
147
			printk("%s""   [%2d] %-22s%s\n", prefix, i, errmsg,
148
				info->first_error == i ? " (First)" : "");
149
		else
150
			printk("%s""   [%2d] Unknown Error Bit%s\n", prefix, i,
151
				info->first_error == i ? " (First)" : "");
152 153 154 155 156
	}
}

void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
157
	int id = ((dev->bus->number << 8) | dev->devfn);
158 159 160 161 162
	char prefix[44];

	snprintf(prefix, sizeof(prefix), "%s%s %s: ",
		 (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR,
		 dev_driver_string(&dev->dev), dev_name(&dev->dev));
163

164
	if (info->status == 0) {
165 166
		printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, "
			"id=%04x(Unregistered Agent ID)\n", prefix,
167
			aer_error_severity_string[info->severity], id);
168
	} else {
169
		int layer, agent;
170

171
		layer = AER_GET_LAYER_ERROR(info->severity, info->status);
172 173
		agent = AER_GET_AGENT(info->severity, info->status);

174 175
		printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
			prefix, aer_error_severity_string[info->severity],
176 177
			aer_error_layer[layer], id, aer_agent_string[agent]);

178 179 180
		printk("%s""  device [%04x:%04x] error status/mask=%08x/%08x\n",
			prefix, dev->vendor, dev->device,
			info->status, info->mask);
181

182
		__aer_print_error(prefix, info);
183

H
Hidetoshi Seto 已提交
184
		if (info->tlp_header_valid) {
185
			unsigned char *tlp = (unsigned char *) &info->tlp;
186
			printk("%s""  TLP Header:"
187
				" %02x%02x%02x%02x %02x%02x%02x%02x"
188
				" %02x%02x%02x%02x %02x%02x%02x%02x\n",
189
				prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
190 191 192 193 194 195
				*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
				*(tlp + 11), *(tlp + 10), *(tlp + 9),
				*(tlp + 8), *(tlp + 15), *(tlp + 14),
				*(tlp + 13), *(tlp + 12));
		}
	}
196 197

	if (info->id && info->error_dev_num > 1 && info->id == id)
198 199
		printk("%s""  Error of this Agent(%04x) is reported first\n",
			prefix, id);
L
Lance Ortiz 已提交
200 201
	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
			info->severity);
202 203 204 205 206 207 208
}

/*
 * Log, via dev_info() on @dev, that an error was received: whether
 * @info flags multiple errors, the severity name and the source id.
 */
void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
{
	const char *multiple = "";

	if (info->multi_error_valid)
		multiple = "Multiple ";

	dev_info(&dev->dev, "AER: %s%s error received: id=%04x\n",
		multiple, aer_error_severity_string[info->severity],
		info->id);
}
210 211

#ifdef CONFIG_ACPI_APEI_PCIEAER
212
int cper_severity_to_aer(int cper_severity)
213 214 215 216 217 218 219 220 221 222
{
	switch (cper_severity) {
	case CPER_SEV_RECOVERABLE:
		return AER_NONFATAL;
	case CPER_SEV_FATAL:
		return AER_FATAL;
	default:
		return AER_CORRECTABLE;
	}
}
223
EXPORT_SYMBOL_GPL(cper_severity_to_aer);
224

L
Lance Ortiz 已提交
225
void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
		    struct aer_capability_regs *aer)
{
	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
	u32 status, mask;
	const char **status_strs;

	aer_severity = cper_severity_to_aer(cper_severity);
	if (aer_severity == AER_CORRECTABLE) {
		status = aer->cor_status;
		mask = aer->cor_mask;
		status_strs = aer_correctable_error_string;
		status_strs_size = ARRAY_SIZE(aer_correctable_error_string);
	} else {
		status = aer->uncor_status;
		mask = aer->uncor_mask;
		status_strs = aer_uncorrectable_error_string;
		status_strs_size = ARRAY_SIZE(aer_uncorrectable_error_string);
		tlp_header_valid = status & AER_LOG_TLP_MASKS;
	}
	layer = AER_GET_LAYER_ERROR(aer_severity, status);
	agent = AER_GET_AGENT(aer_severity, status);
	printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n",
	       prefix, status, mask);
	cper_print_bits(prefix, status, status_strs, status_strs_size);
	printk("%s""aer_layer=%s, aer_agent=%s\n", prefix,
	       aer_error_layer[layer], aer_agent_string[agent]);
	if (aer_severity != AER_CORRECTABLE)
		printk("%s""aer_uncor_severity: 0x%08x\n",
		       prefix, aer->uncor_severity);
	if (tlp_header_valid) {
		const unsigned char *tlp;
		tlp = (const unsigned char *)&aer->header_log;
		printk("%s""aer_tlp_header:"
			" %02x%02x%02x%02x %02x%02x%02x%02x"
			" %02x%02x%02x%02x %02x%02x%02x%02x\n",
			prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
			*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
			*(tlp + 11), *(tlp + 10), *(tlp + 9),
			*(tlp + 8), *(tlp + 15), *(tlp + 14),
			*(tlp + 13), *(tlp + 12));
	}
L
Lance Ortiz 已提交
267 268
	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
			aer_severity);
269 270
}
#endif