edac_pci_sysfs.c 15.7 KB
Newer Older
1
/*
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Doug Thompson <norsk5@xmission.com>
 *
 */
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/ctype.h>

13
#include "edac_core.h"
14 15 16
#include "edac_module.h"

#ifdef CONFIG_PCI
17 18 19 20

#define EDAC_PCI_SYMLINK	"device"

static int check_pci_errors = 0;	/* default YES check PCI parity */
D
Dave Jiang 已提交
21 22 23
static int edac_pci_panic_on_pe = 0;	/* default no panic on PCI Parity */
static int edac_pci_log_pe = 1;	/* log PCI parity errors */
static int edac_pci_log_npe = 1;	/* log PCI non-parity error errors */
24
static atomic_t pci_parity_count = ATOMIC_INIT(0);
25
static atomic_t pci_nonparity_count = ATOMIC_INIT(0);
D
Dave Jiang 已提交
26
static int edac_pci_poll_msec = 1000;
27

28
static struct kobject edac_pci_kobj;	/* /sys/devices/system/edac/pci */
29
static struct completion edac_pci_kobj_complete;
30 31
static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0);

D
Dave Jiang 已提交
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
/*
 * edac_pci_get_check_errors
 *
 *	accessor: hands back the current value of the 'check_pci_errors'
 *	control (0=off, non-zero=on)
 */
int edac_pci_get_check_errors(void)
{
	const int enabled = check_pci_errors;

	return enabled;
}

/*
 * edac_pci_get_log_pe
 *
 *	accessor: hands back the current value of the 'edac_pci_log_pe'
 *	control, which gates the log message in edac_pci_handle_pe()
 */
int edac_pci_get_log_pe(void)
{
	const int log_enabled = edac_pci_log_pe;

	return log_enabled;
}

/*
 * edac_pci_get_log_npe
 *
 *	accessor: hands back the current value of the 'edac_pci_log_npe'
 *	control, which gates the log message in edac_pci_handle_npe()
 */
int edac_pci_get_log_npe(void)
{
	const int log_enabled = edac_pci_log_npe;

	return log_enabled;
}

/*
 * edac_pci_get_panic_on_pe
 *
 *	accessor: hands back the current value of the
 *	'edac_pci_panic_on_pe' control, consulted by
 *	edac_pci_do_parity_check() after a scan
 */
int edac_pci_get_panic_on_pe(void)
{
	const int panic_enabled = edac_pci_panic_on_pe;

	return panic_enabled;
}

/*
 * edac_pci_get_poll_msec
 *
 *	accessor: hands back the current value of 'edac_pci_poll_msec'
 */
int edac_pci_get_poll_msec(void)
{
	const int msec = edac_pci_poll_msec;

	return msec;
}

57 58 59
/**************************** EDAC PCI sysfs instance *******************/
static ssize_t instance_pe_count_show(struct edac_pci_ctl_info *pci, char *data)
{
60
	return sprintf(data, "%u\n", atomic_read(&pci->counters.pe_count));
61 62 63
}

static ssize_t instance_npe_count_show(struct edac_pci_ctl_info *pci,
64
				       char *data)
65
{
66
	return sprintf(data, "%u\n", atomic_read(&pci->counters.npe_count));
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
}

/* map an embedded kobject back to its owning edac_pci_ctl_info */
#define to_instance(k) \
	container_of((k), struct edac_pci_ctl_info, kobj)

/* map an embedded attribute back to its owning instance_attribute */
#define to_instance_attr(a) \
	container_of((a), struct instance_attribute, attr)

/*
 * release() callback for a per-instance kobject (ktype_pci_instance).
 *
 * Nothing is freed here; it only signals pci->kobj_complete, which
 * edac_pci_delete_instance_kobj() is waiting on.
 */
static void edac_pci_instance_release(struct kobject *kobj)
{
	struct edac_pci_ctl_info *pci = to_instance(kobj);

	debugf1("%s()\n", __func__);

	complete(&pci->kobj_complete);
}

/* instance specific attribute structure */
struct instance_attribute {
85 86 87
	struct attribute attr;
	 ssize_t(*show) (struct edac_pci_ctl_info *, char *);
	 ssize_t(*store) (struct edac_pci_ctl_info *, const char *, size_t);
88 89 90 91
};

/* Function to 'show' fields from the edac_pci 'instance' structure */
static ssize_t edac_pci_instance_show(struct kobject *kobj,
92
				      struct attribute *attr, char *buffer)
93
{
94 95
	struct edac_pci_ctl_info *pci = to_instance(kobj);
	struct instance_attribute *instance_attr = to_instance_attr(attr);
96

97 98 99
	if (instance_attr->show)
		return instance_attr->show(pci, buffer);
	return -EIO;
100 101 102 103
}

/*
 * sysfs 'store' dispatcher for instance attributes.
 *
 * Recovers the edac_pci_ctl_info from 'kobj' and the instance_attribute
 * from 'attr', then forwards to the attribute's own store handler.
 * Returns the handler's result, or -EIO when no handler is set.
 */
static ssize_t edac_pci_instance_store(struct kobject *kobj,
				       struct attribute *attr,
				       const char *buffer, size_t count)
{
	struct instance_attribute *iattr = to_instance_attr(attr);
	struct edac_pci_ctl_info *pci = to_instance(kobj);

	if (!iattr->store)
		return -EIO;

	return iattr->store(pci, buffer, count);
}

/* show/store dispatch table used by ktype_pci_instance */
static struct sysfs_ops pci_instance_ops = {
	.store	= edac_pci_instance_store,
	.show	= edac_pci_instance_show,
};

/*
 * Define a static instance_attribute named attr_instance_<_name> whose
 * sysfs file name is the stringified _name.
 */
#define INSTANCE_ATTR(_name, _mode, _show, _store)		\
static struct instance_attribute attr_instance_##_name = {	\
	.attr	= {						\
		.name = __stringify(_name),			\
		.mode = _mode					\
	},							\
	.show	= _show,					\
	.store	= _store,					\
};

/* read-only (S_IRUGO) per-instance counters; no store handler */
INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL);
INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL);

/*
 * NULL-terminated list of the attributes attached to every pci
 * instance kobject (referenced from ktype_pci_instance).
 */
static struct instance_attribute *pci_instance_attr[] = {
	&attr_instance_pe_count,
	&attr_instance_npe_count,
	NULL,
};
136

137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
/*
 * kobj_type for per-instance 'pciN' kobjects.
 *
 * The default_attrs cast treats each instance_attribute pointer as a
 * pointer to its leading 'struct attribute' member.
 */
static struct kobj_type ktype_pci_instance = {
	.sysfs_ops	= &pci_instance_ops,
	.default_attrs	= (struct attribute **)pci_instance_attr,
	.release	= edac_pci_instance_release,
};

static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
{
	int err;

	pci->kobj.parent = &edac_pci_kobj;
	pci->kobj.ktype = &ktype_pci_instance;

	err = kobject_set_name(&pci->kobj, "pci%d", idx);
	if (err)
		return err;

	err = kobject_register(&pci->kobj);
	if (err != 0) {
		debugf2("%s() failed to register instance pci%d\n",
158
			__func__, idx);
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
		return err;
	}

	debugf1("%s() Register instance 'pci%d' kobject\n", __func__, idx);

	return 0;
}

/*
 * edac_pci_delete_instance_kobj
 *
 *	unregister the instance kobject, then block until its release
 *	callback (edac_pci_instance_release) has signalled kobj_complete.
 *	'idx' is accepted but not used.
 */
static void edac_pci_delete_instance_kobj(struct edac_pci_ctl_info *pci,
					  int idx)
{
	struct completion *released = &pci->kobj_complete;

	/* must be armed before the unregister can trigger release() */
	init_completion(released);
	kobject_unregister(&pci->kobj);
	wait_for_completion(released);
}

/***************************** EDAC PCI sysfs root **********************/
/* map an embedded kobject back to its owning edac_pci_ctl_info */
#define to_edacpci(k) container_of(k, struct edac_pci_ctl_info, kobj)

/*
 * map an embedded attribute back to its owning edac_pci_dev_attribute
 * (the attribute type used by the root-level 'pci' controls); the macro
 * previously named 'struct edac_pci_attr', a type this file never
 * declares, so any use of it would not have compiled
 */
#define to_edacpci_attr(a) \
	container_of(a, struct edac_pci_dev_attribute, attr)
178 179 180 181

/*
 * Generic 'show' helper: 'ptr' is read as a pointer to int; the value
 * is printed in decimal followed by a newline.  Returns the number of
 * characters written to 'buffer'.
 */
static ssize_t edac_pci_int_show(void *ptr, char *buffer)
{
	const int current_val = *(int *)ptr;

	return sprintf(buffer, "%d\n", current_val);
}

/*
 * Generic 'store' helper: 'ptr' is treated as a pointer to int.
 *
 * When the input starts with a digit it is parsed with
 * simple_strtoul() (base 0) and written through 'ptr'; any other input
 * leaves the value untouched.  'count' is returned in both cases.
 */
static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count)
{
	int *target = ptr;

	if (!isdigit(*buffer))
		return count;

	*target = simple_strtoul(buffer, NULL, 0);

	return count;
}

struct edac_pci_dev_attribute {
	struct attribute attr;
	void *value;
198 199
	 ssize_t(*show) (void *, char *);
	 ssize_t(*store) (void *, const char *, size_t);
200 201 202 203
};

/* Set of show/store abstract level functions for PCI Parity object */
static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr,
204
				 char *buffer)
205 206
{
	struct edac_pci_dev_attribute *edac_pci_dev;
207
	edac_pci_dev = (struct edac_pci_dev_attribute *)attr;
208 209 210 211 212 213 214

	if (edac_pci_dev->show)
		return edac_pci_dev->show(edac_pci_dev->value, buffer);
	return -EIO;
}

/*
 * sysfs 'store' dispatcher for the root-level 'pci' attributes.
 *
 * Recovers the enclosing edac_pci_dev_attribute from 'attr' and calls
 * its store handler with the attribute's backing value pointer.
 * Returns the handler's result, or -EIO when no store handler is set.
 * 'kobj' is not used.
 */
static ssize_t edac_pci_dev_store(struct kobject *kobj,
				  struct attribute *attr, const char *buffer,
				  size_t count)
{
	struct edac_pci_dev_attribute *edac_pci_dev;

	edac_pci_dev = (struct edac_pci_dev_attribute *)attr;

	/*
	 * Guard on ->store, the pointer that is actually called.  The
	 * earlier test of ->show let attributes with a show handler but
	 * no store handler (e.g. pci_parity_count) call through NULL.
	 */
	if (edac_pci_dev->store)
		return edac_pci_dev->store(edac_pci_dev->value, buffer, count);
	return -EIO;
}

static struct sysfs_ops edac_pci_sysfs_ops = {
227 228
	.show = edac_pci_dev_show,
	.store = edac_pci_dev_store
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
};

/*
 * Define a static edac_pci_dev_attribute named edac_pci_attr_<_name>.
 * The sysfs file name is the stringified _name and the backing value is
 * the address of the variable called _name.
 */
#define EDAC_PCI_ATTR(_name, _mode, _show, _store)			\
static struct edac_pci_dev_attribute edac_pci_attr_##_name = {		\
	.attr	= {							\
		.name = __stringify(_name),				\
		.mode = _mode						\
	},								\
	.value	= &_name,						\
	.show	= _show,						\
	.store	= _store,						\
};

/*
 * Same as EDAC_PCI_ATTR, except the backing value pointer is supplied
 * explicitly as _data instead of being derived from _name.
 */
#define EDAC_PCI_STRING_ATTR(_name, _data, _mode, _show, _store)	\
static struct edac_pci_dev_attribute edac_pci_attr_##_name = {		\
	.attr	= {							\
		.name = __stringify(_name),				\
		.mode = _mode						\
	},								\
	.value	= _data,						\
	.show	= _show,						\
	.store	= _store,						\
};

/*
 * PCI parity control files: readable by all, writable by owner, backed
 * by the int controls defined at the top of this file.
 */
EDAC_PCI_ATTR(check_pci_errors, S_IRUGO | S_IWUSR,
	      edac_pci_int_show, edac_pci_int_store);
EDAC_PCI_ATTR(edac_pci_log_pe, S_IRUGO | S_IWUSR,
	      edac_pci_int_show, edac_pci_int_store);
EDAC_PCI_ATTR(edac_pci_log_npe, S_IRUGO | S_IWUSR,
	      edac_pci_int_show, edac_pci_int_store);
EDAC_PCI_ATTR(edac_pci_panic_on_pe, S_IRUGO | S_IWUSR,
	      edac_pci_int_show, edac_pci_int_store);

/* read-only global error tallies; no store handler */
EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL);
EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL);
258 259 260

/* Base Attributes of the memory ECC object */
static struct edac_pci_dev_attribute *edac_pci_attr[] = {
261
	&edac_pci_attr_check_pci_errors,
D
Dave Jiang 已提交
262 263 264
	&edac_pci_attr_edac_pci_log_pe,
	&edac_pci_attr_edac_pci_log_npe,
	&edac_pci_attr_edac_pci_panic_on_pe,
265
	&edac_pci_attr_pci_parity_count,
266
	&edac_pci_attr_pci_nonparity_count,
267 268 269 270 271 272
	NULL,
};

/* No memory to release */
static void edac_pci_release(struct kobject *kobj)
{
273 274 275 276
	struct edac_pci_ctl_info *pci;

	pci = to_edacpci(kobj);

277
	debugf1("%s()\n", __func__);
278
	complete(&pci->kobj_complete);
279 280 281 282 283
}

static struct kobj_type ktype_edac_pci = {
	.release = edac_pci_release,
	.sysfs_ops = &edac_pci_sysfs_ops,
284
	.default_attrs = (struct attribute **)edac_pci_attr,
285 286 287 288 289 290 291 292
};

/**
 * edac_sysfs_pci_setup()
 *
 *	setup the sysfs for EDAC PCI attributes
 *	assumes edac_class has already been initialized
 */
293
int edac_pci_register_main_kobj(void)
294 295 296 297 298 299 300
{
	int err;
	struct sysdev_class *edac_class;

	debugf1("%s()\n", __func__);

	edac_class = edac_get_edac_class();
301 302 303 304
	if (edac_class == NULL) {
		debugf1("%s() no edac_class\n", __func__);
		return -ENODEV;
	}
305 306

	edac_pci_kobj.ktype = &ktype_edac_pci;
307 308 309

	edac_pci_kobj.parent = &edac_class->kset.kobj;

310
	err = kobject_set_name(&edac_pci_kobj, "pci");
311
	if (err)
312
		return err;
313

314 315 316
	/* Instanstiate the pci object */
	/* FIXME: maybe new sysdev_create_subdir() */
	err = kobject_register(&edac_pci_kobj);
317

318 319 320
	if (err) {
		debugf1("Failed to register '.../edac/pci'\n");
		return err;
321 322
	}

323 324 325
	debugf1("Registered '.../edac/pci' kobject\n");

	return 0;
326 327 328
}

/*
329
 * edac_pci_unregister_main_kobj()
330 331 332
 *
 *	perform the sysfs teardown for the PCI attributes
 */
333
void edac_pci_unregister_main_kobj(void)
334 335 336 337 338 339 340
{
	debugf0("%s()\n", __func__);
	init_completion(&edac_pci_kobj_complete);
	kobject_unregister(&edac_pci_kobj);
	wait_for_completion(&edac_pci_kobj_complete);
}

341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci)
{
	int err;
	struct kobject *edac_kobj = &pci->kobj;

	if (atomic_inc_return(&edac_pci_sysfs_refcount) == 1) {
		err = edac_pci_register_main_kobj();
		if (err) {
			atomic_dec(&edac_pci_sysfs_refcount);
			return err;
		}
	}

	err = edac_pci_create_instance_kobj(pci, pci->pci_idx);
	if (err) {
		if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0)
			edac_pci_unregister_main_kobj();
	}
359

360 361
	debugf0("%s() idx=%d\n", __func__, pci->pci_idx);

362
	err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK);
363 364
	if (err) {
		debugf0("%s() sysfs_create_link() returned err= %d\n",
365
			__func__, err);
366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
		return err;
	}

	return 0;
}

/*
 * edac_pci_remove_sysfs
 *
 *	undo edac_pci_create_sysfs() for one instance, in reverse order
 *	of creation: symlink, instance kobject, then (when this was the
 *	last instance) the main 'pci' kobject
 */
void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci)
{
	debugf0("%s()\n", __func__);

	/*
	 * The symlink lives in the instance's directory, so it has to
	 * go while the instance kobject is still registered.
	 */
	sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK);

	edac_pci_delete_instance_kobj(pci, pci->pci_idx);

	if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0)
		edac_pci_unregister_main_kobj();
}

/************************ PCI error handling *************************/
385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
{
	int where;
	u16 status;

	where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
	pci_read_config_word(dev, where, &status);

	/* If we get back 0xFFFF then we must suspect that the card has been
	 * pulled but the Linux PCI layer has not yet finished cleaning up.
	 * We don't want to report on such devices
	 */

	if (status == 0xFFFF) {
		u32 sanity;

		pci_read_config_dword(dev, 0, &sanity);

		if (sanity == 0xFFFFFFFF)
			return 0;
	}

	status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
408
	    PCI_STATUS_PARITY;
409 410 411 412 413 414 415 416

	if (status)
		/* reset only the bits we are interested in */
		pci_write_config_word(dev, where, status);

	return status;
}

417
/* per-device callback type taken by edac_pci_dev_parity_iterator() */
typedef void (*pci_parity_check_fn_t)(struct pci_dev *dev);
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439

/*
 * Clear any PCI parity errors logged by this device.
 *
 * Relies on get_pci_parity_status() writing the set error bits back to
 * the register; the returned status is deliberately ignored.  Bridges
 * additionally get their secondary status register handled.
 */
static void edac_pci_dev_parity_clear(struct pci_dev *dev)
{
	u8 hdr;

	get_pci_parity_status(dev, 0);

	/* read the device TYPE, looking for bridges */
	pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr);

	if ((hdr & 0x7F) != PCI_HEADER_TYPE_BRIDGE)
		return;

	get_pci_parity_status(dev, 1);
}

/*
 *  PCI Parity polling
 *
 */
static void edac_pci_dev_parity_test(struct pci_dev *dev)
{
	u16 status;
440
	u8 header_type;
441 442 443 444 445

	/* read the STATUS register on this device
	 */
	status = get_pci_parity_status(dev, 0);

446
	debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
447 448 449

	/* check the status reg for errors */
	if (status) {
450
		if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) {
451
			edac_printk(KERN_CRIT, EDAC_PCI,
452 453
				    "Signaled System Error on %s\n",
				    pci_name(dev));
454 455
			atomic_inc(&pci_nonparity_count);
		}
456 457 458

		if (status & (PCI_STATUS_PARITY)) {
			edac_printk(KERN_CRIT, EDAC_PCI,
459 460
				    "Master Data Parity Error on %s\n",
				    pci_name(dev));
461 462 463 464 465 466

			atomic_inc(&pci_parity_count);
		}

		if (status & (PCI_STATUS_DETECTED_PARITY)) {
			edac_printk(KERN_CRIT, EDAC_PCI,
467 468
				    "Detected Parity Error on %s\n",
				    pci_name(dev));
469 470 471 472 473 474 475 476

			atomic_inc(&pci_parity_count);
		}
	}

	/* read the device TYPE, looking for bridges */
	pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);

477
	debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id);
478 479 480 481 482

	if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
		/* On bridges, need to examine secondary status register  */
		status = get_pci_parity_status(dev, 1);

483
		debugf2("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
484 485 486

		/* check the secondary status reg for errors */
		if (status) {
487
			if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) {
488
				edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
489 490
					    "Signaled System Error on %s\n",
					    pci_name(dev));
491 492
				atomic_inc(&pci_nonparity_count);
			}
493 494 495

			if (status & (PCI_STATUS_PARITY)) {
				edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
496 497
					    "Master Data Parity Error on "
					    "%s\n", pci_name(dev));
498 499 500 501 502 503

				atomic_inc(&pci_parity_count);
			}

			if (status & (PCI_STATUS_DETECTED_PARITY)) {
				edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
504 505
					    "Detected Parity Error on %s\n",
					    pci_name(dev));
506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525

				atomic_inc(&pci_parity_count);
			}
		}
	}
}

/*
 * pci_dev parity list iterator
 *	Walk the PCI device list once, calling 'fn' on every device.
 *	Each step passes the previous device back to pci_get_device()
 *	to obtain the next one; the walk ends when it returns NULL.
 */
static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
{
	struct pci_dev *cur;

	/* request for kernel access to the next PCI device, if any,
	 * and while we are looking at it have its reference count
	 * bumped until we are done with it
	 */
	for (cur = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL);
	     cur != NULL;
	     cur = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, cur))
		fn(cur);
}

/*
 * edac_pci_do_parity_check
 *
 *	performs the actual PCI parity check operation
 */
void edac_pci_do_parity_check(void)
{
	unsigned long flags;
	int before_count;

	debugf3("%s()\n", __func__);

543
	if (!check_pci_errors)
544 545 546 547 548 549 550 551 552 553 554 555
		return;

	before_count = atomic_read(&pci_parity_count);

	/* scan all PCI devices looking for a Parity Error on devices and
	 * bridges
	 */
	local_irq_save(flags);
	edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
	local_irq_restore(flags);

	/* Only if operator has selected panic on PCI Error */
D
Dave Jiang 已提交
556
	if (edac_pci_get_panic_on_pe()) {
557 558 559 560 561 562 563 564 565 566 567 568 569
		/* If the count is different 'after' from 'before' */
		if (before_count != atomic_read(&pci_parity_count))
			panic("EDAC: PCI Parity Error");
	}
}

void edac_pci_clear_parity_errors(void)
{
	/* Clear any PCI bus parity errors that devices initially have logged
	 * in their registers.
	 */
	edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
}
570 571 572 573 574 575
void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg)
{

	/* global PE counter incremented by edac_pci_do_parity_check() */
	atomic_inc(&pci->counters.pe_count);

D
Dave Jiang 已提交
576
	if (edac_pci_get_log_pe())
577 578 579 580 581 582 583 584 585 586
		edac_pci_printk(pci, KERN_WARNING,
				"Parity Error ctl: %s %d: %s\n",
				pci->ctl_name, pci->pci_idx, msg);

	/*
	 * poke all PCI devices and see which one is the troublemaker
	 * panic() is called if set
	 */
	edac_pci_do_parity_check();
}
587

588
EXPORT_SYMBOL_GPL(edac_pci_handle_pe);
589

590 591 592 593 594 595
void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg)
{

	/* global NPE counter incremented by edac_pci_do_parity_check() */
	atomic_inc(&pci->counters.npe_count);

D
Dave Jiang 已提交
596
	if (edac_pci_get_log_npe())
597 598 599 600 601 602 603 604 605 606
		edac_pci_printk(pci, KERN_WARNING,
				"Non-Parity Error ctl: %s %d: %s\n",
				pci->ctl_name, pci->pci_idx, msg);

	/*
	 * poke all PCI devices and see which one is the troublemaker
	 * panic() is called if set
	 */
	edac_pci_do_parity_check();
}
607

608
EXPORT_SYMBOL_GPL(edac_pci_handle_npe);
609 610 611 612

/*
 * Define the PCI parameter to the module
 */
613
module_param(check_pci_errors, int, 0644);
D
Dave Jiang 已提交
614
MODULE_PARM_DESC(check_pci_errors,
615
		 "Check for PCI bus parity errors: 0=off 1=on");
D
Dave Jiang 已提交
616 617
module_param(edac_pci_panic_on_pe, int, 0644);
MODULE_PARM_DESC(edac_pci_panic_on_pe,
618
		 "Panic on PCI Bus Parity error: 0=off 1=on");
619

620
#endif				/* CONFIG_PCI */