/*
 * Defines, structures, APIs for edac_core module
 *
 * (C) 2007 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * NMI handling support added by
 *     Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
 *
 * Refactored for multi-source files:
 *	Doug Thompson <norsk5@xmission.com>
 *
 */

#ifndef _EDAC_CORE_H_
#define _EDAC_CORE_H_

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/pci.h>
#include <linux/time.h>
#include <linux/nmi.h>
#include <linux/rcupdate.h>
#include <linux/completion.h>
#include <linux/kobject.h>
#include <linux/platform_device.h>
35
#include <linux/workqueue.h>
36
#include <linux/edac.h>
37

38 39
/* Base length used to size the name buffers declared in this header:
 * control structures and blocks use EDAC_DEVICE_NAME_LEN + 1 bytes,
 * instances use EDAC_DEVICE_NAME_LEN + 4 bytes.
 */
#define EDAC_DEVICE_NAME_LEN	31
/* NOTE(review): not referenced anywhere in this header; presumably the
 * maximum length of an attribute value string - confirm against users.
 */
#define EDAC_ATTRIB_VALUE_LEN	15
40 41

/*
 * Convert between a page count and a size in MiB (2^20 bytes).
 *
 * A page is 2^PAGE_SHIFT bytes, so the conversion is a shift by the
 * difference between 20 and PAGE_SHIFT. The direction of the shift
 * depends on whether a page is smaller than one MiB or not.
 * PAGES_TO_MiB() rounds down when pages are smaller than one MiB.
 */
#if PAGE_SHIFT < 20
#define PAGES_TO_MiB(pages)	((pages) >> (20 - PAGE_SHIFT))
#define MiB_TO_PAGES(mb)	((mb) << (20 - PAGE_SHIFT))
#else				/* PAGE_SHIFT >= 20 */
#define PAGES_TO_MiB(pages)	((pages) << (PAGE_SHIFT - 20))
#define MiB_TO_PAGES(mb)	((mb) >> (PAGE_SHIFT - 20))
#endif

/*
 * printk() wrappers that tag every message with "EDAC".
 *
 * edac_printk()            - "EDAC <prefix>: <message>"
 * edac_mc_printk()         - "EDAC MC<n>: <message>", n = mci->mc_idx
 * edac_mc_chipset_printk() - "EDAC <prefix> MC<n>: <message>"
 *
 * @level and @prefix must be string literals: they are joined to @fmt by
 * string-literal concatenation. @mci is parenthesized so that pointer
 * expressions (e.g. "base + i") may be passed, not only plain identifiers.
 */
#define edac_printk(level, prefix, fmt, arg...) \
	printk(level "EDAC " prefix ": " fmt, ##arg)

#define edac_mc_printk(mci, level, fmt, arg...) \
	printk(level "EDAC MC%d: " fmt, (mci)->mc_idx, ##arg)

#define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
	printk(level "EDAC " prefix " MC%d: " fmt, (mci)->mc_idx, ##arg)

58 59 60
/*
 * printk() wrapper for edac_device messages: "EDAC DEVICE<n>: <message>",
 * where n is ctl->dev_idx. @level must be a string literal. @ctl is
 * parenthesized so that any pointer expression may be passed.
 */
#define edac_device_printk(ctl, level, fmt, arg...) \
	printk(level "EDAC DEVICE%d: " fmt, (ctl)->dev_idx, ##arg)

61 62 63
/*
 * printk() wrapper for edac_pci messages: "EDAC PCI<n>: <message>",
 * where n is ctl->pci_idx. @level must be a string literal. @ctl is
 * parenthesized so that any pointer expression may be passed.
 */
#define edac_pci_printk(ctl, level, fmt, arg...) \
	printk(level "EDAC PCI%d: " fmt, (ctl)->pci_idx, ##arg)

64 65 66 67 68
/* String prefixes intended for the 'prefix' parameter of edac_printk();
 * EDAC_DEBUG is the one edac_dbg() passes. (edac_mc_printk() takes no
 * prefix parameter.)
 */
#define EDAC_MC "MC"
#define EDAC_PCI "PCI"
#define EDAC_DEBUG "DEBUG"

69 70
/* Array of strings defined outside this header.
 * NOTE(review): presumably memory-type names indexed by the memory type
 * enumeration from <linux/edac.h> - confirm against the definition.
 */
extern const char *edac_mem_types[];

71 72 73
#ifdef CONFIG_EDAC_DEBUG
extern int edac_debug_level;

/*
 * edac_dbg - emit a KERN_DEBUG message tagged with the calling function
 * @level: verbosity of this message; it is printed only when
 *	   @level <= edac_debug_level
 * @fmt:   printk-style format string literal, followed by its arguments
 *
 * @level is parenthesized so that an expression argument (for example a
 * conditional expression) is compared as a whole.
 */
#define edac_dbg(level, fmt, ...)					\
do {									\
	if ((level) <= edac_debug_level)				\
		edac_printk(KERN_DEBUG, EDAC_DEBUG,			\
			    "%s: " fmt, __func__, ##__VA_ARGS__);	\
} while (0)

#else				/* !CONFIG_EDAC_DEBUG */

/*
 * Debugging disabled: the call sits under "if (0)", so it is never
 * executed, yet the format string and its arguments are still seen by
 * the compiler. @level is not evaluated in this variant.
 */
#define edac_dbg(level, fmt, ...)					\
do {									\
	if (0)								\
		edac_printk(KERN_DEBUG, EDAC_DEBUG,			\
			    "%s: " fmt, __func__, ##__VA_ARGS__);	\
} while (0)

#endif				/* !CONFIG_EDAC_DEBUG */
91 92 93 94

/*
 * Expands, by token pasting, to the comma-separated pair
 *	PCI_VENDOR_ID_<vend>, PCI_DEVICE_ID_<vend>_<dev>
 * The expansion is deliberately not parenthesized: it yields two list
 * elements, not one expression.
 * NOTE(review): presumably meant for the vendor/device fields of PCI ID
 * table entries - confirm against users.
 */
#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
	PCI_DEVICE_ID_ ## vend ## _ ## dev

95
/* Accessor for the dev_name member of the control structure @dev points to. */
#define edac_dev_name(dev)	((dev)->dev_name)
96

97
/*
 * The following are the structures to provide for a generic
 * or abstract 'edac_device'. This set of structures and the
 * code that implements the APIs for the same, provide for
 * registering EDAC type devices which are NOT standard memory.
 *
 * CPU caches (L1 and L2)
 * DMA engines
 * Core CPU switches
 * Fabric switch units
 * PCIe interface controllers
 * other EDAC/ECC type devices that can be monitored for
 * errors, etc.
 *
 * It allows for a two-level hierarchy. For example:
 *
 * cache could be composed of L1, L2 and L3 levels of cache.
 * Each CPU core would have its own L1 cache, while sharing
 * L2 and maybe L3 caches.
 *
 * View them arranged, via the sysfs presentation:
 * /sys/devices/system/edac/..
 *
 *	mc/		<existing memory device directory>
 *	cpu/cpu0/..	<L1 and L2 block directory>
 *		/L1-cache/ce_count
 *			 /ue_count
 *		/L2-cache/ce_count
 *			 /ue_count
 *	cpu/cpu1/..	<L1 and L2 block directory>
 *		/L1-cache/ce_count
 *			 /ue_count
 *		/L2-cache/ce_count
 *			 /ue_count
 *	...
 *
 *	the L1 and L2 directories would be "edac_device_block's"
 */

struct edac_device_counter {
137 138
	u32 ue_count;
	u32 ce_count;
139 140
};

141 142 143
/* Forward declarations: both structures are referenced by pointer (in the
 * sysfs attribute callback prototypes and in the block attribute structure)
 * before their full definitions appear.
 */
struct edac_device_ctl_info;
struct edac_device_block;
144

145 146 147 148 149 150 151 152 153
/* edac_dev_sysfs_attribute structure
 *	used for driver sysfs attributes in edac_device_ctl_info
 *	for extra controls and attributes:
 *		like high level error Injection controls
 *
 *	@attr:	the underlying sysfs attribute
 *	@show:	read callback; receives the owning control structure and
 *		the output buffer
 *	@store:	write callback; receives the owning control structure,
 *		the input buffer and its size
 */
struct edac_dev_sysfs_attribute {
	struct attribute attr;
	ssize_t (*show)(struct edac_device_ctl_info *, char *);
	ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
};

156
/* edac_dev_sysfs_block_attribute structure
157
 *
158
 *	used in leaf 'block' nodes for adding controls/attributes
159 160 161 162 163 164 165 166
 *
 *	each block in each instance of the containing control structure
 *	can have an array of the following. The show and store functions
 *	will be filled in with the show/store function in the
 *	low level driver.
 *
 *	The 'value' field will be the actual value field used for
 *	counting
167
 */
168 169 170 171 172 173 174 175
struct edac_dev_sysfs_block_attribute {
	struct attribute attr;
	ssize_t (*show)(struct kobject *, struct attribute *, char *);
	ssize_t (*store)(struct kobject *, struct attribute *,
			const char *, size_t);
	struct edac_device_block *block;

	unsigned int value;
176 177 178 179 180
};

/* device block control structure */
struct edac_device_block {
	struct edac_device_instance *instance;	/* Up Pointer */
181
	char name[EDAC_DEVICE_NAME_LEN + 1];
182 183 184

	struct edac_device_counter counters;	/* basic UE and CE counters */

185
	int nr_attribs;		/* how many attributes */
186 187 188

	/* this block's attributes, could be NULL */
	struct edac_dev_sysfs_block_attribute *block_attributes;
189 190 191 192 193 194 195 196 197 198 199 200

	/* edac sysfs device control */
	struct kobject kobj;
};

/* device instance control structure */
struct edac_device_instance {
	struct edac_device_ctl_info *ctl;	/* Up pointer */
	char name[EDAC_DEVICE_NAME_LEN + 4];

	struct edac_device_counter counters;	/* instance counters */

201
	u32 nr_blocks;		/* how many blocks */
202 203 204 205 206 207
	struct edac_device_block *blocks;	/* block array */

	/* edac sysfs device control */
	struct kobject kobj;
};

208

209 210 211 212 213 214 215 216
/*
 * Abstract edac_device control info structure
 *
 */
struct edac_device_ctl_info {
	/* for global list of edac_device_ctl_info structs */
	struct list_head link;

217 218
	struct module *owner;	/* Module owner of this control struct */

219 220 221 222 223 224 225 226 227
	int dev_idx;

	/* Per instance controls for this edac_device */
	int log_ue;		/* boolean for logging UEs */
	int log_ce;		/* boolean for logging CEs */
	int panic_on_ue;	/* boolean for panic'ing on an UE */
	unsigned poll_msec;	/* number of milliseconds to poll interval */
	unsigned long delay;	/* number of jiffies for poll_msec */

228 229 230 231 232 233 234 235 236 237 238 239
	/* Additional top controller level attributes, but specified
	 * by the low level driver.
	 *
	 * Set by the low level driver to provide attributes at the
	 * controller level, same level as 'ue_count' and 'ce_count' above.
	 * An array of structures, NULL terminated
	 *
	 * If attributes are desired, then set to array of attributes
	 * If no attributes are desired, leave NULL
	 */
	struct edac_dev_sysfs_attribute *sysfs_attributes;

240 241
	/* pointer to main 'edac' subsys in sysfs */
	struct bus_type *edac_subsys;
242 243 244 245 246 247 248

	/* the internal state of this controller instance */
	int op_state;
	/* work struct for this instance */
	struct delayed_work work;

	/* pointer to edac polling checking routine:
249 250 251
	 *      If NOT NULL: points to polling check routine
	 *      If NULL: Then assumes INTERRUPT operation, where
	 *              MC driver will receive events
252 253 254 255 256 257 258
	 */
	void (*edac_check) (struct edac_device_ctl_info * edac_dev);

	struct device *dev;	/* pointer to device structure */

	const char *mod_name;	/* module name */
	const char *ctl_name;	/* edac controller  name */
259
	const char *dev_name;	/* pci/platform/etc... name */
260 261 262

	void *pvt_info;		/* pointer to 'private driver' info */

263
	unsigned long start_time;	/* edac_device load start time (jiffies) */
264

265
	struct completion removal_complete;
266 267 268

	/* sysfs top name under 'edac' directory
	 * and instance name:
269 270 271 272
	 *      cpu/cpu0/...
	 *      cpu/cpu1/...
	 *      cpu/cpu2/...
	 *      ...
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
	 */
	char name[EDAC_DEVICE_NAME_LEN + 1];

	/* Number of instances supported on this control structure
	 * and the array of those instances
	 */
	u32 nr_instances;
	struct edac_device_instance *instances;

	/* Event counters for the this whole EDAC Device */
	struct edac_device_counter counters;

	/* edac sysfs device control for the 'name'
	 * device this structure controls
	 */
	struct kobject kobj;
};

/* To get from the instance's wq to the beginning of the ctl structure:
 * given a pointer @w to the 'work' member embedded in a control
 * structure, container_of() yields a pointer to that enclosing structure.
 */
#define to_edac_mem_ctl_work(w) \
		container_of(w, struct mem_ctl_info, work)

#define to_edac_device_ctl_work(w) \
		container_of(w, struct edac_device_ctl_info, work)

/*
 * The alloc() and free() functions for the 'edac_device' control info
 * structure. A MC driver will allocate one of these for each edac_device
 * it is going to control/register with the EDAC CORE.
 *
 * NOTE(review): parameter meanings below are inferred from their names and
 * from the structures in this header - confirm against the implementation:
 *	sizeof_private		size of the driver-private area (pvt_info)
 *	edac_device_name	base name of the device and its instances
 *	nr_instances		number of instances to create
 *	edac_block_name		base name of each block
 *	nr_blocks		number of blocks per instance
 *	offset_value		block numbering base, or
 *				BLOCK_OFFSET_VALUE_OFF for none
 *	block_attributes	attribute array for the blocks, may be NULL
 *	nr_attribs		number of entries in block_attributes
 *	device_index		index stored for this device
 */
extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
		unsigned sizeof_private,
		char *edac_device_name, unsigned nr_instances,
		char *edac_block_name, unsigned nr_blocks,
		unsigned offset_value,
		struct edac_dev_sysfs_block_attribute *block_attributes,
		unsigned nr_attribs,
		int device_index);
311 312 313 314 315 316 317 318 319

/* The offset value (the 'offset_value' argument of
 * edac_device_alloc_ctl_info()) can be:
 *	-1 indicating no offset value
 *	0 for zero-based block numbers
 *	1 for 1-based block number
 *	other for other-based block number
 *
 * The parameter is unsigned, so "-1" is spelled as the sentinel below,
 * i.e. -1 converted to unsigned.
 */
#define	BLOCK_OFFSET_VALUE_OFF	((unsigned) -1)

320
/* Counterpart of edac_device_alloc_ctl_info().
 * NOTE(review): presumably releases the control structure and everything
 * allocated with it - confirm against the implementation.
 */
extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
321

322 323
#ifdef CONFIG_PCI

324
struct edac_pci_counter {
325 326
	atomic_t pe_count;
	atomic_t npe_count;
327 328 329 330 331 332 333 334 335 336 337 338
};

/*
 * Abstract edac_pci control info structure
 *
 */
struct edac_pci_ctl_info {
	/* for global list of edac_pci_ctl_info structs */
	struct list_head link;

	int pci_idx;

339
	struct bus_type *edac_subsys;	/* pointer to subsystem */
340 341 342 343 344 345 346

	/* the internal state of this controller instance */
	int op_state;
	/* work struct for this instance */
	struct delayed_work work;

	/* pointer to edac polling checking routine:
347 348 349
	 *      If NOT NULL: points to polling check routine
	 *      If NULL: Then assumes INTERRUPT operation, where
	 *              MC driver will receive events
350 351 352 353 354 355 356 357 358 359 360
	 */
	void (*edac_check) (struct edac_pci_ctl_info * edac_dev);

	struct device *dev;	/* pointer to device structure */

	const char *mod_name;	/* module name */
	const char *ctl_name;	/* edac controller  name */
	const char *dev_name;	/* pci/platform/etc... name */

	void *pvt_info;		/* pointer to 'private driver' info */

361
	unsigned long start_time;	/* edac_pci load start time (jiffies) */
362 363 364 365 366

	struct completion complete;

	/* sysfs top name under 'edac' directory
	 * and instance name:
367 368 369 370
	 *      cpu/cpu0/...
	 *      cpu/cpu1/...
	 *      cpu/cpu2/...
	 *      ...
371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
	 */
	char name[EDAC_DEVICE_NAME_LEN + 1];

	/* Event counters for the this whole EDAC Device */
	struct edac_pci_counter counters;

	/* edac sysfs device control for the 'name'
	 * device this structure controls
	 */
	struct kobject kobj;
	struct completion kobj_complete;
};

/* Given a pointer @w to the 'work' member embedded in a
 * struct edac_pci_ctl_info, yield a pointer to the enclosing structure.
 */
#define to_edac_pci_ctl_work(w) \
		container_of(w, struct edac_pci_ctl_info,work)

387 388
/*
 * pci_write_bits8
 *
 * Write all or some bits of a byte-wide PCI config register.
 *
 * @pdev:   device whose config space is accessed
 * @offset: config-space offset of the register
 * @value:  new bit values
 * @mask:   bits of @value to apply. 0xff writes @value as is; any other
 *	    mask reads the register first and replaces only the masked
 *	    bits, keeping the rest of the current register contents.
 *
 * The return values of the config-space accessors are not checked.
 */
static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value,
				   u8 mask)
{
	if (mask != 0xff) {
		u8 buf;

		pci_read_config_byte(pdev, offset, &buf);
		value &= mask;
		buf &= ~mask;
		value |= buf;
	}

	pci_write_config_byte(pdev, offset, value);
}

/*
 * pci_write_bits16
 *
 * Write all or some bits of a word-wide (16-bit) PCI config register.
 *
 * @pdev:   device whose config space is accessed
 * @offset: config-space offset of the register
 * @value:  new bit values
 * @mask:   bits of @value to apply. 0xffff writes @value as is; any other
 *	    mask reads the register first and replaces only the masked
 *	    bits, keeping the rest of the current register contents.
 *
 * The return values of the config-space accessors are not checked.
 */
static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
				    u16 value, u16 mask)
{
	if (mask != 0xffff) {
		u16 buf;

		pci_read_config_word(pdev, offset, &buf);
		value &= mask;
		buf &= ~mask;
		value |= buf;
	}

	pci_write_config_word(pdev, offset, value);
}

J
Jeff Haran 已提交
419 420 421 422 423 424 425 426 427
/*
 * pci_write_bits32
 *
 * edac local routine to do pci_write_config_dword, but adds
 * a mask parameter. If mask is all ones, ignore the mask.
 * Otherwise utilize the mask to isolate specified bits
 *
 * write all or some bits in a dword-register
 */
428
static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
429
				    u32 value, u32 mask)
430
{
J
Jeff Haran 已提交
431
	if (mask != 0xffffffff) {
432 433 434 435 436 437 438 439 440 441 442
		u32 buf;

		pci_read_config_dword(pdev, offset, &buf);
		value &= mask;
		buf &= ~mask;
		value |= buf;
	}

	pci_write_config_dword(pdev, offset, value);
}

443
#endif				/* CONFIG_PCI */
444

445
/*
 * edac_mc (memory controller) APIs
 *
 * Declarations only; struct mem_ctl_info and struct edac_mc_layer are not
 * defined in this header.
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt);
extern int edac_mc_add_mc(struct mem_ctl_info *mci);
extern void edac_mc_free(struct mem_ctl_info *mci);
extern struct mem_ctl_info *edac_mc_find(int idx);
extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
				      unsigned long page);
456 457 458 459 460

/*
 * Error reporting entry point that takes a struct edac_raw_error_desc.
 * NOTE(review): presumably the lower-level counterpart of
 * edac_mc_handle_error(), for callers that have already filled in the
 * descriptor - confirm against the implementation.
 */
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
			      struct mem_ctl_info *mci,
			      struct edac_raw_error_desc *e);

461 462
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
463
			  const u16 error_count,
464 465 466
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
467 468 469
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
470
			  const char *msg,
471
			  const char *other_detail);
472 473

/*
 * edac_device APIs
 *
 * Declarations only. The handle_ue/handle_ce entry points take the
 * control structure plus an instance number and a block number.
 */
extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
				int inst_nr, int block_nr, const char *msg);
extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
				int inst_nr, int block_nr, const char *msg);
extern int edac_device_alloc_index(void);
extern const char *edac_layer_name[];
484

485 486 487
/*
 * edac_pci APIs
 *
 * Declarations only; all of them operate on struct edac_pci_ctl_info.
 */
extern struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
				const char *edac_pci_name);

extern void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci);

extern void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
				unsigned long value);

extern int edac_pci_alloc_index(void);
extern int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx);
extern struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev);

extern struct edac_pci_ctl_info *edac_pci_create_generic_ctl(
				struct device *dev,
				const char *mod_name);

extern void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci);
extern int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci);
extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);

/*
 * edac misc APIs
 */

/* Takes an op_state value (the type of the 'op_state' members of the
 * control structures) and returns a string.
 * NOTE(review): presumably a human-readable name of the state - confirm.
 */
extern char *edac_op_state_to_string(int op_state);
512 513

#endif				/* _EDAC_CORE_H_ */