/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
/* global list of memory controllers, linked through mem_ctl_info.link */
static LIST_HEAD(mc_devices);

/**
 * edac_dimm_info_location - format the layer coordinates of a DIMM
 * @dimm:	DIMM (or rank) whose location is formatted
 * @buf:	destination buffer
 * @len:	size of @buf in bytes
 *
 * Writes one "<layer name> <position> " pair per layer of the memory
 * controller that owns @dimm, stopping as soon as @buf is full.
 *
 * Returns the number of characters actually stored in @buf, not counting
 * the terminating NUL.
 */
unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
				 unsigned len)
{
	struct mem_ctl_info *mci = dimm->mci;
	int i, n, count = 0;
	char *p = buf;

	for (i = 0; i < mci->n_layers; i++) {
		n = snprintf(p, len, "%s %d ",
			     edac_layer_name[mci->layers[i].type],
			     dimm->location[i]);
		if (n < 0)
			break;

		/*
		 * snprintf() returns the length the output would have had
		 * without truncation.  Advancing by that value would move
		 * 'p' past the buffer and wrap the unsigned 'len', so
		 * account only for what was really stored and stop.
		 */
		if ((unsigned)n >= len) {
			if (len)
				count += len - 1;
			break;
		}

		p += n;
		len -= n;
		count += n;
	}

	return count;
}

#ifdef CONFIG_EDAC_DEBUG

/* Debug helper: print the fields of one channel (rank_info) at level 4. */
static void edac_mc_dump_channel(struct rank_info *chan)
{
	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
	edac_dbg(4, "    channel = %p\n", chan);
	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

76
static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
77
{
78 79 80 81 82 83 84 85 86 87 88 89
	char location[80];

	edac_dimm_info_location(dimm, location, sizeof(location));

	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
		 dimm->mci->mem_is_per_rank ? "rank" : "dimm",
		 number, location, dimm->csrow, dimm->cschannel);
	edac_dbg(4, "  dimm = %p\n", dimm);
	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
A
Alan Cox 已提交
90 91
}

92
static void edac_mc_dump_csrow(struct csrow_info *csrow)
A
Alan Cox 已提交
93
{
94 95 96 97 98 99 100 101
	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
	edac_dbg(4, "  csrow = %p\n", csrow);
	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
A
Alan Cox 已提交
102 103
}

104
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
A
Alan Cox 已提交
105
{
106 107 108 109 110 111 112 113 114 115 116 117 118
	edac_dbg(3, "\tmci = %p\n", mci);
	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
		 mci->nr_csrows, mci->csrows);
	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
		 mci->tot_dimms, mci->dimms);
	edac_dbg(3, "\tdev = %p\n", mci->pdev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 mci->mod_name, mci->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
A
Alan Cox 已提交
119 120
}

121 122
#endif				/* CONFIG_EDAC_DEBUG */

123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
/*
 * Human-readable names of the supported memory types.
 *
 * Keep the entries, and their order, in sync with enum mem_type
 * (presumably the enum value is used as the index into this table --
 * confirm against the users of edac_mem_types).
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is absolutely needed to keep the proper advancing
 * further in memory to the proper offsets when allocating the struct along
 * with its embedded structs, as edac_device_alloc_ctl_info() does it
 * above, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 *
 * Returns the (possibly padded) offset at which the reserved items start.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	/* Reserve room for the items; padding, if any, is added below. */
	*p = (char *)*p + size * n_elems;

	/*
	 * 'ptr' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust it so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	/*
	 * The misalignment must be taken from the offset itself ('ptr'),
	 * not from 'p', which is only the address of the caller's variable
	 * holding that offset.
	 */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return (char *)ptr;

	/* Account for the padding in the running offset as well. */
	*p = (char *)*p + (align - r);

	return (void *)(((unsigned long)ptr) + align - r);
}

/**
203 204 205 206 207 208
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:		Memory controller number
 * @n_layers:		Number of MC hierarchy layers
 * layers:		Describes each layer as seen by the Memory Controller
 * @size_pvt:		size of private storage needed
 *
A
Alan Cox 已提交
209 210 211 212 213 214 215
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
216 217 218 219 220 221
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks require drivers change.
 *
A
Alan Cox 已提交
222
 * Returns:
223 224
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
A
Alan Cox 已提交
225
 */
226 227 228 229
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
A
Alan Cox 已提交
230 231
{
	struct mem_ctl_info *mci;
232
	struct edac_mc_layer *layer;
233 234
	struct csrow_info *csr;
	struct rank_info *chan;
235
	struct dimm_info *dimm;
236 237 238 239
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
240
	void *pvt, *p, *ptr = NULL;
241
	int i, j, row, chn, n, len, off;
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}
A
Alan Cox 已提交
259 260 261 262 263 264

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
265
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
266 267 268
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
269
		edac_dbg(4, "errcount layer %d size %d\n", i, count);
270 271 272 273 274
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

275
	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
276
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
277
	size = ((unsigned long)pvt) + sz_pvt;
A
Alan Cox 已提交
278

279 280 281 282 283
	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		 size,
		 tot_dimms,
		 per_rank ? "ranks" : "dimms",
		 tot_csrows * tot_channels);
284

285 286
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
A
Alan Cox 已提交
287 288 289 290 291
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
292 293 294 295 296
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
297
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
A
Alan Cox 已提交
298

299
	/* setup index and various internal pointers */
300 301
	mci->mc_idx = mc_num;
	mci->tot_dimms = tot_dimms;
A
Alan Cox 已提交
302
	mci->pvt_info = pvt;
303 304 305 306 307 308
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->mem_is_per_rank = per_rank;
A
Alan Cox 已提交
309

310
	/*
311
	 * Alocate and fill the csrow/channels structs
312
	 */
313 314 315
	mci->csrows = kcalloc(sizeof(*mci->csrows), tot_csrows, GFP_KERNEL);
	if (!mci->csrows)
		goto error;
316
	for (row = 0; row < tot_csrows; row++) {
317 318 319 320
		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
		if (!csr)
			goto error;
		mci->csrows[row] = csr;
321 322 323
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
324 325 326 327
		csr->channels = kcalloc(sizeof(*csr->channels), tot_channels,
					GFP_KERNEL);
		if (!csr->channels)
			goto error;
328 329

		for (chn = 0; chn < tot_channels; chn++) {
330 331 332 333
			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
			if (!chan)
				goto error;
			csr->channels[chn] = chan;
A
Alan Cox 已提交
334
			chan->chan_idx = chn;
335 336 337 338 339
			chan->csrow = csr;
		}
	}

	/*
340
	 * Allocate and fill the dimm structs
341
	 */
342 343 344 345
	mci->dimms  = kcalloc(sizeof(*mci->dimms), tot_dimms, GFP_KERNEL);
	if (!mci->dimms)
		goto error;

346 347 348 349
	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	for (i = 0; i < tot_dimms; i++) {
350 351 352 353 354 355
		chan = mci->csrows[row]->channels[chn];
		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
		if (off < 0 || off >= tot_dimms) {
			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
			goto error;
		}
356

357
		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
358 359
		if (!dimm)
			goto error;
360
		mci->dimms[off] = dimm;
361 362
		dimm->mci = mci;

363 364 365 366 367 368 369 370 371 372 373 374 375 376
		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
377 378
			dimm->location[j] = pos[j];

379 380 381 382
			if (len <= 0)
				break;
		}

383 384 385 386 387 388 389 390 391 392 393
		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		row++;
		if (row == tot_csrows) {
			row = 0;
			chn++;
		}
394

395 396 397 398 399 400
		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
A
Alan Cox 已提交
401 402 403
		}
	}

404
	mci->op_state = OP_ALLOC;
405 406 407 408 409 410 411

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *      edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
412

A
Alan Cox 已提交
413
	return mci;
414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435

error:
	if (mci->dimms) {
		for (i = 0; i < tot_dimms; i++)
			kfree(mci->dimms[i]);
		kfree(mci->dimms);
	}
	if (mci->csrows) {
		for (chn = 0; chn < tot_channels; chn++) {
			csr = mci->csrows[chn];
			if (csr) {
				for (chn = 0; chn < tot_channels; chn++)
					kfree(csr->channels[chn]);
				kfree(csr);
			}
			kfree(mci->csrows[i]);
		}
		kfree(mci->csrows);
	}
	kfree(mci);

	return NULL;
436
}
437
EXPORT_SYMBOL_GPL(edac_mc_alloc);

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_dbg(1, "\n");

	/* the mci instance is freed here, when the sysfs object is dropped */
	edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
A
Alan Cox 已提交
452

453

454
/**
455 456 457 458
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
459
 * @dev: pointer to a struct device related with the MCI
460
 */
461
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
A
Alan Cox 已提交
462 463 464 465
{
	struct mem_ctl_info *mci;
	struct list_head *item;

466
	edac_dbg(3, "\n");
A
Alan Cox 已提交
467 468 469 470

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

471
		if (mci->pdev == dev)
A
Alan Cox 已提交
472 473 474 475 476
			return mci;
	}

	return NULL;
}
477
EXPORT_SYMBOL_GPL(find_mci_by_dev);
A
Alan Cox 已提交
478

479 480 481 482 483
/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
484
	int old_state;
485

486
	if (edac_op_state == EDAC_OPSTATE_POLL)
487 488
		return 1;

489 490
	old_state = edac_err_assert;
	edac_err_assert = 0;
491

492
	return old_state;
493 494 495 496 497 498 499 500
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
J
Jean Delvare 已提交
501
	struct delayed_work *d_work = to_delayed_work(work_req);
502 503 504 505
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

506 507 508 509 510 511
	/* if this control struct has movd to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

512 513 514 515 516 517 518
	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
D
Dave Jiang 已提交
519
	queue_delayed_work(edac_workqueue, &mci->work,
520
			msecs_to_jiffies(edac_mc_get_poll_msec()));
521 522 523 524 525 526
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
527 528 529 530
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
531
 */
532
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
533
{
534
	edac_dbg(0, "\n");
535

536 537 538 539
	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

540
	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
541
	mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
542 543 544 545 546
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
547 548 549 550
 *
 *	locking model:
 *
 *		called WITHOUT lock held
551
 */
552
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
553 554 555
{
	int status;

556 557 558
	if (mci->op_state != OP_RUNNING_POLL)
		return;

559 560
	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
561
		edac_dbg(0, "not canceled, flush the queue\n");
562

563 564
		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
565 566 567 568
	}
}

/*
569 570 571 572
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
573
 */
574
void edac_mc_reset_delay_period(int value)
575
{
576 577 578 579 580 581 582 583 584 585
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}
586 587 588 589

	mutex_unlock(&mem_ctls_mutex);
}

590 591


592 593 594
/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
595 596 597 598
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
599
 */
600
static int add_mc_to_global_list(struct mem_ctl_info *mci)
A
Alan Cox 已提交
601 602 603 604
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

605
	insert_before = &mc_devices;
A
Alan Cox 已提交
606

607
	p = find_mci_by_dev(mci->pdev);
608
	if (unlikely(p != NULL))
609
		goto fail0;
A
Alan Cox 已提交
610

611 612
	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);
A
Alan Cox 已提交
613

614 615 616
		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;
A
Alan Cox 已提交
617

618 619
			insert_before = item;
			break;
A
Alan Cox 已提交
620 621 622 623
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
D
Dave Jiang 已提交
624
	atomic_inc(&edac_handlers);
A
Alan Cox 已提交
625
	return 0;
626

627
fail0:
628
	edac_printk(KERN_WARNING, EDAC_MC,
629
		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
630
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
631 632
	return 1;

633
fail1:
634
	edac_printk(KERN_WARNING, EDAC_MC,
635 636
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
637
	return 1;
A
Alan Cox 已提交
638 639
}

D
Dave Peterson 已提交
640
static void del_mc_from_global_list(struct mem_ctl_info *mci)
641
{
D
Dave Jiang 已提交
642
	atomic_dec(&edac_handlers);
643
	list_del_rcu(&mci->link);
644 645 646 647 648 649

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
650 651
}

652 653 654 655 656 657 658 659
/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
660
struct mem_ctl_info *edac_mc_find(int idx)
661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

A
Alan Cox 已提交
680
/**
681 682
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
A
Alan Cox 已提交
683 684 685 686 687 688 689 690
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
691
int edac_mc_add_mc(struct mem_ctl_info *mci)
A
Alan Cox 已提交
692
{
693
	edac_dbg(0, "\n");
694

A
Alan Cox 已提交
695 696 697
#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);
D
Dave Peterson 已提交
698

A
Alan Cox 已提交
699 700 701 702
	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
703 704
			struct csrow_info *csrow = mci->csrows[i];
			u32 nr_pages = 0;
A
Alan Cox 已提交
705
			int j;
D
Dave Peterson 已提交
706

707 708 709 710 711 712 713 714
			for (j = 0; j < csrow->nr_channels; j++)
				nr_pages += csrow->channels[j]->dimm->nr_pages;
			if (!nr_pages)
				continue;
			edac_mc_dump_csrow(csrow);
			for (j = 0; j < csrow->nr_channels; j++)
				if (csrow->channels[j]->dimm->nr_pages)
					edac_mc_dump_channel(csrow->channels[j]);
A
Alan Cox 已提交
715
		}
716
		for (i = 0; i < mci->tot_dimms; i++)
717 718
			if (mci->dimms[i]->nr_pages)
				edac_mc_dump_dimm(mci->dimms[i], i);
A
Alan Cox 已提交
719 720
	}
#endif
721
	mutex_lock(&mem_ctls_mutex);
A
Alan Cox 已提交
722 723

	if (add_mc_to_global_list(mci))
724
		goto fail0;
A
Alan Cox 已提交
725 726 727 728

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

729 730
	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
731
			"failed to create sysfs device\n");
732 733
		goto fail1;
	}
A
Alan Cox 已提交
734

735 736 737 738 739 740 741 742 743 744
	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

A
Alan Cox 已提交
745
	/* Report action taken */
746
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
747
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
A
Alan Cox 已提交
748

749
	mutex_unlock(&mem_ctls_mutex);
750
	return 0;
A
Alan Cox 已提交
751

752
fail1:
753 754
	del_mc_from_global_list(mci);

755
fail0:
756
	mutex_unlock(&mem_ctls_mutex);
757
	return 1;
A
Alan Cox 已提交
758
}
759
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
A
Alan Cox 已提交
760 761

/**
762 763
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
764
 * @pdev: Pointer to 'struct device' representing mci structure to remove.
A
Alan Cox 已提交
765
 *
766
 * Return pointer to removed mci structure, or NULL if device not found.
A
Alan Cox 已提交
767
 */
768
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
A
Alan Cox 已提交
769
{
770
	struct mem_ctl_info *mci;
A
Alan Cox 已提交
771

772
	edac_dbg(0, "\n");
773

774
	mutex_lock(&mem_ctls_mutex);
775

776 777 778
	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
779
		mutex_unlock(&mem_ctls_mutex);
780 781 782
		return NULL;
	}

A
Alan Cox 已提交
783
	del_mc_from_global_list(mci);
784
	mutex_unlock(&mem_ctls_mutex);
785

786
	/* flush workq processes */
787
	edac_mc_workq_teardown(mci);
788 789 790 791 792

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
793 794
	edac_remove_sysfs_mci_device(mci);

D
Dave Peterson 已提交
795
	edac_printk(KERN_INFO, EDAC_MC,
796
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
797
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
798

799
	return mci;
A
Alan Cox 已提交
800
}
801
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
A
Alan Cox 已提交
802

803 804
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
A
Alan Cox 已提交
805 806 807 808 809
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

810
	edac_dbg(3, "\n");
A
Alan Cox 已提交
811 812

	/* ECC error page was not in our memory. Ignore it. */
813
	if (!pfn_valid(page))
A
Alan Cox 已提交
814 815 816 817 818 819 820 821
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

822
	virt_addr = kmap_atomic(pg);
A
Alan Cox 已提交
823 824 825 826 827

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
828
	kunmap_atomic(virt_addr);
A
Alan Cox 已提交
829 830 831 832 833 834

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
D
Dave Peterson 已提交
835
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
A
Alan Cox 已提交
836
{
837
	struct csrow_info **csrows = mci->csrows;
838
	int row, i, j, n;
A
Alan Cox 已提交
839

840
	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
A
Alan Cox 已提交
841 842 843
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
844
		struct csrow_info *csrow = csrows[i];
845 846
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
847
			struct dimm_info *dimm = csrow->channels[j]->dimm;
848 849 850
			n += dimm->nr_pages;
		}
		if (n == 0)
A
Alan Cox 已提交
851 852
			continue;

853 854 855 856
		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
			 mci->mc_idx,
			 csrow->first_page, page, csrow->last_page,
			 csrow->page_mask);
A
Alan Cox 已提交
857 858 859 860 861 862 863 864 865 866 867

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
D
Dave Peterson 已提交
868
		edac_mc_printk(mci, KERN_ERR,
869 870
			"could not look up page error address %lx\n",
			(unsigned long)page);
A
Alan Cox 已提交
871 872 873

	return row;
}
874
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
A
Alan Cox 已提交
875

876 877 878 879 880 881 882 883 884
/*
 * Printable name of each memory controller layer type, indexed by the
 * EDAC_MC_LAYER_* value stored in edac_mc_layer.type.
 */
const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
885 886 887
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS],
			      const u16 count)
A
Alan Cox 已提交
888
{
889
	int i, index = 0;
A
Alan Cox 已提交
890

891
	mci->ce_mc += count;
A
Alan Cox 已提交
892

893
	if (!enable_per_layer_report) {
894
		mci->ce_noinfo_count += count;
A
Alan Cox 已提交
895 896
		return;
	}
D
Dave Peterson 已提交
897

898 899 900 901
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
902
		mci->ce_per_layer[i][index] += count;
903 904 905 906 907 908 909 910

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
				    bool enable_per_layer_report,
911 912
				    const int pos[EDAC_MAX_LAYERS],
				    const u16 count)
913 914 915
{
	int i, index = 0;

916
	mci->ue_mc += count;
917 918

	if (!enable_per_layer_report) {
919
		mci->ce_noinfo_count += count;
A
Alan Cox 已提交
920 921 922
		return;
	}

923 924 925 926
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
927
		mci->ue_per_layer[i][index] += count;
928

929 930 931 932
		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}
A
Alan Cox 已提交
933

934
static void edac_ce_error(struct mem_ctl_info *mci,
935
			  const u16 error_count,
936 937 938 939 940 941 942 943 944
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
945
			  long grain)
946 947 948 949 950 951
{
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
952 953
				       "%d CE %s on %s (%s %s - %s)\n",
				       error_count,
954 955 956 957
				       msg, label, location,
				       detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
958 959
				       "%d CE %s on %s (%s %s)\n",
				       error_count,
960 961 962
				       msg, label, location,
				       detail);
	}
963
	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
A
Alan Cox 已提交
964 965 966

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
967 968 969 970 971 972 973 974 975 976
			* Some memory controllers (called MCs below) can remap
			* memory so that it is still available at a different
			* address when PCI devices map into memory.
			* MC's that can't do this, lose the memory where PCI
			* devices are mapped. This mapping is MC-dependent
			* and so we call back into the MC driver for it to
			* map the MC page to a physical (CPU) page which can
			* then be mapped to a virtual page - which can then
			* be scrubbed.
			*/
A
Alan Cox 已提交
977
		remapped_page = mci->ctl_page_to_phys ?
978 979
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;
A
Alan Cox 已提交
980

981 982
		edac_mc_scrub_block(remapped_page,
					offset_in_page, grain);
A
Alan Cox 已提交
983 984 985
	}
}

986
static void edac_ue_error(struct mem_ctl_info *mci,
987
			  const u16 error_count,
988 989 990 991 992 993 994
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
A
Alan Cox 已提交
995
{
996 997 998
	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
999 1000
				       "%d UE %s on %s (%s %s - %s)\n",
				       error_count,
1001 1002 1003 1004
			               msg, label, location, detail,
				       other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
1005 1006
				       "%d UE %s on %s (%s %s)\n",
				       error_count,
1007 1008
			               msg, label, location, detail);
	}
D
Dave Peterson 已提交
1009

1010 1011 1012 1013 1014 1015 1016 1017 1018
	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s on %s (%s%s - %s)\n",
			      msg, label, location, detail, other_detail);
		else
			panic("UE %s on %s (%s%s)\n",
			      msg, label, location, detail);
	}

1019
	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
A
Alan Cox 已提交
1020 1021
}

1022
#define OTHER_LABEL " or "
1023 1024 1025 1026 1027 1028

/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:		severity of the error (CE/UE/Fatal)
 * @mci:		a struct mem_ctl_info pointer
1029
 * @error_count:	Number of errors of the same type
1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041
 * @page_frame_number:	mem page where the error occurred
 * @offset_in_page:	offset of the error inside the page
 * @syndrome:		ECC syndrome
 * @top_layer:		Memory layer[0] position
 * @mid_layer:		Memory layer[1] position
 * @low_layer:		Memory layer[2] position
 * @msg:		Message meaningful to the end users that
 *			explains the event
 * @other_detail:	Technical details about the event that
 *			may help hardware manufacturers and
 *			EDAC developers to analyse the event
 */
1042 1043
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
1044
			  const u16 error_count,
1045 1046 1047
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
1048 1049 1050
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
1051
			  const char *msg,
1052
			  const char *other_detail)
A
Alan Cox 已提交
1053
{
1054 1055 1056 1057 1058
	/* FIXME: too much for stack: move it to some pre-alocated area */
	char detail[80], location[80];
	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
	char *p;
	int row = -1, chan = -1;
1059
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1060
	int i;
1061
	long grain;
1062
	bool enable_per_layer_report = false;
1063
	u8 grain_bits;
A
Alan Cox 已提交
1064

1065
	edac_dbg(3, "MC%d\n", mci->mc_idx);
A
Alan Cox 已提交
1066

1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093
	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {
			if (type == HW_EVENT_ERR_CORRECTED)
				p = "CE";
			else
				p = "UE";

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			enable_per_layer_report = true;
A
Alan Cox 已提交
1094 1095
	}

1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110
	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * pottentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	grain = 0;
	p = label;
	*p = '\0';
	for (i = 0; i < mci->tot_dimms; i++) {
1111
		struct dimm_info *dimm = mci->dimms[i];
A
Alan Cox 已提交
1112

1113
		if (top_layer >= 0 && top_layer != dimm->location[0])
1114
			continue;
1115
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
1116
			continue;
1117
		if (low_layer >= 0 && low_layer != dimm->location[2])
1118
			continue;
A
Alan Cox 已提交
1119

1120 1121 1122
		/* get the max grain, over the error match range */
		if (dimm->grain > grain)
			grain = dimm->grain;
1123

1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142
		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole
		 * channel/memory controller/...  may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (enable_per_layer_report && dimm->nr_pages) {
			if (p != label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
1143 1144 1145
			edac_dbg(4, "%s csrows map: (%d,%d)\n",
				 mci->mem_is_per_rank ? "rank" : "dimm",
				 dimm->csrow, dimm->cschannel);
1146 1147 1148 1149 1150 1151 1152 1153 1154 1155
			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
1156 1157
	}

1158 1159 1160
	if (!enable_per_layer_report) {
		strcpy(label, "any memory");
	} else {
1161
		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1162 1163 1164 1165
		if (p == label)
			strcpy(label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
1166
				mci->csrows[row]->ce_count += error_count;
1167
				if (chan >= 0)
1168
					mci->csrows[row]->channels[chan]->ce_count += error_count;
1169 1170 1171
			}
		} else
			if (row >= 0)
1172
				mci->csrows[row]->ue_count += error_count;
1173 1174
	}

1175 1176 1177 1178 1179
	/* Fill the RAM location data */
	p = location;
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;
1180

1181 1182 1183
		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
1184
	}
1185 1186 1187 1188 1189 1190 1191 1192 1193 1194
	if (p > location)
		*(p - 1) = '\0';

	/* Report the error via the trace interface */

	grain_bits = fls_long(grain) + 1;
	trace_mc_event(type, msg, label, error_count,
		       mci->mc_idx, top_layer, mid_layer, low_layer,
		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
		       grain_bits, syndrome, other_detail);
1195

1196 1197 1198
	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
1199
			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1200 1201
			page_frame_number, offset_in_page,
			grain, syndrome);
1202 1203
		edac_ce_error(mci, error_count, pos, msg, location, label,
			      detail, other_detail, enable_per_layer_report,
1204 1205 1206
			      page_frame_number, offset_in_page, grain);
	} else {
		snprintf(detail, sizeof(detail),
1207
			"page:0x%lx offset:0x%lx grain:%ld",
1208
			page_frame_number, offset_in_page, grain);
1209

1210 1211
		edac_ue_error(mci, error_count, pos, msg, location, label,
			      detail, other_detail, enable_per_layer_report);
1212
	}
1213
}
1214
EXPORT_SYMBOL_GPL(edac_mc_handle_error);