/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sysdev.h>
#include <linux/ctype.h>
D
Dave Jiang 已提交
30
#include <linux/edac.h>
A
Alan Cox 已提交
31 32 33
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
34
#include "edac_core.h"
35
#include "edac_module.h"
A
Alan Cox 已提交
36 37

/*
 * mem_ctls_mutex protects mc_devices, the global list of registered
 * memory controllers; the list is walked and modified in this file with
 * the mutex held.
 */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);
A
Alan Cox 已提交
40 41 42

#ifdef CONFIG_EDAC_DEBUG

43
static void edac_mc_dump_channel(struct channel_info *chan)
A
Alan Cox 已提交
44 45 46 47 48 49 50 51
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}

52
static void edac_mc_dump_csrow(struct csrow_info *csrow)
A
Alan Cox 已提交
53 54 55
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
56
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
A
Alan Cox 已提交
57 58 59
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
60
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
A
Alan Cox 已提交
61 62 63 64
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

65
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
A
Alan Cox 已提交
66 67 68 69 70 71 72 73
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
74
	debugf3("\tdev = %p\n", mci->dev);
75
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
A
Alan Cox 已提交
76 77 78
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
/*
 * Human-readable names for the memory types; the entries are meant to
 * line up one-to-one with the values of enum mem_type, so keep the order
 * and count in sync with that enum.
 *
 * NOTE(review): in this file the table sits between the
 * CONFIG_EDAC_DEBUG #ifdef/#endif pair, so the exported symbol
 * presumably exists only in debug builds - confirm that no non-debug
 * user depends on it.
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

103
#endif				/* CONFIG_EDAC_DEBUG */
A
Alan Cox 已提交
104 105 106 107 108 109 110 111

/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Round 'ptr' up so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * The alignment is chosen from 'size' alone:
 *	size > sizeof(long)	-> sizeof(long long)
 *	size > sizeof(int)	-> sizeof(long)
 *	size > sizeof(short)	-> sizeof(int)
 *	size > sizeof(char)	-> sizeof(short)
 *	otherwise		-> no alignment needed, 'ptr' is returned as is
 *
 * Returns 'ptr' itself when it is already suitably aligned, otherwise the
 * next higher address that is a multiple of the chosen alignment.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned long addr = (unsigned long)ptr;
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return ptr;

	/* The misalignment is a property of the address, not of the item
	 * size: taking the remainder of 'size' would leave a misaligned
	 * pointer untouched whenever 'size' is a multiple of 'align'.
	 */
	r = addr % align;

	if (r == 0)
		return ptr;

	return (void *)(addr + align - r);
}

/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @size_pvt:	size of private storage needed
 * @nr_csrows:	Number of CWROWS needed for this MC
 * @nr_chans:	Number of channels for the MC
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
156
				unsigned nr_chans, int edac_index)
A
Alan Cox 已提交
157 158 159 160 161 162 163
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct channel_info *chi, *chp, *chan;
	void *pvt;
	unsigned size;
	int row, chn;
164
	int err;
A
Alan Cox 已提交
165 166 167 168 169 170

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
171
	mci = (struct mem_ctl_info *)0;
172 173
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
174
	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
175
	size = ((unsigned long)pvt) + sz_pvt;
A
Alan Cox 已提交
176

177 178
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
A
Alan Cox 已提交
179 180 181 182 183
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
184 185 186
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
A
Alan Cox 已提交
187

188 189
	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
A
Alan Cox 已提交
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
	mci->csrows = csi;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;
		}
	}

209 210
	mci->op_state = OP_ALLOC;

211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *      edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
A
Alan Cox 已提交
226 227
	return mci;
}
228
EXPORT_SYMBOL_GPL(edac_mc_alloc);
A
Alan Cox 已提交
229 230

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * The structure is not kfree'd here; this only hands it to
 * edac_mc_unregister_sysfs_main_kobj().
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_mc_unregister_sysfs_main_kobj(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
A
Alan Cox 已提交
240

241 242 243 244 245 246 247

/*
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 */
248
static struct mem_ctl_info *find_mci_by_dev(struct device *dev)
A
Alan Cox 已提交
249 250 251 252
{
	struct mem_ctl_info *mci;
	struct list_head *item;

D
Dave Peterson 已提交
253
	debugf3("%s()\n", __func__);
A
Alan Cox 已提交
254 255 256 257

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

258
		if (mci->dev == dev)
A
Alan Cox 已提交
259 260 261 262 263 264
			return mci;
	}

	return NULL;
}

265 266 267 268 269
/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
270
	int old_state;
271

272
	if (edac_op_state == EDAC_OPSTATE_POLL)
273 274
		return 1;

275 276
	old_state = edac_err_assert;
	edac_err_assert = 0;
277

278
	return old_state;
279 280 281 282 283 284 285 286
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
J
Jean Delvare 已提交
287
	struct delayed_work *d_work = to_delayed_work(work_req);
288 289 290 291
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

292 293 294 295 296 297
	/* if this control struct has movd to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

298 299 300 301 302 303 304
	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
D
Dave Jiang 已提交
305
	queue_delayed_work(edac_workqueue, &mci->work,
306
			msecs_to_jiffies(edac_mc_get_poll_msec()));
307 308 309 310 311 312
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
313 314 315 316
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
317
 */
318
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
319 320 321
{
	debugf0("%s()\n", __func__);

322 323 324 325
	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

326 327 328 329 330 331 332
	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
333 334 335 336
 *
 *	locking model:
 *
 *		called WITHOUT lock held
337
 */
338
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
339 340 341
{
	int status;

342 343 344
	if (mci->op_state != OP_RUNNING_POLL)
		return;

345 346 347 348
	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);
349

350 351
		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
352 353 354 355
	}
}

/*
356 357 358 359
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
360
 */
361
void edac_mc_reset_delay_period(int value)
362
{
363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);
378

379 380

	/* re-walk the list, and reset the poll delay */
381 382
	mutex_lock(&mem_ctls_mutex);

383 384 385 386 387
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}
388 389 390 391

	mutex_unlock(&mem_ctls_mutex);
}

392 393


394 395 396
/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
397 398 399 400
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
401
 */
402
static int add_mc_to_global_list(struct mem_ctl_info *mci)
A
Alan Cox 已提交
403 404 405 406
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

407
	insert_before = &mc_devices;
A
Alan Cox 已提交
408

409 410
	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
411
		goto fail0;
A
Alan Cox 已提交
412

413 414
	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);
A
Alan Cox 已提交
415

416 417 418
		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;
A
Alan Cox 已提交
419

420 421
			insert_before = item;
			break;
A
Alan Cox 已提交
422 423 424 425
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
D
Dave Jiang 已提交
426
	atomic_inc(&edac_handlers);
A
Alan Cox 已提交
427
	return 0;
428

429
fail0:
430
	edac_printk(KERN_WARNING, EDAC_MC,
431
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
432
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
433 434
	return 1;

435
fail1:
436
	edac_printk(KERN_WARNING, EDAC_MC,
437 438
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
439
	return 1;
A
Alan Cox 已提交
440 441
}

D
Dave Peterson 已提交
442
static void complete_mc_list_del(struct rcu_head *head)
443 444 445 446 447 448 449
{
	struct mem_ctl_info *mci;

	mci = container_of(head, struct mem_ctl_info, rcu);
	INIT_LIST_HEAD(&mci->link);
}

D
Dave Peterson 已提交
450
static void del_mc_from_global_list(struct mem_ctl_info *mci)
451
{
D
Dave Jiang 已提交
452
	atomic_dec(&edac_handlers);
453 454
	list_del_rcu(&mci->link);
	call_rcu(&mci->rcu, complete_mc_list_del);
455
	rcu_barrier();
456 457
}

458 459 460 461 462 463 464 465
/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
466
struct mem_ctl_info *edac_mc_find(int idx)
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

A
Alan Cox 已提交
486
/**
487 488
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
A
Alan Cox 已提交
489
 * @mci: pointer to the mci structure to be added to the list
490
 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
A
Alan Cox 已提交
491 492 493 494 495 496 497
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
498
int edac_mc_add_mc(struct mem_ctl_info *mci)
A
Alan Cox 已提交
499
{
D
Dave Peterson 已提交
500
	debugf0("%s()\n", __func__);
501

A
Alan Cox 已提交
502 503 504
#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);
D
Dave Peterson 已提交
505

A
Alan Cox 已提交
506 507 508 509 510
	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;
D
Dave Peterson 已提交
511

A
Alan Cox 已提交
512 513
			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
514
				edac_mc_dump_channel(&mci->csrows[i].
515
						channels[j]);
A
Alan Cox 已提交
516 517 518
		}
	}
#endif
519
	mutex_lock(&mem_ctls_mutex);
A
Alan Cox 已提交
520 521

	if (add_mc_to_global_list(mci))
522
		goto fail0;
A
Alan Cox 已提交
523 524 525 526

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

527 528
	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
529
			"failed to create sysfs device\n");
530 531
		goto fail1;
	}
A
Alan Cox 已提交
532

533 534 535 536 537 538 539 540 541 542
	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

A
Alan Cox 已提交
543
	/* Report action taken */
544
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
545
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
A
Alan Cox 已提交
546

547
	mutex_unlock(&mem_ctls_mutex);
548
	return 0;
A
Alan Cox 已提交
549

550
fail1:
551 552
	del_mc_from_global_list(mci);

553
fail0:
554
	mutex_unlock(&mem_ctls_mutex);
555
	return 1;
A
Alan Cox 已提交
556
}
557
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
A
Alan Cox 已提交
558 559

/**
560 561
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
562
 * @pdev: Pointer to 'struct device' representing mci structure to remove.
A
Alan Cox 已提交
563
 *
564
 * Return pointer to removed mci structure, or NULL if device not found.
A
Alan Cox 已提交
565
 */
566
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
A
Alan Cox 已提交
567
{
568
	struct mem_ctl_info *mci;
A
Alan Cox 已提交
569

570 571
	debugf0("%s()\n", __func__);

572
	mutex_lock(&mem_ctls_mutex);
573

574 575 576
	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
577
		mutex_unlock(&mem_ctls_mutex);
578 579 580
		return NULL;
	}

581 582 583
	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

A
Alan Cox 已提交
584
	del_mc_from_global_list(mci);
585
	mutex_unlock(&mem_ctls_mutex);
586 587 588 589 590

	/* flush workq processes and remove sysfs */
	edac_mc_workq_teardown(mci);
	edac_remove_sysfs_mci_device(mci);

D
Dave Peterson 已提交
591
	edac_printk(KERN_INFO, EDAC_MC,
592
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
593
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
594

595
	return mci;
A
Alan Cox 已提交
596
}
597
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
A
Alan Cox 已提交
598

599 600
/*
 * edac_mc_scrub_block
 *
 *	map the page with frame number 'page' and run atomic_scrub() over
 *	'size' bytes starting at 'offset' inside it. Frame numbers that
 *	fail pfn_valid() are silently ignored. Interrupts are disabled
 *	around the mapping when the page is a highmem page.
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	unsigned long irq_flags = 0;
	struct page *target;
	void *kaddr;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	target = pfn_to_page(page);

	if (PageHighMem(target))
		local_irq_save(irq_flags);

	kaddr = kmap_atomic(target, KM_BOUNCE_READ);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(kaddr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(kaddr, KM_BOUNCE_READ);

	if (PageHighMem(target))
		local_irq_restore(irq_flags);
}

/* FIXME - should return -1 */
D
Dave Peterson 已提交
631
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
A
Alan Cox 已提交
632 633 634 635
{
	struct csrow_info *csrows = mci->csrows;
	int row, i;

D
Dave Peterson 已提交
636
	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
A
Alan Cox 已提交
637 638 639 640 641 642 643 644
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];

		if (csrow->nr_pages == 0)
			continue;

D
Dave Peterson 已提交
645 646 647 648
		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);
A
Alan Cox 已提交
649 650 651 652 653 654 655 656 657 658 659

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
D
Dave Peterson 已提交
660
		edac_mc_printk(mci, KERN_ERR,
661 662
			"could not look up page error address %lx\n",
			(unsigned long)page);
A
Alan Cox 已提交
663 664 665

	return row;
}
666
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
A
Alan Cox 已提交
667 668 669 670

/* FIXME - setable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
671 672 673
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
A
Alan Cox 已提交
674 675 676
{
	unsigned long remapped_page;

D
Dave Peterson 已提交
677
	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
A
Alan Cox 已提交
678 679 680 681

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
682
		edac_mc_printk(mci, KERN_ERR,
683 684
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
A
Alan Cox 已提交
685 686 687
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
D
Dave Peterson 已提交
688

A
Alan Cox 已提交
689 690
	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
691
		edac_mc_printk(mci, KERN_ERR,
692 693 694
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
A
Alan Cox 已提交
695 696 697 698
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

D
Dave Jiang 已提交
699
	if (edac_mc_get_log_ce())
A
Alan Cox 已提交
700
		/* FIXME - put in DIMM location */
D
Dave Peterson 已提交
701
		edac_mc_printk(mci, KERN_WARNING,
702 703 704 705 706
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, syndrome, row, channel,
			mci->csrows[row].channels[channel].label, msg);
A
Alan Cox 已提交
707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722

	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
		 * are mapped.  This mapping is MC dependant and so we call
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
723 724
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;
A
Alan Cox 已提交
725 726

		edac_mc_scrub_block(remapped_page, offset_in_page,
727
				mci->csrows[row].grain);
A
Alan Cox 已提交
728 729
	}
}
730
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
A
Alan Cox 已提交
731

D
Dave Peterson 已提交
732
void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
A
Alan Cox 已提交
733
{
D
Dave Jiang 已提交
734
	if (edac_mc_get_log_ce())
D
Dave Peterson 已提交
735
		edac_mc_printk(mci, KERN_WARNING,
736
			"CE - no information available: %s\n", msg);
D
Dave Peterson 已提交
737

A
Alan Cox 已提交
738 739 740
	mci->ce_noinfo_count++;
	mci->ce_count++;
}
741
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
A
Alan Cox 已提交
742 743

void edac_mc_handle_ue(struct mem_ctl_info *mci,
744 745
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
A
Alan Cox 已提交
746 747 748 749 750 751 752
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;

D
Dave Peterson 已提交
753
	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
A
Alan Cox 已提交
754 755 756 757

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
758
		edac_mc_printk(mci, KERN_ERR,
759 760
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
A
Alan Cox 已提交
761 762 763 764 765
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	chars = snprintf(pos, len + 1, "%s",
766
			 mci->csrows[row].channels[0].label);
A
Alan Cox 已提交
767 768
	len -= chars;
	pos += chars;
D
Dave Peterson 已提交
769

A
Alan Cox 已提交
770
	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
771
		chan++) {
A
Alan Cox 已提交
772
		chars = snprintf(pos, len + 1, ":%s",
773
				 mci->csrows[row].channels[chan].label);
A
Alan Cox 已提交
774 775 776 777
		len -= chars;
		pos += chars;
	}

D
Dave Jiang 已提交
778
	if (edac_mc_get_log_ue())
D
Dave Peterson 已提交
779
		edac_mc_printk(mci, KERN_EMERG,
780 781 782 783
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, mci->csrows[row].grain, row,
			labels, msg);
A
Alan Cox 已提交
784

D
Dave Jiang 已提交
785
	if (edac_mc_get_panic_on_ue())
D
Dave Peterson 已提交
786
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
787 788 789
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, row, labels, msg);
A
Alan Cox 已提交
790 791 792 793

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
794
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
A
Alan Cox 已提交
795

D
Dave Peterson 已提交
796
void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
A
Alan Cox 已提交
797
{
D
Dave Jiang 已提交
798
	if (edac_mc_get_panic_on_ue())
A
Alan Cox 已提交
799 800
		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

D
Dave Jiang 已提交
801
	if (edac_mc_get_log_ue())
D
Dave Peterson 已提交
802
		edac_mc_printk(mci, KERN_WARNING,
803
			"UE - no information available: %s\n", msg);
A
Alan Cox 已提交
804 805 806
	mci->ue_noinfo_count++;
	mci->ue_count++;
}
807
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
A
Alan Cox 已提交
808

809 810 811 812 813
/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
814 815 816
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
817 818 819 820 821 822 823 824 825
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;

	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
826 827
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
828 829 830 831 832 833 834
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
835 836 837
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
838 839 840 841 842 843 844
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
845 846 847
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
848 849 850 851 852 853 854 855 856 857
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[csrow].channels[channela].label);
858 859
	len -= chars;
	pos += chars;
860 861 862
	chars = snprintf(pos, len + 1, "-%s",
			 mci->csrows[csrow].channels[channelb].label);

D
Dave Jiang 已提交
863
	if (edac_mc_get_log_ue())
864
		edac_mc_printk(mci, KERN_EMERG,
865 866 867
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);
868

D
Dave Jiang 已提交
869
	if (edac_mc_get_panic_on_ue())
870
		panic("UE row %d, channel-a= %d channel-b= %d "
871 872
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
873 874 875 876 877 878 879 880
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
881
			unsigned int csrow, unsigned int channel, char *msg)
882 883 884 885 886 887
{

	/* Ensure boundary values */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
888 889
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
890 891 892 893 894 895
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
	if (channel >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
896 897
			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
			channel, mci->csrows[csrow].nr_channels);
898 899 900 901
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

D
Dave Jiang 已提交
902
	if (edac_mc_get_log_ce())
903 904
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
905 906 907
			"CE row %d, channel %d, label \"%s\": %s\n",
			csrow, channel,
			mci->csrows[csrow].channels[channel].label, msg);
908 909 910 911 912

	mci->ce_count++;
	mci->csrows[csrow].ce_count++;
	mci->csrows[csrow].channels[channel].ce_count++;
}
913
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);