/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sysdev.h>
#include <linux/ctype.h>
D
Dave Jiang 已提交
30
#include <linux/edac.h>
A
Alan Cox 已提交
31 32 33
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
34
#include "edac_core.h"
35
#include "edac_module.h"
A
Alan Cox 已提交
36 37

/* lock to memory controller's control array */
38
static DEFINE_MUTEX(mem_ctls_mutex);
A
Alan Cox 已提交
39 40 41 42
static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

43
static void edac_mc_dump_channel(struct channel_info *chan)
A
Alan Cox 已提交
44 45 46 47 48 49 50 51
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}

52
static void edac_mc_dump_csrow(struct csrow_info *csrow)
A
Alan Cox 已提交
53 54 55
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
56
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
A
Alan Cox 已提交
57 58 59
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
60
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
A
Alan Cox 已提交
61 62 63 64
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

65
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
A
Alan Cox 已提交
66 67 68 69 70 71 72 73
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
74
	debugf3("\tdev = %p\n", mci->dev);
75
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
A
Alan Cox 已提交
76 77 78
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

79
#endif				/* CONFIG_EDAC_DEBUG */
A
Alan Cox 已提交
80 81 82 83 84 85 86 87

/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * The required alignment is chosen from 'size'; the amount of padding is
 * then derived from the address held in 'ptr' (not from 'size'), so an
 * already aligned pointer is returned unchanged and a misaligned one is
 * rounded up to the next multiple of the alignment.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return ptr;	/* single bytes need no alignment */

	/* distance of 'ptr' past the previous 'align' boundary */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return ptr;

	return (void *)(((unsigned long)ptr) + align - r);
}

/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @size_pvt:	size of private storage needed
 * @nr_csrows:	Number of CWROWS needed for this MC
 * @nr_chans:	Number of channels for the MC
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
132
				unsigned nr_chans, int edac_index)
A
Alan Cox 已提交
133 134 135 136 137 138 139
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct channel_info *chi, *chp, *chan;
	void *pvt;
	unsigned size;
	int row, chn;
140
	int err;
A
Alan Cox 已提交
141 142 143 144 145 146

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
147
	mci = (struct mem_ctl_info *)0;
148 149
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
150
	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
151
	size = ((unsigned long)pvt) + sz_pvt;
A
Alan Cox 已提交
152

153 154
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
A
Alan Cox 已提交
155 156 157 158 159
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
160 161 162
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
A
Alan Cox 已提交
163

164 165
	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
A
Alan Cox 已提交
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
	mci->csrows = csi;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;
		}
	}

185 186
	mci->op_state = OP_ALLOC;

187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *      edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
A
Alan Cox 已提交
202 203
	return mci;
}
204
EXPORT_SYMBOL_GPL(edac_mc_alloc);
A
Alan Cox 已提交
205 206

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * This only calls edac_mc_unregister_sysfs_main_kobj(); no kfree() is
 * issued from here.
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_mc_unregister_sysfs_main_kobj(mci);
}
215
EXPORT_SYMBOL_GPL(edac_mc_free);
A
Alan Cox 已提交
216

217
static struct mem_ctl_info *find_mci_by_dev(struct device *dev)
A
Alan Cox 已提交
218 219 220 221
{
	struct mem_ctl_info *mci;
	struct list_head *item;

D
Dave Peterson 已提交
222
	debugf3("%s()\n", __func__);
A
Alan Cox 已提交
223 224 225 226

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

227
		if (mci->dev == dev)
A
Alan Cox 已提交
228 229 230 231 232 233
			return mci;
	}

	return NULL;
}

234 235 236 237 238
/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
239
	int old_state;
240

241
	if (edac_op_state == EDAC_OPSTATE_POLL)
242 243
		return 1;

244 245
	old_state = edac_err_assert;
	edac_err_assert = 0;
246

247
	return old_state;
248 249 250 251 252 253 254 255
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
256
	struct delayed_work *d_work = (struct delayed_work *)work_req;
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	/*
	 * FIXME: temp place holder for PCI checks,
	 * goes away when we break out PCI
	 */
	edac_pci_do_parity_check();

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
D
Dave Jiang 已提交
274
	queue_delayed_work(edac_workqueue, &mci->work,
275
			msecs_to_jiffies(edac_mc_get_poll_msec()));
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
}

/*
 * edac_mc_workq_setup
 *	(re)initialize the delayed work item of this mci and queue it on
 *	edac_workqueue so that it runs after 'msec' milliseconds
 */
void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	unsigned long delay;

	debugf0("%s()\n", __func__);

	delay = msecs_to_jiffies(msec);

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, delay);
}

/*
 * edac_mc_workq_teardown
 *	cancel the delayed work of this mci; when the cancel reports 0,
 *	flush edac_workqueue instead
 */
void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	/* non-zero result: the cancel took effect, nothing left to wait for */
	if (cancel_delayed_work(&mci->work) != 0)
		return;

	/* workq instance might be running, wait for it */
	flush_workqueue(edac_workqueue);
}

/*
 * edac_reset_delay_period
 *	Stop the delayed work of 'mci' and queue it again with 'value' as the
 *	new delay (passed on as the msec argument of edac_mc_workq_setup()).
 *	Both steps run with mem_ctls_mutex held.
 *
 * NOTE(review): edac_mc_workq_teardown() may call flush_workqueue() while
 * mem_ctls_mutex is held here, and edac_mc_workq_function() takes the same
 * mutex - this looks like it can deadlock; confirm before relying on it.
 */

void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value)
{
	mutex_lock(&mem_ctls_mutex);

	/* cancel the current workq request */
	edac_mc_workq_teardown(mci);

	/* restart the workq request, with new delay value */
	edac_mc_workq_setup(mci, value);

	mutex_unlock(&mem_ctls_mutex);
}

323 324 325 326
/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 */
327
static int add_mc_to_global_list(struct mem_ctl_info *mci)
A
Alan Cox 已提交
328 329 330 331
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

332
	insert_before = &mc_devices;
A
Alan Cox 已提交
333

334 335
	if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL))
		goto fail0;
A
Alan Cox 已提交
336

337 338
	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);
A
Alan Cox 已提交
339

340 341 342
		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;
A
Alan Cox 已提交
343

344 345
			insert_before = item;
			break;
A
Alan Cox 已提交
346 347 348 349
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
D
Dave Jiang 已提交
350
	atomic_inc(&edac_handlers);
A
Alan Cox 已提交
351
	return 0;
352

353
fail0:
354
	edac_printk(KERN_WARNING, EDAC_MC,
355 356
		"%s (%s) %s %s already assigned %d\n", p->dev->bus_id,
		dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
357 358
	return 1;

359
fail1:
360
	edac_printk(KERN_WARNING, EDAC_MC,
361 362
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
363
	return 1;
A
Alan Cox 已提交
364 365
}

D
Dave Peterson 已提交
366
static void complete_mc_list_del(struct rcu_head *head)
367 368 369 370 371 372 373 374
{
	struct mem_ctl_info *mci;

	mci = container_of(head, struct mem_ctl_info, rcu);
	INIT_LIST_HEAD(&mci->link);
	complete(&mci->complete);
}

D
Dave Peterson 已提交
375
static void del_mc_from_global_list(struct mem_ctl_info *mci)
376
{
D
Dave Jiang 已提交
377
	atomic_dec(&edac_handlers);
378 379 380 381 382 383
	list_del_rcu(&mci->link);
	init_completion(&mci->complete);
	call_rcu(&mci->rcu, complete_mc_list_del);
	wait_for_completion(&mci->complete);
}

384 385 386 387 388 389 390 391
/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
392
struct mem_ctl_info *edac_mc_find(int idx)
393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

A
Alan Cox 已提交
412
/**
413 414
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
A
Alan Cox 已提交
415
 * @mci: pointer to the mci structure to be added to the list
416
 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
A
Alan Cox 已提交
417 418 419 420 421 422 423
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
424
int edac_mc_add_mc(struct mem_ctl_info *mci)
A
Alan Cox 已提交
425
{
D
Dave Peterson 已提交
426
	debugf0("%s()\n", __func__);
427

A
Alan Cox 已提交
428 429 430
#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);
D
Dave Peterson 已提交
431

A
Alan Cox 已提交
432 433 434 435 436
	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;
D
Dave Peterson 已提交
437

A
Alan Cox 已提交
438 439
			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
440
				edac_mc_dump_channel(&mci->csrows[i].
441
						channels[j]);
A
Alan Cox 已提交
442 443 444
		}
	}
#endif
445
	mutex_lock(&mem_ctls_mutex);
A
Alan Cox 已提交
446 447

	if (add_mc_to_global_list(mci))
448
		goto fail0;
A
Alan Cox 已提交
449 450 451 452

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

453 454
	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
455
			"failed to create sysfs device\n");
456 457
		goto fail1;
	}
A
Alan Cox 已提交
458

459 460 461 462 463 464 465 466 467 468
	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

A
Alan Cox 已提交
469
	/* Report action taken */
470
	edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n",
471
		mci->mod_name, mci->ctl_name, dev_name(mci));
A
Alan Cox 已提交
472

473
	mutex_unlock(&mem_ctls_mutex);
474
	return 0;
A
Alan Cox 已提交
475

476
fail1:
477 478
	del_mc_from_global_list(mci);

479
fail0:
480
	mutex_unlock(&mem_ctls_mutex);
481
	return 1;
A
Alan Cox 已提交
482
}
483
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
A
Alan Cox 已提交
484 485

/**
486 487
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
488
 * @pdev: Pointer to 'struct device' representing mci structure to remove.
A
Alan Cox 已提交
489
 *
490
 * Return pointer to removed mci structure, or NULL if device not found.
A
Alan Cox 已提交
491
 */
492
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
A
Alan Cox 已提交
493
{
494
	struct mem_ctl_info *mci;
A
Alan Cox 已提交
495

496
	debugf0("MC: %s()\n", __func__);
497
	mutex_lock(&mem_ctls_mutex);
498

499
	if ((mci = find_mci_by_dev(dev)) == NULL) {
500
		mutex_unlock(&mem_ctls_mutex);
501 502 503
		return NULL;
	}

504 505 506 507 508 509
	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

510
	edac_remove_sysfs_mci_device(mci);
A
Alan Cox 已提交
511
	del_mc_from_global_list(mci);
512
	mutex_unlock(&mem_ctls_mutex);
D
Dave Peterson 已提交
513
	edac_printk(KERN_INFO, EDAC_MC,
514 515
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, dev_name(mci));
516
	return mci;
A
Alan Cox 已提交
517
}
518
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
A
Alan Cox 已提交
519

520 521
/* Map page frame 'page' and run atomic_scrub() on 'size' bytes starting at
 * 'offset' inside it.  Frames for which pfn_valid() fails are ignored.
 * For highmem pages local interrupts are disabled around the atomic mapping.
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr, KM_BOUNCE_READ);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
D
Dave Peterson 已提交
552
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
A
Alan Cox 已提交
553 554 555 556
{
	struct csrow_info *csrows = mci->csrows;
	int row, i;

D
Dave Peterson 已提交
557
	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
A
Alan Cox 已提交
558 559 560 561 562 563 564 565
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];

		if (csrow->nr_pages == 0)
			continue;

D
Dave Peterson 已提交
566 567 568 569
		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);
A
Alan Cox 已提交
570 571 572 573 574 575 576 577 578 579 580

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
D
Dave Peterson 已提交
581
		edac_mc_printk(mci, KERN_ERR,
582 583
			"could not look up page error address %lx\n",
			(unsigned long)page);
A
Alan Cox 已提交
584 585 586

	return row;
}
587
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
A
Alan Cox 已提交
588 589 590 591

/* FIXME - setable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
592 593 594
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
A
Alan Cox 已提交
595 596 597
{
	unsigned long remapped_page;

D
Dave Peterson 已提交
598
	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
A
Alan Cox 已提交
599 600 601 602

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
603
		edac_mc_printk(mci, KERN_ERR,
604 605
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
A
Alan Cox 已提交
606 607 608
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
D
Dave Peterson 已提交
609

A
Alan Cox 已提交
610 611
	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
612
		edac_mc_printk(mci, KERN_ERR,
613 614 615
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
A
Alan Cox 已提交
616 617 618 619
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

D
Dave Jiang 已提交
620
	if (edac_mc_get_log_ce())
A
Alan Cox 已提交
621
		/* FIXME - put in DIMM location */
D
Dave Peterson 已提交
622
		edac_mc_printk(mci, KERN_WARNING,
623 624 625 626 627
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, syndrome, row, channel,
			mci->csrows[row].channels[channel].label, msg);
A
Alan Cox 已提交
628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643

	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
		 * are mapped.  This mapping is MC dependant and so we call
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
644 645
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;
A
Alan Cox 已提交
646 647

		edac_mc_scrub_block(remapped_page, offset_in_page,
648
				mci->csrows[row].grain);
A
Alan Cox 已提交
649 650
	}
}
651
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
A
Alan Cox 已提交
652

D
Dave Peterson 已提交
653
void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
A
Alan Cox 已提交
654
{
D
Dave Jiang 已提交
655
	if (edac_mc_get_log_ce())
D
Dave Peterson 已提交
656
		edac_mc_printk(mci, KERN_WARNING,
657
			"CE - no information available: %s\n", msg);
D
Dave Peterson 已提交
658

A
Alan Cox 已提交
659 660 661
	mci->ce_noinfo_count++;
	mci->ce_count++;
}
662
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
A
Alan Cox 已提交
663 664

void edac_mc_handle_ue(struct mem_ctl_info *mci,
665 666
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
A
Alan Cox 已提交
667 668 669 670 671 672 673
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;

D
Dave Peterson 已提交
674
	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
A
Alan Cox 已提交
675 676 677 678

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
679
		edac_mc_printk(mci, KERN_ERR,
680 681
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
A
Alan Cox 已提交
682 683 684 685 686
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	chars = snprintf(pos, len + 1, "%s",
687
			 mci->csrows[row].channels[0].label);
A
Alan Cox 已提交
688 689
	len -= chars;
	pos += chars;
D
Dave Peterson 已提交
690

A
Alan Cox 已提交
691
	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
692
		chan++) {
A
Alan Cox 已提交
693
		chars = snprintf(pos, len + 1, ":%s",
694
				 mci->csrows[row].channels[chan].label);
A
Alan Cox 已提交
695 696 697 698
		len -= chars;
		pos += chars;
	}

D
Dave Jiang 已提交
699
	if (edac_mc_get_log_ue())
D
Dave Peterson 已提交
700
		edac_mc_printk(mci, KERN_EMERG,
701 702 703 704
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, mci->csrows[row].grain, row,
			labels, msg);
A
Alan Cox 已提交
705

D
Dave Jiang 已提交
706
	if (edac_mc_get_panic_on_ue())
D
Dave Peterson 已提交
707
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
708 709 710
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, row, labels, msg);
A
Alan Cox 已提交
711 712 713 714

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
715
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
A
Alan Cox 已提交
716

D
Dave Peterson 已提交
717
void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
A
Alan Cox 已提交
718
{
D
Dave Jiang 已提交
719
	if (edac_mc_get_panic_on_ue())
A
Alan Cox 已提交
720 721
		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

D
Dave Jiang 已提交
722
	if (edac_mc_get_log_ue())
D
Dave Peterson 已提交
723
		edac_mc_printk(mci, KERN_WARNING,
724
			"UE - no information available: %s\n", msg);
A
Alan Cox 已提交
725 726 727
	mci->ue_noinfo_count++;
	mci->ue_count++;
}
728
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
A
Alan Cox 已提交
729

730 731 732 733 734
/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
735 736 737
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
738 739 740 741 742 743 744 745 746
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;

	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
747 748
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
749 750 751 752 753 754 755
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
756 757 758
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
759 760 761 762 763 764 765
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
766 767 768
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
769 770 771 772 773 774 775 776 777 778
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[csrow].channels[channela].label);
779 780
	len -= chars;
	pos += chars;
781 782 783
	chars = snprintf(pos, len + 1, "-%s",
			 mci->csrows[csrow].channels[channelb].label);

D
Dave Jiang 已提交
784
	if (edac_mc_get_log_ue())
785
		edac_mc_printk(mci, KERN_EMERG,
786 787 788
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);
789

D
Dave Jiang 已提交
790
	if (edac_mc_get_panic_on_ue())
791
		panic("UE row %d, channel-a= %d channel-b= %d "
792 793
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
794 795 796 797 798 799 800 801
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
802
			unsigned int csrow, unsigned int channel, char *msg)
803 804 805 806 807 808
{

	/* Ensure boundary values */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
809 810
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
811 812 813 814 815 816
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
	if (channel >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
817 818
			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
			channel, mci->csrows[csrow].nr_channels);
819 820 821 822
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

D
Dave Jiang 已提交
823
	if (edac_mc_get_log_ce())
824 825
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
826 827 828
			"CE row %d, channel %d, label \"%s\": %s\n",
			csrow, channel,
			mci->csrows[csrow].channels[channel].label, msg);
829 830 831 832 833

	mci->ce_count++;
	mci->csrows[csrow].ce_count++;
	mci->csrows[csrow].channels[channel].ce_count++;
}
834
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
835

A
Alan Cox 已提交
836 837 838
/*
 * Iterate over all MC instances and check for ECC, et al, errors
 */
839
void edac_check_mc_devices(void)
A
Alan Cox 已提交
840 841 842 843
{
	struct list_head *item;
	struct mem_ctl_info *mci;

D
Dave Peterson 已提交
844
	debugf3("%s()\n", __func__);
845
	mutex_lock(&mem_ctls_mutex);
A
Alan Cox 已提交
846 847 848 849 850 851 852 853

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->edac_check != NULL)
			mci->edac_check(mci);
	}

854
	mutex_unlock(&mem_ctls_mutex);
A
Alan Cox 已提交
855
}