/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sysdev.h>
#include <linux/ctype.h>
D
Dave Jiang 已提交
30
#include <linux/edac.h>
A
Alan Cox 已提交
31 32 33
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
34
#include "edac_core.h"
35
#include "edac_module.h"
A
Alan Cox 已提交
36 37

/* lock taken around every walk or update of the mc_devices list */
static DEFINE_MUTEX(mem_ctls_mutex);

/* list of added memory controllers, kept in ascending mc_idx order */
static LIST_HEAD(mc_devices);
A
Alan Cox 已提交
40 41 42

#ifdef CONFIG_EDAC_DEBUG

43
static void edac_mc_dump_channel(struct channel_info *chan)
A
Alan Cox 已提交
44 45 46 47 48 49 50 51
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}

52
static void edac_mc_dump_csrow(struct csrow_info *csrow)
A
Alan Cox 已提交
53 54 55
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
56
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
A
Alan Cox 已提交
57 58 59
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
60
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
A
Alan Cox 已提交
61 62 63 64
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

65
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
A
Alan Cox 已提交
66 67 68 69 70 71 72 73
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
74
	debugf3("\tdev = %p\n", mci->dev);
75
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
A
Alan Cox 已提交
76 77 78
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

79
#endif				/* CONFIG_EDAC_DEBUG */
A
Alan Cox 已提交
80 81 82 83 84 85 86 87

/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * The required alignment is chosen from 'size'; the amount of padding is
 * then derived from the address held in 'ptr'.  An already aligned 'ptr'
 * (or any 'ptr' when 'size' is that of a char) is returned unchanged,
 * otherwise the next higher aligned address is returned.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return ptr;

	/* How far the address itself is past the previous aligned boundary.
	 * This must be taken from 'ptr', not from 'size': a size that is a
	 * multiple of 'align' says nothing about where 'ptr' points.
	 */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return ptr;

	return (void *)(((unsigned long)ptr) + align - r);
}

/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @size_pvt:	size of private storage needed
 * @nr_csrows:	Number of CWROWS needed for this MC
 * @nr_chans:	Number of channels for the MC
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
132
				unsigned nr_chans, int edac_index)
A
Alan Cox 已提交
133 134 135 136 137 138 139
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct channel_info *chi, *chp, *chan;
	void *pvt;
	unsigned size;
	int row, chn;
140
	int err;
A
Alan Cox 已提交
141 142 143 144 145 146

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
147
	mci = (struct mem_ctl_info *)0;
148 149
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
150
	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
151
	size = ((unsigned long)pvt) + sz_pvt;
A
Alan Cox 已提交
152

153 154
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
A
Alan Cox 已提交
155 156 157 158 159
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
160 161 162
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
A
Alan Cox 已提交
163

164 165
	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
A
Alan Cox 已提交
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
	mci->csrows = csi;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;
		}
	}

185 186
	mci->op_state = OP_ALLOC;

187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *      edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
A
Alan Cox 已提交
202 203
	return mci;
}
204
EXPORT_SYMBOL_GPL(edac_mc_alloc);
A
Alan Cox 已提交
205 206

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * Nothing is released directly here; the structure is handed to
 * edac_mc_unregister_sysfs_main_kobj().
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_mc_unregister_sysfs_main_kobj(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
A
Alan Cox 已提交
216

217 218 219 220 221 222 223

/*
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 */
224
static struct mem_ctl_info *find_mci_by_dev(struct device *dev)
A
Alan Cox 已提交
225 226 227 228
{
	struct mem_ctl_info *mci;
	struct list_head *item;

D
Dave Peterson 已提交
229
	debugf3("%s()\n", __func__);
A
Alan Cox 已提交
230 231 232 233

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

234
		if (mci->dev == dev)
A
Alan Cox 已提交
235 236 237 238 239 240
			return mci;
	}

	return NULL;
}

241 242 243 244 245
/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
246
	int old_state;
247

248
	if (edac_op_state == EDAC_OPSTATE_POLL)
249 250
		return 1;

251 252
	old_state = edac_err_assert;
	edac_err_assert = 0;
253

254
	return old_state;
255 256 257 258 259 260 261 262
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
263
	struct delayed_work *d_work = (struct delayed_work *)work_req;
264 265 266 267
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

268 269 270 271 272 273
	/* if this control struct has movd to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

274 275 276 277 278 279 280
	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
D
Dave Jiang 已提交
281
	queue_delayed_work(edac_workqueue, &mci->work,
282
			msecs_to_jiffies(edac_mc_get_poll_msec()));
283 284 285 286 287 288
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
289 290 291 292
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
293
 */
294
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
295 296 297
{
	debugf0("%s()\n", __func__);

298 299 300 301
	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

302 303 304 305 306 307 308
	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
309 310 311 312
 *
 *	locking model:
 *
 *		called WITHOUT lock held
313
 */
314
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
315 316 317
{
	int status;

318 319 320 321
	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);
322

323 324
		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
325 326 327 328
	}
}

/*
329 330 331 332
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
333
 */
334
void edac_mc_reset_delay_period(int value)
335
{
336 337 338 339 340 341 342 343 344 345 346 347 348 349 350
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);
351

352 353

	/* re-walk the list, and reset the poll delay */
354 355
	mutex_lock(&mem_ctls_mutex);

356 357 358 359 360
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}
361 362 363 364

	mutex_unlock(&mem_ctls_mutex);
}

365 366


367 368 369
/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
370 371 372 373
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
374
 */
375
static int add_mc_to_global_list(struct mem_ctl_info *mci)
A
Alan Cox 已提交
376 377 378 379
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

380
	insert_before = &mc_devices;
A
Alan Cox 已提交
381

382 383
	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
384
		goto fail0;
A
Alan Cox 已提交
385

386 387
	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);
A
Alan Cox 已提交
388

389 390 391
		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;
A
Alan Cox 已提交
392

393 394
			insert_before = item;
			break;
A
Alan Cox 已提交
395 396 397 398
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
D
Dave Jiang 已提交
399
	atomic_inc(&edac_handlers);
A
Alan Cox 已提交
400
	return 0;
401

402
fail0:
403
	edac_printk(KERN_WARNING, EDAC_MC,
404 405
		"%s (%s) %s %s already assigned %d\n", p->dev->bus_id,
		dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
406 407
	return 1;

408
fail1:
409
	edac_printk(KERN_WARNING, EDAC_MC,
410 411
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
412
	return 1;
A
Alan Cox 已提交
413 414
}

D
Dave Peterson 已提交
415
static void complete_mc_list_del(struct rcu_head *head)
416 417 418 419 420 421 422 423
{
	struct mem_ctl_info *mci;

	mci = container_of(head, struct mem_ctl_info, rcu);
	INIT_LIST_HEAD(&mci->link);
	complete(&mci->complete);
}

D
Dave Peterson 已提交
424
static void del_mc_from_global_list(struct mem_ctl_info *mci)
425
{
D
Dave Jiang 已提交
426
	atomic_dec(&edac_handlers);
427 428 429 430 431 432
	list_del_rcu(&mci->link);
	init_completion(&mci->complete);
	call_rcu(&mci->rcu, complete_mc_list_del);
	wait_for_completion(&mci->complete);
}

433 434 435 436 437 438 439 440
/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
441
struct mem_ctl_info *edac_mc_find(int idx)
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

A
Alan Cox 已提交
461
/**
462 463
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
A
Alan Cox 已提交
464
 * @mci: pointer to the mci structure to be added to the list
465
 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
A
Alan Cox 已提交
466 467 468 469 470 471 472
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
473
int edac_mc_add_mc(struct mem_ctl_info *mci)
A
Alan Cox 已提交
474
{
D
Dave Peterson 已提交
475
	debugf0("%s()\n", __func__);
476

A
Alan Cox 已提交
477 478 479
#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);
D
Dave Peterson 已提交
480

A
Alan Cox 已提交
481 482 483 484 485
	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;
D
Dave Peterson 已提交
486

A
Alan Cox 已提交
487 488
			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
489
				edac_mc_dump_channel(&mci->csrows[i].
490
						channels[j]);
A
Alan Cox 已提交
491 492 493
		}
	}
#endif
494
	mutex_lock(&mem_ctls_mutex);
A
Alan Cox 已提交
495 496

	if (add_mc_to_global_list(mci))
497
		goto fail0;
A
Alan Cox 已提交
498 499 500 501

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

502 503
	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
504
			"failed to create sysfs device\n");
505 506
		goto fail1;
	}
A
Alan Cox 已提交
507

508 509 510 511 512 513 514 515 516 517
	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

A
Alan Cox 已提交
518
	/* Report action taken */
519 520
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, dev_name(mci));
A
Alan Cox 已提交
521

522
	mutex_unlock(&mem_ctls_mutex);
523
	return 0;
A
Alan Cox 已提交
524

525
fail1:
526 527
	del_mc_from_global_list(mci);

528
fail0:
529
	mutex_unlock(&mem_ctls_mutex);
530
	return 1;
A
Alan Cox 已提交
531
}
532
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
A
Alan Cox 已提交
533 534

/**
535 536
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
537
 * @pdev: Pointer to 'struct device' representing mci structure to remove.
A
Alan Cox 已提交
538
 *
539
 * Return pointer to removed mci structure, or NULL if device not found.
A
Alan Cox 已提交
540
 */
541
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
A
Alan Cox 已提交
542
{
543
	struct mem_ctl_info *mci;
A
Alan Cox 已提交
544

545 546
	debugf0("%s()\n", __func__);

547
	mutex_lock(&mem_ctls_mutex);
548

549 550 551
	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
552
		mutex_unlock(&mem_ctls_mutex);
553 554 555
		return NULL;
	}

556 557 558
	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

A
Alan Cox 已提交
559
	del_mc_from_global_list(mci);
560
	mutex_unlock(&mem_ctls_mutex);
561 562 563 564 565

	/* flush workq processes and remove sysfs */
	edac_mc_workq_teardown(mci);
	edac_remove_sysfs_mci_device(mci);

D
Dave Peterson 已提交
566
	edac_printk(KERN_INFO, EDAC_MC,
567 568
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, dev_name(mci));
569

570
	return mci;
A
Alan Cox 已提交
571
}
572
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
A
Alan Cox 已提交
573

574 575
/*
 * edac_mc_scrub_block
 *	run atomic_scrub() over 'size' bytes located 'offset' bytes into
 *	page frame 'page'
 *
 *	Returns without doing anything when pfn_valid() rejects 'page'.
 *	For a highmem page, local interrupts are disabled for the duration
 *	of the atomic mapping.
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr, KM_BOUNCE_READ);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
D
Dave Peterson 已提交
606
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
A
Alan Cox 已提交
607 608 609 610
{
	struct csrow_info *csrows = mci->csrows;
	int row, i;

D
Dave Peterson 已提交
611
	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
A
Alan Cox 已提交
612 613 614 615 616 617 618 619
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];

		if (csrow->nr_pages == 0)
			continue;

D
Dave Peterson 已提交
620 621 622 623
		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);
A
Alan Cox 已提交
624 625 626 627 628 629 630 631 632 633 634

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
D
Dave Peterson 已提交
635
		edac_mc_printk(mci, KERN_ERR,
636 637
			"could not look up page error address %lx\n",
			(unsigned long)page);
A
Alan Cox 已提交
638 639 640

	return row;
}
641
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
A
Alan Cox 已提交
642 643 644 645

/* FIXME - setable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
646 647 648
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
A
Alan Cox 已提交
649 650 651
{
	unsigned long remapped_page;

D
Dave Peterson 已提交
652
	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
A
Alan Cox 已提交
653 654 655 656

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
657
		edac_mc_printk(mci, KERN_ERR,
658 659
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
A
Alan Cox 已提交
660 661 662
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
D
Dave Peterson 已提交
663

A
Alan Cox 已提交
664 665
	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
666
		edac_mc_printk(mci, KERN_ERR,
667 668 669
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
A
Alan Cox 已提交
670 671 672 673
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

D
Dave Jiang 已提交
674
	if (edac_mc_get_log_ce())
A
Alan Cox 已提交
675
		/* FIXME - put in DIMM location */
D
Dave Peterson 已提交
676
		edac_mc_printk(mci, KERN_WARNING,
677 678 679 680 681
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, syndrome, row, channel,
			mci->csrows[row].channels[channel].label, msg);
A
Alan Cox 已提交
682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697

	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
		 * are mapped.  This mapping is MC dependant and so we call
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
698 699
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;
A
Alan Cox 已提交
700 701

		edac_mc_scrub_block(remapped_page, offset_in_page,
702
				mci->csrows[row].grain);
A
Alan Cox 已提交
703 704
	}
}
705
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
A
Alan Cox 已提交
706

D
Dave Peterson 已提交
707
void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
A
Alan Cox 已提交
708
{
D
Dave Jiang 已提交
709
	if (edac_mc_get_log_ce())
D
Dave Peterson 已提交
710
		edac_mc_printk(mci, KERN_WARNING,
711
			"CE - no information available: %s\n", msg);
D
Dave Peterson 已提交
712

A
Alan Cox 已提交
713 714 715
	mci->ce_noinfo_count++;
	mci->ce_count++;
}
716
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
A
Alan Cox 已提交
717 718

void edac_mc_handle_ue(struct mem_ctl_info *mci,
719 720
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
A
Alan Cox 已提交
721 722 723 724 725 726 727
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;

D
Dave Peterson 已提交
728
	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
A
Alan Cox 已提交
729 730 731 732

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
733
		edac_mc_printk(mci, KERN_ERR,
734 735
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
A
Alan Cox 已提交
736 737 738 739 740
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	chars = snprintf(pos, len + 1, "%s",
741
			 mci->csrows[row].channels[0].label);
A
Alan Cox 已提交
742 743
	len -= chars;
	pos += chars;
D
Dave Peterson 已提交
744

A
Alan Cox 已提交
745
	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
746
		chan++) {
A
Alan Cox 已提交
747
		chars = snprintf(pos, len + 1, ":%s",
748
				 mci->csrows[row].channels[chan].label);
A
Alan Cox 已提交
749 750 751 752
		len -= chars;
		pos += chars;
	}

D
Dave Jiang 已提交
753
	if (edac_mc_get_log_ue())
D
Dave Peterson 已提交
754
		edac_mc_printk(mci, KERN_EMERG,
755 756 757 758
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, mci->csrows[row].grain, row,
			labels, msg);
A
Alan Cox 已提交
759

D
Dave Jiang 已提交
760
	if (edac_mc_get_panic_on_ue())
D
Dave Peterson 已提交
761
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
762 763 764
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, row, labels, msg);
A
Alan Cox 已提交
765 766 767 768

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
769
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
A
Alan Cox 已提交
770

D
Dave Peterson 已提交
771
void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
A
Alan Cox 已提交
772
{
D
Dave Jiang 已提交
773
	if (edac_mc_get_panic_on_ue())
A
Alan Cox 已提交
774 775
		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

D
Dave Jiang 已提交
776
	if (edac_mc_get_log_ue())
D
Dave Peterson 已提交
777
		edac_mc_printk(mci, KERN_WARNING,
778
			"UE - no information available: %s\n", msg);
A
Alan Cox 已提交
779 780 781
	mci->ue_noinfo_count++;
	mci->ue_count++;
}
782
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
A
Alan Cox 已提交
783

784 785 786 787 788
/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
789 790 791
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
792 793 794 795 796 797 798 799 800
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;

	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
801 802
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
803 804 805 806 807 808 809
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
810 811 812
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
813 814 815 816 817 818 819
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
820 821 822
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
823 824 825 826 827 828 829 830 831 832
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[csrow].channels[channela].label);
833 834
	len -= chars;
	pos += chars;
835 836 837
	chars = snprintf(pos, len + 1, "-%s",
			 mci->csrows[csrow].channels[channelb].label);

D
Dave Jiang 已提交
838
	if (edac_mc_get_log_ue())
839
		edac_mc_printk(mci, KERN_EMERG,
840 841 842
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);
843

D
Dave Jiang 已提交
844
	if (edac_mc_get_panic_on_ue())
845
		panic("UE row %d, channel-a= %d channel-b= %d "
846 847
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
848 849 850 851 852 853 854 855
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
856
			unsigned int csrow, unsigned int channel, char *msg)
857 858 859 860 861 862
{

	/* Ensure boundary values */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
863 864
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
865 866 867 868 869 870
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
	if (channel >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
871 872
			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
			channel, mci->csrows[csrow].nr_channels);
873 874 875 876
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

D
Dave Jiang 已提交
877
	if (edac_mc_get_log_ce())
878 879
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
880 881 882
			"CE row %d, channel %d, label \"%s\": %s\n",
			csrow, channel,
			mci->csrows[csrow].channels[channel].label, msg);
883 884 885 886 887

	mci->ce_count++;
	mci->csrows[csrow].ce_count++;
	mci->csrows[csrow].channels[channel].ce_count++;
}
888
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
889

A
Alan Cox 已提交
890 891 892
/*
 * Iterate over all MC instances and check for ECC, et al, errors
 */
893
void edac_check_mc_devices(void)
A
Alan Cox 已提交
894 895 896 897
{
	struct list_head *item;
	struct mem_ctl_info *mci;

D
Dave Peterson 已提交
898
	debugf3("%s()\n", __func__);
899
	mutex_lock(&mem_ctls_mutex);
A
Alan Cox 已提交
900 901 902 903 904 905 906 907

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->edac_check != NULL)
			mci->edac_check(mci);
	}

908
	mutex_unlock(&mem_ctls_mutex);
A
Alan Cox 已提交
909
}