edac_mc.c 23.4 KB
Newer Older
A
Alan Cox 已提交
1 2
/*
 * edac_mc kernel module
3
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
A
Alan Cox 已提交
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
D
Dave Jiang 已提交
29
#include <linux/edac.h>
A
Alan Cox 已提交
30 31 32
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
33
#include "edac_core.h"
34
#include "edac_module.h"
A
Alan Cox 已提交
35 36

/* lock to memory controller's control array */
37
static DEFINE_MUTEX(mem_ctls_mutex);
38
static LIST_HEAD(mc_devices);
A
Alan Cox 已提交
39 40 41

#ifdef CONFIG_EDAC_DEBUG

42
static void edac_mc_dump_channel(struct channel_info *chan)
A
Alan Cox 已提交
43 44 45 46 47 48 49 50
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}

51
static void edac_mc_dump_csrow(struct csrow_info *csrow)
A
Alan Cox 已提交
52 53 54
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
55
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
A
Alan Cox 已提交
56 57 58
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
59
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
A
Alan Cox 已提交
60 61 62 63
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

64
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
A
Alan Cox 已提交
65 66 67 68 69 70 71 72
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
73
	debugf3("\tdev = %p\n", mci->dev);
74
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
A
Alan Cox 已提交
75 76 77
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

78 79
#endif				/* CONFIG_EDAC_DEBUG */

80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
/*
 * Human-readable names of the memory types.
 *
 * Keep the entries in sync with enum mem_type (declared outside this
 * file): the position of each string is expected to match the value of
 * the corresponding enumerator, so add new names in the same place the
 * enumerator is added.
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

A
Alan Cox 已提交
104 105 106 107 108 109 110
/**
 * edac_align_ptr - round a pointer up to the natural alignment of an item
 * @ptr:	possibly unaligned address at which an item X is to be placed
 * @size:	sizeof(X)
 *
 * Adjusts 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X, and returns the aligned result.  An
 * already-aligned pointer, or a pointer to a char-sized item, is returned
 * unchanged.
 *
 * The alignment is picked from 'size' alone; the amount of padding is
 * derived from the *address* held in 'ptr' (not from 'size'), so a
 * misaligned pointer is always rounded up to the next 'align' boundary.
 *
 * If 'size' is a constant, the compiler can reduce the alignment selection
 * to a constant.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned long addr = (unsigned long)ptr;
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return ptr;

	/* distance of the address past the previous 'align' boundary */
	r = addr % align;
	if (r == 0)
		return ptr;

	return (void *)(addr + align - r);
}

/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @size_pvt:	size of private storage needed
 * @nr_csrows:	Number of CWROWS needed for this MC
 * @nr_chans:	Number of channels for the MC
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
155
				unsigned nr_chans, int edac_index)
A
Alan Cox 已提交
156 157 158 159 160 161 162
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct channel_info *chi, *chp, *chan;
	void *pvt;
	unsigned size;
	int row, chn;
163
	int err;
A
Alan Cox 已提交
164 165 166 167 168 169

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
170
	mci = (struct mem_ctl_info *)0;
171 172
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
173
	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
174
	size = ((unsigned long)pvt) + sz_pvt;
A
Alan Cox 已提交
175

176 177
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
A
Alan Cox 已提交
178 179 180 181 182
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
183 184 185
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
A
Alan Cox 已提交
186

187 188
	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
A
Alan Cox 已提交
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
	mci->csrows = csi;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;
		}
	}

208
	mci->op_state = OP_ALLOC;
209
	INIT_LIST_HEAD(&mci->grp_kobj_list);
210

211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *      edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
A
Alan Cox 已提交
226 227
	return mci;
}
228
EXPORT_SYMBOL_GPL(edac_mc_alloc);
A
Alan Cox 已提交
229 230

/**
 * edac_mc_free - 'free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * Unregisters the main sysfs kobject of @mci and then releases the
 * memory of the mci instance.
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	/* take the main kobject down before the memory goes away */
	edac_mc_unregister_sysfs_main_kobj(mci);
	kfree(mci);
}
244
EXPORT_SYMBOL_GPL(edac_mc_free);
A
Alan Cox 已提交
245

246

247
/**
248 249 250 251
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
252
 * @dev: pointer to a struct device related with the MCI
253
 */
254
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
A
Alan Cox 已提交
255 256 257 258
{
	struct mem_ctl_info *mci;
	struct list_head *item;

D
Dave Peterson 已提交
259
	debugf3("%s()\n", __func__);
A
Alan Cox 已提交
260 261 262 263

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

264
		if (mci->dev == dev)
A
Alan Cox 已提交
265 266 267 268 269
			return mci;
	}

	return NULL;
}
270
EXPORT_SYMBOL_GPL(find_mci_by_dev);
A
Alan Cox 已提交
271

272 273 274 275 276
/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
277
	int old_state;
278

279
	if (edac_op_state == EDAC_OPSTATE_POLL)
280 281
		return 1;

282 283
	old_state = edac_err_assert;
	edac_err_assert = 0;
284

285
	return old_state;
286 287 288 289 290 291 292 293
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
J
Jean Delvare 已提交
294
	struct delayed_work *d_work = to_delayed_work(work_req);
295 296 297 298
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

299 300 301 302 303 304
	/* if this control struct has movd to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

305 306 307 308 309 310 311
	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
D
Dave Jiang 已提交
312
	queue_delayed_work(edac_workqueue, &mci->work,
313
			msecs_to_jiffies(edac_mc_get_poll_msec()));
314 315 316 317 318 319
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
320 321 322 323
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
324
 */
325
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
326 327 328
{
	debugf0("%s()\n", __func__);

329 330 331 332
	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

333 334 335 336 337 338 339
	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
340 341 342 343
 *
 *	locking model:
 *
 *		called WITHOUT lock held
344
 */
345
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
346 347 348
{
	int status;

349 350 351
	if (mci->op_state != OP_RUNNING_POLL)
		return;

352 353 354 355
	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);
356

357 358
		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
359 360 361 362
	}
}

/*
363 364 365 366
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
367
 */
368
void edac_mc_reset_delay_period(int value)
369
{
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);
385

386 387

	/* re-walk the list, and reset the poll delay */
388 389
	mutex_lock(&mem_ctls_mutex);

390 391 392 393 394
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}
395 396 397 398

	mutex_unlock(&mem_ctls_mutex);
}

399 400


401 402 403
/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
404 405 406 407
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
408
 */
409
static int add_mc_to_global_list(struct mem_ctl_info *mci)
A
Alan Cox 已提交
410 411 412 413
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

414
	insert_before = &mc_devices;
A
Alan Cox 已提交
415

416 417
	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
418
		goto fail0;
A
Alan Cox 已提交
419

420 421
	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);
A
Alan Cox 已提交
422

423 424 425
		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;
A
Alan Cox 已提交
426

427 428
			insert_before = item;
			break;
A
Alan Cox 已提交
429 430 431 432
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
D
Dave Jiang 已提交
433
	atomic_inc(&edac_handlers);
A
Alan Cox 已提交
434
	return 0;
435

436
fail0:
437
	edac_printk(KERN_WARNING, EDAC_MC,
438
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
439
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
440 441
	return 1;

442
fail1:
443
	edac_printk(KERN_WARNING, EDAC_MC,
444 445
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
446
	return 1;
A
Alan Cox 已提交
447 448
}

D
Dave Peterson 已提交
449
static void del_mc_from_global_list(struct mem_ctl_info *mci)
450
{
D
Dave Jiang 已提交
451
	atomic_dec(&edac_handlers);
452
	list_del_rcu(&mci->link);
453 454 455 456 457 458

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
459 460
}

461 462 463 464 465 466 467 468
/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
469
struct mem_ctl_info *edac_mc_find(int idx)
470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

A
Alan Cox 已提交
489
/**
490 491
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
A
Alan Cox 已提交
492
 * @mci: pointer to the mci structure to be added to the list
493
 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
A
Alan Cox 已提交
494 495 496 497 498 499 500
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
501
int edac_mc_add_mc(struct mem_ctl_info *mci)
A
Alan Cox 已提交
502
{
D
Dave Peterson 已提交
503
	debugf0("%s()\n", __func__);
504

A
Alan Cox 已提交
505 506 507
#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);
D
Dave Peterson 已提交
508

A
Alan Cox 已提交
509 510 511 512 513
	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;
D
Dave Peterson 已提交
514

A
Alan Cox 已提交
515 516
			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
517
				edac_mc_dump_channel(&mci->csrows[i].
518
						channels[j]);
A
Alan Cox 已提交
519 520 521
		}
	}
#endif
522
	mutex_lock(&mem_ctls_mutex);
A
Alan Cox 已提交
523 524

	if (add_mc_to_global_list(mci))
525
		goto fail0;
A
Alan Cox 已提交
526 527 528 529

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

530 531
	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
532
			"failed to create sysfs device\n");
533 534
		goto fail1;
	}
A
Alan Cox 已提交
535

536 537 538 539 540 541 542 543 544 545
	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

A
Alan Cox 已提交
546
	/* Report action taken */
547
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
548
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
A
Alan Cox 已提交
549

550
	mutex_unlock(&mem_ctls_mutex);
551
	return 0;
A
Alan Cox 已提交
552

553
fail1:
554 555
	del_mc_from_global_list(mci);

556
fail0:
557
	mutex_unlock(&mem_ctls_mutex);
558
	return 1;
A
Alan Cox 已提交
559
}
560
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
A
Alan Cox 已提交
561 562

/**
563 564
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
565
 * @pdev: Pointer to 'struct device' representing mci structure to remove.
A
Alan Cox 已提交
566
 *
567
 * Return pointer to removed mci structure, or NULL if device not found.
A
Alan Cox 已提交
568
 */
569
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
A
Alan Cox 已提交
570
{
571
	struct mem_ctl_info *mci;
A
Alan Cox 已提交
572

573 574
	debugf0("%s()\n", __func__);

575
	mutex_lock(&mem_ctls_mutex);
576

577 578 579
	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
580
		mutex_unlock(&mem_ctls_mutex);
581 582 583
		return NULL;
	}

A
Alan Cox 已提交
584
	del_mc_from_global_list(mci);
585
	mutex_unlock(&mem_ctls_mutex);
586

587
	/* flush workq processes */
588
	edac_mc_workq_teardown(mci);
589 590 591 592 593

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
594 595
	edac_remove_sysfs_mci_device(mci);

D
Dave Peterson 已提交
596
	edac_printk(KERN_INFO, EDAC_MC,
597
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
598
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
599

600
	return mci;
A
Alan Cox 已提交
601
}
602
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
A
Alan Cox 已提交
603

604 605
/*
 * Scrub 'size' bytes starting at 'offset' inside page frame 'page' using
 * the architecture specific atomic_scrub().  A page frame number that is
 * not valid is silently ignored.
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *vaddr;
	unsigned long irq_flags = 0;
	int highmem;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);
	highmem = PageHighMem(pg);

	/* interrupts are kept off only while a highmem page is mapped */
	if (highmem)
		local_irq_save(irq_flags);

	vaddr = kmap_atomic(pg, KM_BOUNCE_READ);
	atomic_scrub(vaddr + offset, size);
	kunmap_atomic(vaddr, KM_BOUNCE_READ);

	if (highmem)
		local_irq_restore(irq_flags);
}

/* FIXME - should return -1 */
D
Dave Peterson 已提交
636
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
A
Alan Cox 已提交
637 638 639 640
{
	struct csrow_info *csrows = mci->csrows;
	int row, i;

D
Dave Peterson 已提交
641
	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
A
Alan Cox 已提交
642 643 644 645 646 647 648 649
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];

		if (csrow->nr_pages == 0)
			continue;

D
Dave Peterson 已提交
650 651 652 653
		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);
A
Alan Cox 已提交
654 655 656 657 658 659 660 661 662 663 664

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
D
Dave Peterson 已提交
665
		edac_mc_printk(mci, KERN_ERR,
666 667
			"could not look up page error address %lx\n",
			(unsigned long)page);
A
Alan Cox 已提交
668 669 670

	return row;
}
671
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
A
Alan Cox 已提交
672 673 674 675

/* FIXME - setable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
676 677 678
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
A
Alan Cox 已提交
679 680 681
{
	unsigned long remapped_page;

D
Dave Peterson 已提交
682
	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
A
Alan Cox 已提交
683 684 685 686

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
687
		edac_mc_printk(mci, KERN_ERR,
688 689
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
A
Alan Cox 已提交
690 691 692
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
D
Dave Peterson 已提交
693

A
Alan Cox 已提交
694 695
	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
696
		edac_mc_printk(mci, KERN_ERR,
697 698 699
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
A
Alan Cox 已提交
700 701 702 703
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

D
Dave Jiang 已提交
704
	if (edac_mc_get_log_ce())
A
Alan Cox 已提交
705
		/* FIXME - put in DIMM location */
D
Dave Peterson 已提交
706
		edac_mc_printk(mci, KERN_WARNING,
707 708 709 710 711
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, syndrome, row, channel,
			mci->csrows[row].channels[channel].label, msg);
A
Alan Cox 已提交
712 713 714 715 716 717 718 719 720 721

	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
L
Lucas De Marchi 已提交
722
		 * are mapped.  This mapping is MC dependent and so we call
A
Alan Cox 已提交
723 724 725 726 727
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
728 729
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;
A
Alan Cox 已提交
730 731

		edac_mc_scrub_block(remapped_page, offset_in_page,
732
				mci->csrows[row].grain);
A
Alan Cox 已提交
733 734
	}
}
735
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
A
Alan Cox 已提交
736

D
Dave Peterson 已提交
737
void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
A
Alan Cox 已提交
738
{
D
Dave Jiang 已提交
739
	if (edac_mc_get_log_ce())
D
Dave Peterson 已提交
740
		edac_mc_printk(mci, KERN_WARNING,
741
			"CE - no information available: %s\n", msg);
D
Dave Peterson 已提交
742

A
Alan Cox 已提交
743 744 745
	mci->ce_noinfo_count++;
	mci->ce_count++;
}
746
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
A
Alan Cox 已提交
747 748

void edac_mc_handle_ue(struct mem_ctl_info *mci,
749 750
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
A
Alan Cox 已提交
751 752 753 754 755 756 757
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;

D
Dave Peterson 已提交
758
	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
A
Alan Cox 已提交
759 760 761 762

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
D
Dave Peterson 已提交
763
		edac_mc_printk(mci, KERN_ERR,
764 765
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
A
Alan Cox 已提交
766 767 768 769 770
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	chars = snprintf(pos, len + 1, "%s",
771
			 mci->csrows[row].channels[0].label);
A
Alan Cox 已提交
772 773
	len -= chars;
	pos += chars;
D
Dave Peterson 已提交
774

A
Alan Cox 已提交
775
	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
776
		chan++) {
A
Alan Cox 已提交
777
		chars = snprintf(pos, len + 1, ":%s",
778
				 mci->csrows[row].channels[chan].label);
A
Alan Cox 已提交
779 780 781 782
		len -= chars;
		pos += chars;
	}

D
Dave Jiang 已提交
783
	if (edac_mc_get_log_ue())
D
Dave Peterson 已提交
784
		edac_mc_printk(mci, KERN_EMERG,
785 786 787 788
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, mci->csrows[row].grain, row,
			labels, msg);
A
Alan Cox 已提交
789

D
Dave Jiang 已提交
790
	if (edac_mc_get_panic_on_ue())
D
Dave Peterson 已提交
791
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
792 793 794
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, row, labels, msg);
A
Alan Cox 已提交
795 796 797 798

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
799
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
A
Alan Cox 已提交
800

D
Dave Peterson 已提交
801
void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
A
Alan Cox 已提交
802
{
D
Dave Jiang 已提交
803
	if (edac_mc_get_panic_on_ue())
A
Alan Cox 已提交
804 805
		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

D
Dave Jiang 已提交
806
	if (edac_mc_get_log_ue())
D
Dave Peterson 已提交
807
		edac_mc_printk(mci, KERN_WARNING,
808
			"UE - no information available: %s\n", msg);
A
Alan Cox 已提交
809 810 811
	mci->ue_noinfo_count++;
	mci->ue_count++;
}
812
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
A
Alan Cox 已提交
813

814 815 816 817 818
/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
819 820 821
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
822 823 824 825 826 827 828 829 830
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;

	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
831 832
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
833 834 835 836 837 838 839
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
840 841 842
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
843 844 845 846 847 848 849
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
850 851 852
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
853 854 855 856 857 858 859 860 861 862
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[csrow].channels[channela].label);
863 864
	len -= chars;
	pos += chars;
865 866 867
	chars = snprintf(pos, len + 1, "-%s",
			 mci->csrows[csrow].channels[channelb].label);

D
Dave Jiang 已提交
868
	if (edac_mc_get_log_ue())
869
		edac_mc_printk(mci, KERN_EMERG,
870 871 872
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);
873

D
Dave Jiang 已提交
874
	if (edac_mc_get_panic_on_ue())
875
		panic("UE row %d, channel-a= %d channel-b= %d "
876 877
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
878 879 880 881 882 883 884 885
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
886
			unsigned int csrow, unsigned int channel, char *msg)
887 888 889 890 891 892
{

	/* Ensure boundary values */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
893 894
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
895 896 897 898 899 900
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
	if (channel >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
901 902
			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
			channel, mci->csrows[csrow].nr_channels);
903 904 905 906
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

D
Dave Jiang 已提交
907
	if (edac_mc_get_log_ce())
908 909
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
910 911 912
			"CE row %d, channel %d, label \"%s\": %s\n",
			csrow, channel,
			mci->csrows[csrow].channels[channel].label, msg);
913 914 915 916 917

	mci->ce_count++;
	mci->csrows[csrow].ce_count++;
	mci->csrows[csrow].channels[channel].ce_count++;
}
918
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);