/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
D
Dave Jiang 已提交
29
#include <linux/edac.h>
30
#include <linux/bitops.h>
31
#include <linux/uaccess.h>
A
Alan Cox 已提交
32
#include <asm/page.h>
33
#include "edac_mc.h"
34
#include "edac_module.h"
35 36
#include <ras/ras_event.h>

37 38 39 40 41 42
#ifdef CONFIG_EDAC_ATOMIC_SCRUB
#include <asm/edac.h>
#else
#define edac_atomic_scrub(va, size) do { } while (0)
#endif

43 44 45
int edac_op_state = EDAC_OPSTATE_INVAL;
EXPORT_SYMBOL_GPL(edac_op_state);

A
Alan Cox 已提交
46
/* lock to memory controller's control array */
47
static DEFINE_MUTEX(mem_ctls_mutex);
48
static LIST_HEAD(mc_devices);
A
Alan Cox 已提交
49

50 51 52 53
/*
 * Used to lock EDAC MC to just one module, avoiding two drivers e. g.
 *	apei/ghes and i7core_edac to be used at the same time.
 */
54
static const char *edac_mc_owner;
55

56 57 58 59 60
/* Map an error descriptor back to the mem_ctl_info that embeds it. */
static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e)
{
	return container_of(e, struct mem_ctl_info, error_desc);
}

61 62
unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf,
				     unsigned int len)
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
{
	struct mem_ctl_info *mci = dimm->mci;
	int i, n, count = 0;
	char *p = buf;

	for (i = 0; i < mci->n_layers; i++) {
		n = snprintf(p, len, "%s %d ",
			      edac_layer_name[mci->layers[i].type],
			      dimm->location[i]);
		p += n;
		len -= n;
		count += n;
		if (!len)
			break;
	}

	return count;
}

A
Alan Cox 已提交
82 83
#ifdef CONFIG_EDAC_DEBUG

84
/* Debug dump of one rank/channel and its old-API csrow linkage. */
static void edac_mc_dump_channel(struct rank_info *chan)
{
	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
	edac_dbg(4, "    channel = %p\n", chan);
	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

92
static void edac_mc_dump_dimm(struct dimm_info *dimm)
93
{
94 95
	char location[80];

96 97 98
	if (!dimm->nr_pages)
		return;

99 100 101
	edac_dimm_info_location(dimm, location, sizeof(location));

	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
102
		 dimm->mci->csbased ? "rank" : "dimm",
103
		 dimm->idx, location, dimm->csrow, dimm->cschannel);
104 105 106 107 108
	edac_dbg(4, "  dimm = %p\n", dimm);
	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
A
Alan Cox 已提交
109 110
}

111
/* Debug dump of one chip-select row's page range and channel array. */
static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
	edac_dbg(4, "  csrow = %p\n", csrow);
	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

123
/* Debug dump of a memory controller's capabilities and topology summary. */
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	edac_dbg(3, "\tmci = %p\n", mci);
	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
		 mci->nr_csrows, mci->csrows);
	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
		 mci->tot_dimms, mci->dimms);
	edac_dbg(3, "\tdev = %p\n", mci->pdev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 mci->mod_name, mci->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

140 141
#endif				/* CONFIG_EDAC_DEBUG */

142
/* Human-readable names for enum mem_type, indexed by the enum value. */
const char * const edac_mem_types[] = {
	[MEM_EMPTY]	= "Empty",
	[MEM_RESERVED]	= "Reserved",
	[MEM_UNKNOWN]	= "Unknown",
	[MEM_FPM]	= "FPM",
	[MEM_EDO]	= "EDO",
	[MEM_BEDO]	= "BEDO",
	[MEM_SDR]	= "Unbuffered-SDR",
	[MEM_RDR]	= "Registered-SDR",
	[MEM_DDR]	= "Unbuffered-DDR",
	[MEM_RDDR]	= "Registered-DDR",
	[MEM_RMBS]	= "RMBS",
	[MEM_DDR2]	= "Unbuffered-DDR2",
	[MEM_FB_DDR2]	= "FullyBuffered-DDR2",
	[MEM_RDDR2]	= "Registered-DDR2",
	[MEM_XDR]	= "XDR",
	[MEM_DDR3]	= "Unbuffered-DDR3",
	[MEM_RDDR3]	= "Registered-DDR3",
	[MEM_LRDDR3]	= "Load-Reduced-DDR3-RAM",
	[MEM_LPDDR3]	= "Low-Power-DDR3-RAM",
	[MEM_DDR4]	= "Unbuffered-DDR4",
	[MEM_RDDR4]	= "Registered-DDR4",
	[MEM_LPDDR4]	= "Low-Power-DDR4-RAM",
	[MEM_LRDDR4]	= "Load-Reduced-DDR4-RAM",
	[MEM_DDR5]	= "Unbuffered-DDR5",
	[MEM_NVDIMM]	= "Non-volatile-RAM",
	[MEM_WIO2]	= "Wide-IO-2",
	[MEM_HBM2]	= "High-bandwidth-memory-Gen2",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

173 174 175 176 177 178
/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is absolutely needed to keep the proper advancing
 * further in memory to the proper offsets when allocating the struct along
 * with its embedded structs, as edac_device_alloc_ctl_info() does it
 * above, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned int size, int n_elems)
{
	unsigned int align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	/*
	 * Compute the misalignment of the value being handed out ('ptr'),
	 * not of the local argument 'p': taking '(unsigned long)p' here was
	 * a bug that made the result depend on the stack address of the
	 * argument rather than on the offset being aligned.
	 */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}

228 229
/*
 * Drop the reference on the embedded struct device; when it is the last
 * one, mci_release() runs and frees everything hanging off the mci.
 */
static void _edac_mc_free(struct mem_ctl_info *mci)
{
	put_device(&mci->dev);
}

/*
 * Device-model release callback: tear down everything edac_mc_alloc() and
 * its helpers built. Tolerates partially-constructed mcis (NULL arrays or
 * array slots), since the alloc helpers may fail midway.
 */
static void mci_release(struct device *dev)
{
	struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
	struct csrow_info *csr;
	int i, chn, row;

	if (mci->dimms) {
		for (i = 0; i < mci->tot_dimms; i++)
			kfree(mci->dimms[i]);
		kfree(mci->dimms);
	}

	if (mci->csrows) {
		for (row = 0; row < mci->nr_csrows; row++) {
			csr = mci->csrows[row];
			/* rows past the failure point were never allocated */
			if (!csr)
				continue;

			if (csr->channels) {
				for (chn = 0; chn < mci->num_cschannel; chn++)
					kfree(csr->channels[chn]);
				kfree(csr->channels);
			}
			kfree(csr);
		}
		kfree(mci->csrows);
	}
	/* mci itself was a single kzalloc() covering layers and pvt_info too */
	kfree(mci);
}

263 264 265 266 267 268
/*
 * Allocate the legacy csrow/channel emulation structs for @mci.
 *
 * Returns 0 on success or -ENOMEM. On failure, partially-allocated
 * rows/channels are left in place for mci_release() to free.
 */
static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
{
	unsigned int tot_channels = mci->num_cschannel;
	unsigned int tot_csrows = mci->nr_csrows;
	unsigned int row, chn;

	/*
	 * Allocate and fill the csrow/channels structs
	 */
	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
	if (!mci->csrows)
		return -ENOMEM;

	for (row = 0; row < tot_csrows; row++) {
		struct csrow_info *csr;

		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
		if (!csr)
			return -ENOMEM;

		mci->csrows[row] = csr;
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
					GFP_KERNEL);
		if (!csr->channels)
			return -ENOMEM;

		for (chn = 0; chn < tot_channels; chn++) {
			struct rank_info *chan;

			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
			if (!chan)
				return -ENOMEM;

			csr->channels[chn] = chan;
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	return 0;
}

/*
 * Allocate the dimm_info array for @mci, label each DIMM after its layer
 * position (e.g. "mc#0channel#1slot#2"), and link every DIMM into the
 * legacy csrow/channel grid built by edac_mc_alloc_csrows().
 *
 * Returns 0 on success or -ENOMEM; partial allocations are freed later by
 * mci_release().
 */
static int edac_mc_alloc_dimms(struct mem_ctl_info *mci)
{
	unsigned int pos[EDAC_MAX_LAYERS];
	unsigned int row, chn, idx;
	int layer;
	void *p;

	/*
	 * Allocate and fill the dimm structs
	 */
	mci->dimms  = kcalloc(mci->tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
	if (!mci->dimms)
		return -ENOMEM;

	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	for (idx = 0; idx < mci->tot_dimms; idx++) {
		struct dimm_info *dimm;
		struct rank_info *chan;
		int n, len;

		chan = mci->csrows[row]->channels[chn];

		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
		if (!dimm)
			return -ENOMEM;
		mci->dimms[idx] = dimm;
		dimm->mci = mci;
		dimm->idx = idx;

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mci->mc_idx);
		p += n;
		len -= n;
		for (layer = 0; layer < mci->n_layers; layer++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[mci->layers[layer].type],
				     pos[layer]);
			p += n;
			len -= n;
			dimm->location[layer] = pos[layer];

			/* label truncated: stop before writing past the end */
			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		if (mci->layers[0].is_virt_csrow) {
			chn++;
			if (chn == mci->num_cschannel) {
				chn = 0;
				row++;
			}
		} else {
			row++;
			if (row == mci->nr_csrows) {
				row = 0;
				chn++;
			}
		}

		/* Increment dimm location: odometer-style carry, last layer first */
		for (layer = mci->n_layers - 1; layer >= 0; layer--) {
			pos[layer]++;
			if (pos[layer] < mci->layers[layer].size)
				break;
			pos[layer] = 0;
		}
	}

	return 0;
}
A
Alan Cox 已提交
390

391 392 393 394 395 396 397
/*
 * edac_mc_alloc - allocate and initialize a struct mem_ctl_info
 * @mc_num:	index of this memory controller
 * @n_layers:	number of entries in @layers (1..EDAC_MAX_LAYERS)
 * @layers:	description of each memory hierarchy layer
 * @sz_pvt:	size of the driver-private area appended to the mci
 *
 * The mci, its layer array and the private area are carved out of ONE
 * kzalloc() using edac_align_ptr() offsets. Returns the mci, or NULL on
 * failure. Free with edac_mc_free() (or implicitly via edac_mc_del_mc()).
 */
struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
				   unsigned int n_layers,
				   struct edac_mc_layer *layers,
				   unsigned int sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	unsigned int idx, size, tot_dimms = 1;
	unsigned int tot_csrows = 1, tot_channels = 1;
	void *pvt, *ptr = NULL;
	bool per_rank = false;

	if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
		return NULL;

	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (idx = 0; idx < n_layers; idx++) {
		tot_dimms *= layers[idx].size;

		if (layers[idx].is_virt_csrow)
			tot_csrows *= layers[idx].size;
		else
			tot_channels *= layers[idx].size;

		if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci	= edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer	= edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	pvt	= edac_align_ptr(&ptr, sz_pvt, 1);
	size	= ((unsigned long)pvt) + sz_pvt;

	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		 size,
		 tot_dimms,
		 per_rank ? "ranks" : "dimms",
		 tot_csrows * tot_channels);

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* from here on, frees go through put_device() -> mci_release() */
	mci->dev.release = mci_release;
	device_initialize(&mci->dev);

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->csbased = per_rank;

	if (edac_mc_alloc_csrows(mci))
		goto error;

	if (edac_mc_alloc_dimms(mci))
		goto error;

	mci->op_state = OP_ALLOC;

	return mci;

error:
	_edac_mc_free(mci);

	return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

A
Alan Cox 已提交
479 480
/*
 * edac_mc_free - free a mci allocated by edac_mc_alloc()
 * @mci: memory controller to release (must not be registered)
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_dbg(1, "\n");

	_edac_mc_free(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
A
Alan Cox 已提交
486

487 488 489 490 491 492 493 494 495 496 497 498 499 500
/* Return true when at least one memory controller is registered. */
bool edac_has_mcs(void)
{
	bool empty;

	mutex_lock(&mem_ctls_mutex);
	empty = list_empty(&mc_devices);
	mutex_unlock(&mem_ctls_mutex);

	return !empty;
}
EXPORT_SYMBOL_GPL(edac_has_mcs);

501 502
/* Caller must hold mem_ctls_mutex */
static struct mem_ctl_info *__find_mci_by_dev(struct device *dev)
A
Alan Cox 已提交
503 504 505 506
{
	struct mem_ctl_info *mci;
	struct list_head *item;

507
	edac_dbg(3, "\n");
A
Alan Cox 已提交
508 509 510 511

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

512
		if (mci->pdev == dev)
A
Alan Cox 已提交
513 514 515 516 517
			return mci;
	}

	return NULL;
}
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 *
 * Returns the matching mem_ctl_info, or NULL if @dev is not managed by
 * any registered controller. Takes mem_ctls_mutex internally.
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *ret;

	mutex_lock(&mem_ctls_mutex);
	ret = __find_mci_by_dev(dev);
	mutex_unlock(&mem_ctls_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);
A
Alan Cox 已提交
537

538 539 540 541 542 543
/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* the controller may have gone offline while this work was queued */
	if (mci->op_state != OP_RUNNING_POLL) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Queue ourselves again. */
	edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
564 565 566 567
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
568
 */
569
void edac_mc_reset_delay_period(unsigned long value)
570
{
571 572 573 574 575 576 577 578
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

579 580
		if (mci->op_state == OP_RUNNING_POLL)
			edac_mod_work(&mci->work, value);
581
	}
582 583 584
	mutex_unlock(&mem_ctls_mutex);
}

585 586


587 588 589
/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	/* refuse to register the same underlying device twice */
	p = __find_mci_by_dev(mci->pdev);
	if (unlikely(p != NULL))
		goto fail0;

	/* keep mc_devices sorted by mc_idx; find the insertion point */
	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

634
/*
 * Unlink @mci from the global controller list.
 * Returns nonzero when the list is empty afterwards (last controller gone).
 * Caller must hold mem_ctls_mutex.
 */
static int del_mc_from_global_list(struct mem_ctl_info *mci)
{
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);

	return list_empty(&mc_devices);
}

647
struct mem_ctl_info *edac_mc_find(int idx)
648
{
649
	struct mem_ctl_info *mci;
650
	struct list_head *item;
651 652

	mutex_lock(&mem_ctls_mutex);
653 654 655

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);
656 657
		if (mci->mc_idx == idx)
			goto unlock;
658 659
	}

660
	mci = NULL;
661 662 663
unlock:
	mutex_unlock(&mem_ctls_mutex);
	return mci;
664 665 666
}
EXPORT_SYMBOL(edac_mc_find);

667 668 669 670 671
/* Return the mod_name of the module currently owning the EDAC MC core
 * (NULL when unowned). */
const char *edac_get_owner(void)
{
	return edac_mc_owner;
}
EXPORT_SYMBOL_GPL(edac_get_owner);
A
Alan Cox 已提交
672 673

/* FIXME - should a warning be printed if no error detection? correction? */
/*
 * edac_mc_add_mc_with_groups - register @mci with the EDAC core
 * @mci:	controller to register (from edac_mc_alloc())
 * @groups:	optional extra sysfs attribute groups
 *
 * Returns 0 on success; -EPERM when another module already owns the MC
 * core; -EINVAL on registration failure.
 */
int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
			       const struct attribute_group **groups)
{
	int ret = -EINVAL;
	edac_dbg(0, "\n");

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		struct dimm_info *dimm;
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			struct csrow_info *csrow = mci->csrows[i];
			u32 nr_pages = 0;
			int j;

			/* only dump rows that actually have memory behind them */
			for (j = 0; j < csrow->nr_channels; j++)
				nr_pages += csrow->channels[j]->dimm->nr_pages;
			if (!nr_pages)
				continue;
			edac_mc_dump_csrow(csrow);
			for (j = 0; j < csrow->nr_channels; j++)
				if (csrow->channels[j]->dimm->nr_pages)
					edac_mc_dump_channel(csrow->channels[j]);
		}

		mci_for_each_dimm(mci, dimm)
			edac_mc_dump_dimm(dimm);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	/* only one module may drive the MC core at a time */
	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
		ret = -EPERM;
		goto fail0;
	}

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	mci->bus = edac_get_sysfs_subsys();

	if (edac_create_sysfs_mci_device(mci, groups)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	if (mci->edac_check) {
		/* driver polls: start the periodic check work */
		mci->op_state = OP_RUNNING_POLL;

		INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
		edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));

	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO,
		"Giving out device to module %s controller %s: DEV %s (%s)\n",
		mci->mod_name, mci->ctl_name, mci->dev_name,
		edac_op_state_to_string(mci->op_state));

	edac_mc_owner = mci->mod_name;

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc_with_groups);
A
Alan Cox 已提交
757

758
/*
 * edac_mc_del_mc - unregister the controller that manages @dev
 * @dev: device previously registered via edac_mc_add_mc_with_groups()
 *
 * Returns the detached mci (caller frees it with edac_mc_free()), or NULL
 * if no controller manages @dev.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	edac_dbg(0, "\n");

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = __find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	/* mark MCI offline: */
	mci->op_state = OP_OFFLINE;

	/* last controller gone -> release core ownership */
	if (del_mc_from_global_list(mci))
		edac_mc_owner = NULL;

	mutex_unlock(&mem_ctls_mutex);

	/* stop polling only after dropping the mutex the work also takes */
	if (mci->edac_check)
		edac_stop_work(&mci->work);

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
A
Alan Cox 已提交
794

795 796
/*
 * Re-write @size bytes at page frame @page + @offset so the memory
 * controller recomputes the ECC, scrubbing a corrected error in place.
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	edac_dbg(3, "\n");

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	/* highmem kmap_atomic slots must not be re-entered from an irq */
	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	edac_atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
/*
 * Map a physical page frame number to the csrow index that contains it.
 * Returns the row index, or -1 (after logging) when no populated row's
 * page range/mask matches @page.
 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info **csrows = mci->csrows;
	int row, i, j, n;

	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = csrows[i];
		n = 0;
		/* skip rows with no pages behind any channel */
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j]->dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
			 mci->mc_idx,
			 csrow->first_page, page, csrow->last_page,
			 csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
A
Alan Cox 已提交
867

868 869 870 871 872
/* Names of the memory hierarchy layers, indexed by enum edac_mc_layer_type. */
const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
	[EDAC_MC_LAYER_ALL_MEM] = "memory",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

877
/*
 * Account a corrected-error event: bump the MC-wide counter, and either
 * the matched DIMM's counter or the "no info" counter when the location
 * doesn't resolve to a DIMM.
 */
static void edac_inc_ce_error(struct edac_raw_error_desc *e)
{
	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
	struct mem_ctl_info *mci = error_desc_to_mci(e);
	struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);

	mci->ce_mc += e->error_count;

	if (dimm)
		dimm->ce_count += e->error_count;
	else
		mci->ce_noinfo_count += e->error_count;
}

891
/*
 * Account an uncorrected-error event; mirrors edac_inc_ce_error() for the
 * UE counters.
 */
static void edac_inc_ue_error(struct edac_raw_error_desc *e)
{
	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
	struct mem_ctl_info *mci = error_desc_to_mci(e);
	struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);

	mci->ue_mc += e->error_count;

	if (dimm)
		dimm->ue_count += e->error_count;
	else
		mci->ue_noinfo_count += e->error_count;
}
A
Alan Cox 已提交
904

905
/*
 * Handle a corrected error: optionally log it, bump the CE counters, and
 * (in software-scrub mode) re-write the affected memory to fix the ECC.
 */
static void edac_ce_error(struct edac_raw_error_desc *e)
{
	struct mem_ctl_info *mci = error_desc_to_mci(e);
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		edac_mc_printk(mci, KERN_WARNING,
			"%d CE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx%s%s)\n",
			e->error_count, e->msg,
			*e->msg ? " " : "",
			e->label, e->location, e->page_frame_number, e->offset_in_page,
			e->grain, e->syndrome,
			*e->other_detail ? " - " : "",
			e->other_detail);
	}

	edac_inc_ce_error(e);

	if (mci->scrub_mode == SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MC's that can't do this, lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, e->page_frame_number) :
			e->page_frame_number;

		edac_mc_scrub_block(remapped_page, e->offset_in_page, e->grain);
	}
}

943
/*
 * Handle an uncorrected error: optionally log it, bump the UE counters,
 * and panic if the panic_on_ue policy is set.
 */
static void edac_ue_error(struct edac_raw_error_desc *e)
{
	struct mem_ctl_info *mci = error_desc_to_mci(e);

	if (edac_mc_get_log_ue()) {
		edac_mc_printk(mci, KERN_WARNING,
			"%d UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
			e->error_count, e->msg,
			*e->msg ? " " : "",
			e->label, e->location, e->page_frame_number, e->offset_in_page,
			e->grain,
			*e->other_detail ? " - " : "",
			e->other_detail);
	}

	edac_inc_ue_error(e);

	if (edac_mc_get_panic_on_ue()) {
		panic("UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
			e->msg,
			*e->msg ? " " : "",
			e->label, e->location, e->page_frame_number, e->offset_in_page,
			e->grain,
			*e->other_detail ? " - " : "",
			e->other_detail);
	}
}

971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990
/*
 * Bump the legacy per-csrow (and, for CEs, per-channel) error counters.
 * A negative @row means the error could not be attributed to one csrow.
 */
static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
{
	struct mem_ctl_info *mci = error_desc_to_mci(e);
	u16 count = e->error_count;

	if (row < 0)
		return;

	edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);

	if (e->type != HW_EVENT_ERR_CORRECTED) {
		mci->csrows[row]->ue_count += count;
		return;
	}

	mci->csrows[row]->ce_count += count;
	if (chan >= 0)
		mci->csrows[row]->channels[chan]->ce_count += count;
}

991
/*
 * edac_raw_mc_handle_error - report an error from a pre-filled descriptor
 * @e: error descriptor embedded in the mci (mci->error_desc)
 *
 * Emits the RAS tracepoint and dispatches to the CE/UE handling paths.
 */
void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
{
	struct mem_ctl_info *mci = error_desc_to_mci(e);
	u8 grain_bits;

	/* Sanity-check driver-supplied grain value. */
	if (WARN_ON_ONCE(!e->grain))
		e->grain = 1;

	grain_bits = fls_long(e->grain - 1);

	/* Report the error via the trace interface */
	if (IS_ENABLED(CONFIG_RAS))
		trace_mc_event(e->type, e->msg, e->label, e->error_count,
			       mci->mc_idx, e->top_layer, e->mid_layer,
			       e->low_layer,
			       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
			       grain_bits, e->syndrome, e->other_detail);

	if (e->type == HW_EVENT_ERR_CORRECTED)
		edac_ce_error(e);
	else
		edac_ue_error(e);
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
1016

1017 1018
/*
 * edac_mc_handle_error - main driver entry point for reporting an MC error
 *
 * Fills mci->error_desc from the arguments, resolves which DIMM(s) match
 * the (top, mid, low) layer coordinates (negative layer = unknown), builds
 * the label/location strings, updates the legacy csrow counters and then
 * hands off to edac_raw_mc_handle_error().
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const u16 error_count,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
			  const char *msg,
			  const char *other_detail)
{
	struct dimm_info *dimm;
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
	int i, n_labels = 0;
	struct edac_raw_error_desc *e = &mci->error_desc;
	bool any_memory = true;

	edac_dbg(3, "MC%d\n", mci->mc_idx);

	/* Fills the error report buffer */
	memset(e, 0, sizeof (*e));
	e->error_count = error_count;
	e->type = type;
	e->top_layer = top_layer;
	e->mid_layer = mid_layer;
	e->low_layer = low_layer;
	e->page_frame_number = page_frame_number;
	e->offset_in_page = offset_in_page;
	e->syndrome = syndrome;
	/* need valid strings here for both: */
	e->msg = msg ?: "";
	e->other_detail = other_detail ?: "";

	/*
	 * Check if the event report is consistent and if the memory location is
	 * known. If it is, the DIMM(s) label info will be filled and the DIMM's
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			any_memory = false;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	p = e->label;
	*p = '\0';

	mci_for_each_dimm(mci, dimm) {
		/* a negative layer coordinate matches every DIMM on that layer */
		if (top_layer >= 0 && top_layer != dimm->location[0])
			continue;
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
			continue;
		if (low_layer >= 0 && low_layer != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > e->grain)
			e->grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole channel/memory
		 * controller/... may be affected. Also, don't show errors for
		 * empty DIMM slots.
		 */
		if (!dimm->nr_pages)
			continue;

		n_labels++;
		if (n_labels > EDAC_MAX_LABELS) {
			/* too many candidates: clear the label list entirely */
			p = e->label;
			*p = '\0';
		} else {
			if (p != e->label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
		}

		/*
		 * get csrow/channel of the DIMM, in order to allow
		 * incrementing the compat API counters
		 */
		edac_dbg(4, "%s csrows map: (%d,%d)\n",
			mci->csbased ? "rank" : "dimm",
			dimm->csrow, dimm->cschannel);
		/* -1 = unset, >= 0 = unique match so far, -2 = ambiguous */
		if (row == -1)
			row = dimm->csrow;
		else if (row >= 0 && row != dimm->csrow)
			row = -2;

		if (chan == -1)
			chan = dimm->cschannel;
		else if (chan >= 0 && chan != dimm->cschannel)
			chan = -2;
	}

	if (any_memory)
		strcpy(e->label, "any memory");
	else if (!*e->label)
		strcpy(e->label, "unknown memory");

	edac_inc_csrow(e, row, chan);

	/* Fill the RAM location data */
	p = e->location;

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}
	/* strip the trailing space */
	if (p > e->location)
		*(p - 1) = '\0';

	edac_raw_mc_handle_error(e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);