/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <linux/uaccess.h>
#include <asm/page.h>
#include "edac_mc.h"
#include "edac_module.h"
#include <ras/ras_event.h>

#ifdef CONFIG_EDAC_ATOMIC_SCRUB
#include <asm/edac.h>
#else
#define edac_atomic_scrub(va, size) do { } while (0)
#endif

43 44 45
/* Operating mode (poll vs. interrupt); set by the low-level driver. */
int edac_op_state = EDAC_OPSTATE_INVAL;
EXPORT_SYMBOL_GPL(edac_op_state);

A
Alan Cox 已提交
46
/* lock protecting the global list of memory controllers */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

/*
 * Used to lock EDAC MC to just one module, avoiding two drivers e. g.
 *	apei/ghes and i7core_edac to be used at the same time.
 */
static const char *edac_mc_owner;
55

56 57 58 59 60
/* Map an embedded error descriptor back to the mci that contains it. */
static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e)
{
	return container_of(e, struct mem_ctl_info, error_desc);
}

61 62
unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf,
				     unsigned int len)
63 64 65 66 67 68
{
	struct mem_ctl_info *mci = dimm->mci;
	int i, n, count = 0;
	char *p = buf;

	for (i = 0; i < mci->n_layers; i++) {
69
		n = scnprintf(p, len, "%s %d ",
70 71 72 73 74 75 76 77 78 79
			      edac_layer_name[mci->layers[i].type],
			      dimm->location[i]);
		p += n;
		len -= n;
		count += n;
	}

	return count;
}

A
Alan Cox 已提交
80 81
#ifdef CONFIG_EDAC_DEBUG

82
static void edac_mc_dump_channel(struct rank_info *chan)
A
Alan Cox 已提交
83
{
84 85 86 87
	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
	edac_dbg(4, "    channel = %p\n", chan);
	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
88 89
}

90
static void edac_mc_dump_dimm(struct dimm_info *dimm)
91
{
92 93
	char location[80];

94 95 96
	if (!dimm->nr_pages)
		return;

97 98 99
	edac_dimm_info_location(dimm, location, sizeof(location));

	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
100
		 dimm->mci->csbased ? "rank" : "dimm",
101
		 dimm->idx, location, dimm->csrow, dimm->cschannel);
102 103 104 105 106
	edac_dbg(4, "  dimm = %p\n", dimm);
	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
A
Alan Cox 已提交
107 108
}

109
static void edac_mc_dump_csrow(struct csrow_info *csrow)
A
Alan Cox 已提交
110
{
111 112 113 114 115 116 117 118
	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
	edac_dbg(4, "  csrow = %p\n", csrow);
	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
A
Alan Cox 已提交
119 120
}

121
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
A
Alan Cox 已提交
122
{
123 124 125 126 127 128 129 130 131 132 133 134 135
	edac_dbg(3, "\tmci = %p\n", mci);
	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
		 mci->nr_csrows, mci->csrows);
	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
		 mci->tot_dimms, mci->dimms);
	edac_dbg(3, "\tdev = %p\n", mci->pdev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 mci->mod_name, mci->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
A
Alan Cox 已提交
136 137
}

138 139
#endif				/* CONFIG_EDAC_DEBUG */

140
const char * const edac_mem_types[] = {
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
	[MEM_EMPTY]	= "Empty",
	[MEM_RESERVED]	= "Reserved",
	[MEM_UNKNOWN]	= "Unknown",
	[MEM_FPM]	= "FPM",
	[MEM_EDO]	= "EDO",
	[MEM_BEDO]	= "BEDO",
	[MEM_SDR]	= "Unbuffered-SDR",
	[MEM_RDR]	= "Registered-SDR",
	[MEM_DDR]	= "Unbuffered-DDR",
	[MEM_RDDR]	= "Registered-DDR",
	[MEM_RMBS]	= "RMBS",
	[MEM_DDR2]	= "Unbuffered-DDR2",
	[MEM_FB_DDR2]	= "FullyBuffered-DDR2",
	[MEM_RDDR2]	= "Registered-DDR2",
	[MEM_XDR]	= "XDR",
	[MEM_DDR3]	= "Unbuffered-DDR3",
	[MEM_RDDR3]	= "Registered-DDR3",
	[MEM_LRDDR3]	= "Load-Reduced-DDR3-RAM",
Q
Qiuxu Zhuo 已提交
159
	[MEM_LPDDR3]	= "Low-Power-DDR3-RAM",
160
	[MEM_DDR4]	= "Unbuffered-DDR4",
161
	[MEM_RDDR4]	= "Registered-DDR4",
Q
Qiuxu Zhuo 已提交
162
	[MEM_LPDDR4]	= "Low-Power-DDR4-RAM",
163
	[MEM_LRDDR4]	= "Load-Reduced-DDR4-RAM",
Q
Qiuxu Zhuo 已提交
164
	[MEM_DDR5]	= "Unbuffered-DDR5",
165 166
	[MEM_RDDR5]	= "Registered-DDR5",
	[MEM_LRDDR5]	= "Load-Reduced-DDR5-RAM",
167
	[MEM_NVDIMM]	= "Non-volatile-RAM",
Q
Qiuxu Zhuo 已提交
168
	[MEM_WIO2]	= "Wide-IO-2",
169
	[MEM_HBM2]	= "High-bandwidth-memory-Gen2",
170 171 172
};
EXPORT_SYMBOL_GPL(edac_mem_types);

173 174 175 176 177 178
/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is absolutely needed to keep the proper advancing
 * further in memory to the proper offsets when allocating the struct along
 * with its embedded structs, as edac_device_alloc_ctl_info() does it
 * above, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
191
void *edac_align_ptr(void **p, unsigned int size, int n_elems)
{
	unsigned int align, r;
	void *ptr = *p;

	/* Reserve space for the elements themselves. */
	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	/*
	 * Fix: align the offset value being handed out ('ptr'), not the
	 * stack address of the cursor variable ('p'), which is unrelated
	 * to the allocation layout.
	 */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return (char *)ptr;

	/* Skip the padding in the cursor so the next item stays disjoint. */
	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}

228 229
static void _edac_mc_free(struct mem_ctl_info *mci)
{
230 231 232 233 234 235
	put_device(&mci->dev);
}

static void mci_release(struct device *dev)
{
	struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
236
	struct csrow_info *csr;
237
	int i, chn, row;
238 239

	if (mci->dimms) {
240
		for (i = 0; i < mci->tot_dimms; i++)
241 242 243
			kfree(mci->dimms[i]);
		kfree(mci->dimms);
	}
244

245
	if (mci->csrows) {
246
		for (row = 0; row < mci->nr_csrows; row++) {
247
			csr = mci->csrows[row];
248 249 250 251 252 253 254
			if (!csr)
				continue;

			if (csr->channels) {
				for (chn = 0; chn < mci->num_cschannel; chn++)
					kfree(csr->channels[chn]);
				kfree(csr->channels);
255
			}
256
			kfree(csr);
257 258 259 260 261 262
		}
		kfree(mci->csrows);
	}
	kfree(mci);
}

263 264 265 266 267 268
static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
{
	unsigned int tot_channels = mci->num_cschannel;
	unsigned int tot_csrows = mci->nr_csrows;
	unsigned int row, chn;

269
	/*
270
	 * Alocate and fill the csrow/channels structs
271
	 */
J
Joe Perches 已提交
272
	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
273
	if (!mci->csrows)
274 275
		return -ENOMEM;

276
	for (row = 0; row < tot_csrows; row++) {
277 278
		struct csrow_info *csr;

279 280
		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
		if (!csr)
281 282
			return -ENOMEM;

283
		mci->csrows[row] = csr;
284 285 286
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
J
Joe Perches 已提交
287
		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
288 289
					GFP_KERNEL);
		if (!csr->channels)
290
			return -ENOMEM;
291 292

		for (chn = 0; chn < tot_channels; chn++) {
293 294
			struct rank_info *chan;

295 296
			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
			if (!chan)
297 298
				return -ENOMEM;

299
			csr->channels[chn] = chan;
A
Alan Cox 已提交
300
			chan->chan_idx = chn;
301 302 303 304
			chan->csrow = csr;
		}
	}

305 306 307 308 309 310 311 312 313 314
	return 0;
}

static int edac_mc_alloc_dimms(struct mem_ctl_info *mci)
{
	unsigned int pos[EDAC_MAX_LAYERS];
	unsigned int row, chn, idx;
	int layer;
	void *p;

315
	/*
316
	 * Allocate and fill the dimm structs
317
	 */
318
	mci->dimms  = kcalloc(mci->tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
319
	if (!mci->dimms)
320
		return -ENOMEM;
321

322 323 324
	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
325 326 327 328 329
	for (idx = 0; idx < mci->tot_dimms; idx++) {
		struct dimm_info *dimm;
		struct rank_info *chan;
		int n, len;

330
		chan = mci->csrows[row]->channels[chn];
331

332
		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
333
		if (!dimm)
334
			return -ENOMEM;
335
		mci->dimms[idx] = dimm;
336
		dimm->mci = mci;
337
		dimm->idx = idx;
338

339 340 341 342 343
		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
344
		n = scnprintf(p, len, "mc#%u", mci->mc_idx);
345 346
		p += n;
		len -= n;
347
		for (layer = 0; layer < mci->n_layers; layer++) {
348 349 350
			n = scnprintf(p, len, "%s#%u",
				      edac_layer_name[mci->layers[layer].type],
				      pos[layer]);
351 352
			p += n;
			len -= n;
353
			dimm->location[layer] = pos[layer];
354 355
		}

356 357 358 359 360 361
		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
362
		if (mci->layers[0].is_virt_csrow) {
363
			chn++;
364
			if (chn == mci->num_cschannel) {
365 366 367 368 369
				chn = 0;
				row++;
			}
		} else {
			row++;
370
			if (row == mci->nr_csrows) {
371 372 373
				row = 0;
				chn++;
			}
374
		}
375

376
		/* Increment dimm location */
377 378 379
		for (layer = mci->n_layers - 1; layer >= 0; layer--) {
			pos[layer]++;
			if (pos[layer] < mci->layers[layer].size)
380
				break;
381
			pos[layer] = 0;
A
Alan Cox 已提交
382 383 384
		}
	}

385
	return 0;
386
}
A
Alan Cox 已提交
387

388 389 390 391 392 393 394
struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
				   unsigned int n_layers,
				   struct edac_mc_layer *layers,
				   unsigned int sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
395 396
	unsigned int idx, size, tot_dimms = 1;
	unsigned int tot_csrows = 1, tot_channels = 1;
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423
	void *pvt, *ptr = NULL;
	bool per_rank = false;

	if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
		return NULL;

	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (idx = 0; idx < n_layers; idx++) {
		tot_dimms *= layers[idx].size;

		if (layers[idx].is_virt_csrow)
			tot_csrows *= layers[idx].size;
		else
			tot_channels *= layers[idx].size;

		if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
424 425 426 427
	mci	= edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer	= edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	pvt	= edac_align_ptr(&ptr, sz_pvt, 1);
	size	= ((unsigned long)pvt) + sz_pvt;
428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475

	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		 size,
		 tot_dimms,
		 per_rank ? "ranks" : "dimms",
		 tot_csrows * tot_channels);

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	mci->dev.release = mci_release;
	device_initialize(&mci->dev);

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->csbased = per_rank;

	if (edac_mc_alloc_csrows(mci))
		goto error;

	if (edac_mc_alloc_dimms(mci))
		goto error;

	mci->op_state = OP_ALLOC;

	return mci;

error:
	_edac_mc_free(mci);

	return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

A
Alan Cox 已提交
476 477
/* Release an mci that is not (or no longer) registered with the core. */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_dbg(1, "\n");

	_edac_mc_free(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
A
Alan Cox 已提交
483

484 485 486 487 488 489 490 491 492 493 494 495 496 497
/* Report whether at least one memory controller is registered. */
bool edac_has_mcs(void)
{
	bool empty;

	mutex_lock(&mem_ctls_mutex);
	empty = list_empty(&mc_devices);
	mutex_unlock(&mem_ctls_mutex);

	return !empty;
}
EXPORT_SYMBOL_GPL(edac_has_mcs);

498 499
/* Caller must hold mem_ctls_mutex */
static struct mem_ctl_info *__find_mci_by_dev(struct device *dev)
A
Alan Cox 已提交
500 501 502 503
{
	struct mem_ctl_info *mci;
	struct list_head *item;

504
	edac_dbg(3, "\n");
A
Alan Cox 已提交
505 506 507 508

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

509
		if (mci->pdev == dev)
A
Alan Cox 已提交
510 511 512 513 514
			return mci;
	}

	return NULL;
}
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *ret;

	mutex_lock(&mem_ctls_mutex);
	ret = __find_mci_by_dev(dev);
	mutex_unlock(&mem_ctls_mutex);

	return ret;
}
533
EXPORT_SYMBOL_GPL(find_mci_by_dev);
A
Alan Cox 已提交
534

535 536 537 538 539 540
/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
J
Jean Delvare 已提交
541
	struct delayed_work *d_work = to_delayed_work(work_req);
542 543 544 545
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

546
	if (mci->op_state != OP_RUNNING_POLL) {
547 548 549 550
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

B
Borislav Petkov 已提交
551
	if (edac_op_state == EDAC_OPSTATE_POLL)
552 553 554 555
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

556
	/* Queue ourselves again. */
B
Borislav Petkov 已提交
557
	edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));
558 559 560
}

/*
561 562 563 564
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
565
 */
566
void edac_mc_reset_delay_period(unsigned long value)
567
{
568 569 570 571 572 573 574 575
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

576 577
		if (mci->op_state == OP_RUNNING_POLL)
			edac_mod_work(&mci->work, value);
578
	}
579 580 581
	mutex_unlock(&mem_ctls_mutex);
}

582 583


584 585 586
/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
587 588 589 590
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
591
 */
592
static int add_mc_to_global_list(struct mem_ctl_info *mci)
A
Alan Cox 已提交
593 594 595 596
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

597
	insert_before = &mc_devices;
A
Alan Cox 已提交
598

599
	p = __find_mci_by_dev(mci->pdev);
600
	if (unlikely(p != NULL))
601
		goto fail0;
A
Alan Cox 已提交
602

603 604
	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);
A
Alan Cox 已提交
605

606 607 608
		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;
A
Alan Cox 已提交
609

610 611
			insert_before = item;
			break;
A
Alan Cox 已提交
612 613 614 615 616
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	return 0;
617

618
fail0:
619
	edac_printk(KERN_WARNING, EDAC_MC,
620
		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
621
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
622 623
	return 1;

624
fail1:
625
	edac_printk(KERN_WARNING, EDAC_MC,
626 627
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
628
	return 1;
A
Alan Cox 已提交
629 630
}

631
static int del_mc_from_global_list(struct mem_ctl_info *mci)
632 633
{
	list_del_rcu(&mci->link);
634 635 636 637 638 639

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
640

B
Borislav Petkov 已提交
641
	return list_empty(&mc_devices);
642 643
}

644
struct mem_ctl_info *edac_mc_find(int idx)
645
{
646
	struct mem_ctl_info *mci;
647
	struct list_head *item;
648 649

	mutex_lock(&mem_ctls_mutex);
650 651 652

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);
653 654
		if (mci->mc_idx == idx)
			goto unlock;
655 656
	}

657
	mci = NULL;
658 659 660
unlock:
	mutex_unlock(&mem_ctls_mutex);
	return mci;
661 662 663
}
EXPORT_SYMBOL(edac_mc_find);

664 665 666 667 668
/* Module name of the current EDAC MC owner, or NULL when unclaimed. */
const char *edac_get_owner(void)
{
	return edac_mc_owner;
}
EXPORT_SYMBOL_GPL(edac_get_owner);
A
Alan Cox 已提交
669 670

/* FIXME - should a warning be printed if no error detection? correction? */
671 672
int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
			       const struct attribute_group **groups)
A
Alan Cox 已提交
673
{
674
	int ret = -EINVAL;
675
	edac_dbg(0, "\n");
676

A
Alan Cox 已提交
677 678 679
#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);
D
Dave Peterson 已提交
680

A
Alan Cox 已提交
681
	if (edac_debug_level >= 4) {
682
		struct dimm_info *dimm;
A
Alan Cox 已提交
683 684 685
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
686 687
			struct csrow_info *csrow = mci->csrows[i];
			u32 nr_pages = 0;
A
Alan Cox 已提交
688
			int j;
D
Dave Peterson 已提交
689

690 691 692 693 694 695 696 697
			for (j = 0; j < csrow->nr_channels; j++)
				nr_pages += csrow->channels[j]->dimm->nr_pages;
			if (!nr_pages)
				continue;
			edac_mc_dump_csrow(csrow);
			for (j = 0; j < csrow->nr_channels; j++)
				if (csrow->channels[j]->dimm->nr_pages)
					edac_mc_dump_channel(csrow->channels[j]);
A
Alan Cox 已提交
698
		}
699 700 701

		mci_for_each_dimm(mci, dimm)
			edac_mc_dump_dimm(dimm);
A
Alan Cox 已提交
702 703
	}
#endif
704
	mutex_lock(&mem_ctls_mutex);
A
Alan Cox 已提交
705

706 707 708 709 710
	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
		ret = -EPERM;
		goto fail0;
	}

A
Alan Cox 已提交
711
	if (add_mc_to_global_list(mci))
712
		goto fail0;
A
Alan Cox 已提交
713 714 715 716

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

717
	mci->bus = edac_get_sysfs_subsys();
B
Borislav Petkov 已提交
718

719
	if (edac_create_sysfs_mci_device(mci, groups)) {
720
		edac_mc_printk(mci, KERN_WARNING,
721
			"failed to create sysfs device\n");
722 723
		goto fail1;
	}
A
Alan Cox 已提交
724

725
	if (mci->edac_check) {
726 727
		mci->op_state = OP_RUNNING_POLL;

728 729 730
		INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
		edac_queue_work(&mci->work, msecs_to_jiffies(edac_mc_get_poll_msec()));

731 732 733 734
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

A
Alan Cox 已提交
735
	/* Report action taken */
736 737 738 739
	edac_mc_printk(mci, KERN_INFO,
		"Giving out device to module %s controller %s: DEV %s (%s)\n",
		mci->mod_name, mci->ctl_name, mci->dev_name,
		edac_op_state_to_string(mci->op_state));
A
Alan Cox 已提交
740

741 742
	edac_mc_owner = mci->mod_name;

743
	mutex_unlock(&mem_ctls_mutex);
744
	return 0;
A
Alan Cox 已提交
745

746
fail1:
747 748
	del_mc_from_global_list(mci);

749
fail0:
750
	mutex_unlock(&mem_ctls_mutex);
751
	return ret;
A
Alan Cox 已提交
752
}
753
EXPORT_SYMBOL_GPL(edac_mc_add_mc_with_groups);
A
Alan Cox 已提交
754

755
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
A
Alan Cox 已提交
756
{
757
	struct mem_ctl_info *mci;
A
Alan Cox 已提交
758

759
	edac_dbg(0, "\n");
760

761
	mutex_lock(&mem_ctls_mutex);
762

763
	/* find the requested mci struct in the global list */
764
	mci = __find_mci_by_dev(dev);
765
	if (mci == NULL) {
766
		mutex_unlock(&mem_ctls_mutex);
767 768 769
		return NULL;
	}

770 771 772
	/* mark MCI offline: */
	mci->op_state = OP_OFFLINE;

B
Borislav Petkov 已提交
773
	if (del_mc_from_global_list(mci))
774
		edac_mc_owner = NULL;
775

776
	mutex_unlock(&mem_ctls_mutex);
777

778
	if (mci->edac_check)
779
		edac_stop_work(&mci->work);
780 781

	/* remove from sysfs */
782 783
	edac_remove_sysfs_mci_device(mci);

D
Dave Peterson 已提交
784
	edac_printk(KERN_INFO, EDAC_MC,
785
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
786
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
787

788
	return mci;
A
Alan Cox 已提交
789
}
790
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
A
Alan Cox 已提交
791

792 793
/*
 * Map the given page and run the architecture's atomic scrub helper on
 * the affected bytes, so a corrected error is rewritten in memory.
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	edac_dbg(3, "\n");

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	edac_atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
D
Dave Peterson 已提交
824
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
A
Alan Cox 已提交
825
{
826
	struct csrow_info **csrows = mci->csrows;
827
	int row, i, j, n;
A
Alan Cox 已提交
828

829
	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
A
Alan Cox 已提交
830 831 832
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
833
		struct csrow_info *csrow = csrows[i];
834 835
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
836
			struct dimm_info *dimm = csrow->channels[j]->dimm;
837 838 839
			n += dimm->nr_pages;
		}
		if (n == 0)
A
Alan Cox 已提交
840 841
			continue;

842 843 844 845
		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
			 mci->mc_idx,
			 csrow->first_page, page, csrow->last_page,
			 csrow->page_mask);
A
Alan Cox 已提交
846 847 848 849 850 851 852 853 854 855 856

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
D
Dave Peterson 已提交
857
		edac_mc_printk(mci, KERN_ERR,
858 859
			"could not look up page error address %lx\n",
			(unsigned long)page);
A
Alan Cox 已提交
860 861 862

	return row;
}
863
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
A
Alan Cox 已提交
864

865 866 867 868 869
/* Human-readable names, indexed by enum edac_mc_layer_type. */
const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
	[EDAC_MC_LAYER_ALL_MEM] = "memory",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

874
static void edac_inc_ce_error(struct edac_raw_error_desc *e)
A
Alan Cox 已提交
875
{
876 877
	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
	struct mem_ctl_info *mci = error_desc_to_mci(e);
878
	struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
A
Alan Cox 已提交
879

880
	mci->ce_mc += e->error_count;
A
Alan Cox 已提交
881

882 883 884
	if (dimm)
		dimm->ce_count += e->error_count;
	else
885
		mci->ce_noinfo_count += e->error_count;
886 887
}

888
static void edac_inc_ue_error(struct edac_raw_error_desc *e)
889
{
890 891
	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
	struct mem_ctl_info *mci = error_desc_to_mci(e);
892
	struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
893

894
	mci->ue_mc += e->error_count;
895

896 897 898
	if (dimm)
		dimm->ue_count += e->error_count;
	else
899
		mci->ue_noinfo_count += e->error_count;
900
}
A
Alan Cox 已提交
901

902
static void edac_ce_error(struct edac_raw_error_desc *e)
903
{
904
	struct mem_ctl_info *mci = error_desc_to_mci(e);
905 906 907
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
908 909 910 911 912 913 914 915
		edac_mc_printk(mci, KERN_WARNING,
			"%d CE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx%s%s)\n",
			e->error_count, e->msg,
			*e->msg ? " " : "",
			e->label, e->location, e->page_frame_number, e->offset_in_page,
			e->grain, e->syndrome,
			*e->other_detail ? " - " : "",
			e->other_detail);
916
	}
917 918

	edac_inc_ce_error(e);
A
Alan Cox 已提交
919

920
	if (mci->scrub_mode == SCRUB_SW_SRC) {
A
Alan Cox 已提交
921
		/*
922 923 924 925 926 927 928 929 930 931
			* Some memory controllers (called MCs below) can remap
			* memory so that it is still available at a different
			* address when PCI devices map into memory.
			* MC's that can't do this, lose the memory where PCI
			* devices are mapped. This mapping is MC-dependent
			* and so we call back into the MC driver for it to
			* map the MC page to a physical (CPU) page which can
			* then be mapped to a virtual page - which can then
			* be scrubbed.
			*/
A
Alan Cox 已提交
932
		remapped_page = mci->ctl_page_to_phys ?
933 934
			mci->ctl_page_to_phys(mci, e->page_frame_number) :
			e->page_frame_number;
A
Alan Cox 已提交
935

936
		edac_mc_scrub_block(remapped_page, e->offset_in_page, e->grain);
A
Alan Cox 已提交
937 938 939
	}
}

940
static void edac_ue_error(struct edac_raw_error_desc *e)
A
Alan Cox 已提交
941
{
942
	struct mem_ctl_info *mci = error_desc_to_mci(e);
943

944
	if (edac_mc_get_log_ue()) {
945 946 947 948 949 950 951 952
		edac_mc_printk(mci, KERN_WARNING,
			"%d UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
			e->error_count, e->msg,
			*e->msg ? " " : "",
			e->label, e->location, e->page_frame_number, e->offset_in_page,
			e->grain,
			*e->other_detail ? " - " : "",
			e->other_detail);
953
	}
D
Dave Peterson 已提交
954

955 956
	edac_inc_ue_error(e);

957
	if (edac_mc_get_panic_on_ue()) {
958 959 960 961 962 963 964
		panic("UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
			e->msg,
			*e->msg ? " " : "",
			e->label, e->location, e->page_frame_number, e->offset_in_page,
			e->grain,
			*e->other_detail ? " - " : "",
			e->other_detail);
965
	}
A
Alan Cox 已提交
966 967
}

968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987
/*
 * Bump the legacy (csrow-based) counters for this event, when a unique
 * row (and possibly a unique channel) could be determined.
 */
static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
{
	struct mem_ctl_info *mci = error_desc_to_mci(e);
	u16 count = e->error_count;

	/* row < 0 means no single csrow could be attributed. */
	if (row < 0)
		return;

	edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);

	if (e->type != HW_EVENT_ERR_CORRECTED) {
		mci->csrows[row]->ue_count += count;
		return;
	}

	mci->csrows[row]->ce_count += count;
	if (chan >= 0)
		mci->csrows[row]->channels[chan]->ce_count += count;
}

988
void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
989
{
990
	struct mem_ctl_info *mci = error_desc_to_mci(e);
991 992 993 994 995 996 997 998 999 1000
	u8 grain_bits;

	/* Sanity-check driver-supplied grain value. */
	if (WARN_ON_ONCE(!e->grain))
		e->grain = 1;

	grain_bits = fls_long(e->grain - 1);

	/* Report the error via the trace interface */
	if (IS_ENABLED(CONFIG_RAS))
1001
		trace_mc_event(e->type, e->msg, e->label, e->error_count,
1002 1003 1004 1005
			       mci->mc_idx, e->top_layer, e->mid_layer,
			       e->low_layer,
			       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
			       grain_bits, e->syndrome, e->other_detail);
1006

1007 1008 1009 1010
	if (e->type == HW_EVENT_ERR_CORRECTED)
		edac_ce_error(e);
	else
		edac_ue_error(e);
1011 1012
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
1013

1014 1015
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
1016
			  const u16 error_count,
1017 1018 1019
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
1020 1021 1022
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
1023
			  const char *msg,
1024
			  const char *other_detail)
A
Alan Cox 已提交
1025
{
1026
	struct dimm_info *dimm;
1027
	char *p, *end;
1028
	int row = -1, chan = -1;
1029
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1030 1031
	int i, n_labels = 0;
	struct edac_raw_error_desc *e = &mci->error_desc;
1032
	bool any_memory = true;
1033
	const char *prefix;
A
Alan Cox 已提交
1034

1035
	edac_dbg(3, "MC%d\n", mci->mc_idx);
A
Alan Cox 已提交
1036

1037 1038 1039
	/* Fills the error report buffer */
	memset(e, 0, sizeof (*e));
	e->error_count = error_count;
1040
	e->type = type;
1041 1042 1043 1044 1045 1046
	e->top_layer = top_layer;
	e->mid_layer = mid_layer;
	e->low_layer = low_layer;
	e->page_frame_number = page_frame_number;
	e->offset_in_page = offset_in_page;
	e->syndrome = syndrome;
1047 1048 1049
	/* need valid strings here for both: */
	e->msg = msg ?: "";
	e->other_detail = other_detail ?: "";
1050

1051
	/*
1052
	 * Check if the event report is consistent and if the memory location is
1053 1054
	 * known. If it is, the DIMM(s) label info will be filled and the DIMM's
	 * error counters will be incremented.
1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
1072
			any_memory = false;
A
Alan Cox 已提交
1073 1074
	}

1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085
	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * pottentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
1086
	p = e->label;
1087
	*p = '\0';
1088 1089
	end = p + sizeof(e->label);
	prefix = "";
1090

1091
	mci_for_each_dimm(mci, dimm) {
1092
		if (top_layer >= 0 && top_layer != dimm->location[0])
1093
			continue;
1094
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
1095
			continue;
1096
		if (low_layer >= 0 && low_layer != dimm->location[2])
1097
			continue;
A
Alan Cox 已提交
1098

1099
		/* get the max grain, over the error match range */
1100 1101
		if (dimm->grain > e->grain)
			e->grain = dimm->grain;
1102

1103 1104
		/*
		 * If the error is memory-controller wide, there's no need to
1105 1106 1107
		 * seek for the affected DIMMs because the whole channel/memory
		 * controller/... may be affected. Also, don't show errors for
		 * empty DIMM slots.
1108
		 */
1109
		if (!dimm->nr_pages)
1110
			continue;
1111

1112
		n_labels++;
1113 1114 1115 1116
		if (n_labels > EDAC_MAX_LABELS) {
			p = e->label;
			*p = '\0';
		} else {
1117 1118
			p += scnprintf(p, end - p, "%s%s", prefix, dimm->label);
			prefix = OTHER_LABEL;
1119
		}
1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136

		/*
		 * get csrow/channel of the DIMM, in order to allow
		 * incrementing the compat API counters
		 */
		edac_dbg(4, "%s csrows map: (%d,%d)\n",
			mci->csbased ? "rank" : "dimm",
			dimm->csrow, dimm->cschannel);
		if (row == -1)
			row = dimm->csrow;
		else if (row >= 0 && row != dimm->csrow)
			row = -2;

		if (chan == -1)
			chan = dimm->cschannel;
		else if (chan >= 0 && chan != dimm->cschannel)
			chan = -2;
1137 1138
	}

1139
	if (any_memory)
1140
		strscpy(e->label, "any memory", sizeof(e->label));
1141
	else if (!*e->label)
1142
		strscpy(e->label, "unknown memory", sizeof(e->label));
1143 1144

	edac_inc_csrow(e, row, chan);
1145

1146
	/* Fill the RAM location data */
1147
	p = e->location;
1148 1149
	end = p + sizeof(e->location);
	prefix = "";
1150

1151 1152 1153
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;
1154

1155 1156 1157
		p += scnprintf(p, end - p, "%s%s:%d", prefix,
			       edac_layer_name[mci->layers[i].type], pos[i]);
		prefix = " ";
1158
	}
1159

1160
	edac_raw_mc_handle_error(e);
1161
}
1162
EXPORT_SYMBOL_GPL(edac_mc_handle_error);