/*
 * Intel 82860 Memory Controller kernel module
 * (C) 2005 Red Hat (http://www.redhat.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Ben Woodard <woodard@redhat.com>
 * shamelessly copied from and based upon the edac_i82875 driver
 * by Thayne Harbaugh of Linux Networx. (http://lnxi.com)
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_core.h"

/* Driver revision string; stored in mci->mod_ver by i82860_probe1(). */
#define I82860_REVISION " Ver: 2.0.2"
/* Module name; used for mci->mod_name, the PCI control and the pci_driver. */
#define EDAC_MOD_STR	"i82860_edac"

/* Emit a driver-level message through edac_printk(), tagged "i82860". */
#define i82860_printk(level, fmt, arg...) \
	edac_printk(level, "i82860", fmt, ##arg)

/* Emit a message about one memory controller instance, tagged "i82860". */
#define i82860_mc_printk(mci, level, fmt, arg...) \
	edac_mc_chipset_printk(mci, level, "i82860", fmt, ##arg)

/* Provide the device ID locally when the included headers do not define it. */
#ifndef PCI_DEVICE_ID_INTEL_82860_0
#define PCI_DEVICE_ID_INTEL_82860_0	0x2531
#endif				/* PCI_DEVICE_ID_INTEL_82860_0 */

/*
 * PCI configuration-space offsets and field layout used by this driver.
 *
 * MCHCFG      - read as a 16-bit word; bits 0x180 are used as the DRAM data
 *               integrity mode when filling in the csrows.
 * GBA         - base of the per-row 16-bit boundary registers (row N is read
 *               at I82860_GBA + N * 2).  The value is masked with
 *               I82860_GBA_MASK and scaled by I82860_GBA_SHIFT (2^24 bytes,
 *               i.e. 16MB units) before being converted to pages.
 * ERRSTS      - 16-bit error status; the driver tests the two low bits.
 * EAP         - 32-bit error address; shifted right by PAGE_SHIFT to obtain
 *               the page number that is reported.
 * DERRCTL_STS - 16-bit value passed on as the syndrome of corrected errors.
 */
#define I82860_MCHCFG 0x50
#define I82860_GBA 0x60
#define I82860_GBA_MASK 0x7FF
#define I82860_GBA_SHIFT 24
#define I82860_ERRSTS 0xC8
#define I82860_EAP 0xE4
#define I82860_DERRCTL_STS 0xE2

/*
 * Supported chip variants.  The value is used as the index into
 * i82860_devs[] and as the driver_data entry of the PCI ID table.
 */
enum i82860_chips {
	I82860 = 0,
};

/* Static description of one supported chip variant. */
struct i82860_dev_info {
	const char *ctl_name;	/* copied into mci->ctl_name at probe time */
};

/* Raw register snapshot filled in by i82860_get_error_info(). */
struct i82860_error_info {
	u16 errsts;	/* I82860_ERRSTS, read before EAP/DERRCTL_STS */
	u32 eap;	/* I82860_EAP; later converted to a page number */
	u16 derrsyn;	/* I82860_DERRCTL_STS; reported as the CE syndrome */
	u16 errsts2;	/* I82860_ERRSTS, read again after EAP/DERRCTL_STS */
};

/* Per-chip descriptions, indexed by enum i82860_chips. */
static const struct i82860_dev_info i82860_devs[] = {
	[I82860] = {
		.ctl_name = "i82860"},
};

static struct pci_dev *mci_pdev;	/* init dev: in case that AGP code
					 * has already registered driver
					 */
/* Generic EDAC PCI control created by i82860_probe1(); NULL if that failed. */
static struct edac_pci_ctl_info *i82860_pci;

static void i82860_get_error_info(struct mem_ctl_info *mci,
66
				struct i82860_error_info *info)
67
{
68 69 70 71
	struct pci_dev *pdev;

	pdev = to_pci_dev(mci->dev);

72 73 74 75 76
	/*
	 * This is a mess because there is no atomic way to read all the
	 * registers at once and the registers can transition from CE being
	 * overwritten by UE.
	 */
77 78 79 80
	pci_read_config_word(pdev, I82860_ERRSTS, &info->errsts);
	pci_read_config_dword(pdev, I82860_EAP, &info->eap);
	pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn);
	pci_read_config_word(pdev, I82860_ERRSTS, &info->errsts2);
81

82
	pci_write_bits16(pdev, I82860_ERRSTS, 0x0003, 0x0003);
83 84 85 86 87 88 89 90

	/*
	 * If the error is the same for both reads then the first set of reads
	 * is valid.  If there is a change then there is a CE no info and the
	 * second set of reads is valid and should be UE info.
	 */
	if (!(info->errsts2 & 0x0003))
		return;
D
Dave Peterson 已提交
91

92
	if ((info->errsts ^ info->errsts2) & 0x0003) {
93
		pci_read_config_dword(pdev, I82860_EAP, &info->eap);
D
Dave Jiang 已提交
94
		pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn);
95 96 97
	}
}

D
Dave Peterson 已提交
98
static int i82860_process_error_info(struct mem_ctl_info *mci,
99 100
				struct i82860_error_info *info,
				int handle_errors)
101
{
102
	struct dimm_info *dimm;
103 104 105 106 107 108 109 110 111
	int row;

	if (!(info->errsts2 & 0x0003))
		return 0;

	if (!handle_errors)
		return 1;

	if ((info->errsts ^ info->errsts2) & 0x0003) {
112 113
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
				     -1, -1, -1, "UE overwrote CE", "", NULL);
114 115 116 117 118
		info->errsts = info->errsts2;
	}

	info->eap >>= PAGE_SHIFT;
	row = edac_mc_find_csrow_by_page(mci, info->eap);
119
	dimm = mci->csrows[row].channels[0].dimm;
120 121

	if (info->errsts & 0x0002)
122 123 124 125
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				     info->eap, 0, 0,
				     dimm->location[0], dimm->location[1], -1,
				     "i82860 UE", "", NULL);
126
	else
127 128 129 130
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				     info->eap, 0, info->derrsyn,
				     dimm->location[0], dimm->location[1], -1,
				     "i82860 CE", "", NULL);
131 132 133 134 135 136 137 138

	return 1;
}

static void i82860_check(struct mem_ctl_info *mci)
{
	struct i82860_error_info info;

D
Dave Peterson 已提交
139
	debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
140 141 142 143
	i82860_get_error_info(mci, &info);
	i82860_process_error_info(mci, &info, 1);
}

144
static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
145 146
{
	unsigned long last_cumul_size;
D
Dave Jiang 已提交
147
	u16 mchcfg_ddim;	/* DRAM Data Integrity Mode 0=none, 2=edac */
148 149 150
	u16 value;
	u32 cumul_size;
	struct csrow_info *csrow;
151
	struct dimm_info *dimm;
152 153 154 155 156 157 158 159 160 161 162 163 164
	int index;

	pci_read_config_word(pdev, I82860_MCHCFG, &mchcfg_ddim);
	mchcfg_ddim = mchcfg_ddim & 0x180;
	last_cumul_size = 0;

	/* The group row boundary (GRA) reg values are boundary address
	 * for each DRAM row with a granularity of 16MB.  GRA regs are
	 * cumulative; therefore GRA15 will contain the total memory contained
	 * in all eight rows.
	 */
	for (index = 0; index < mci->nr_csrows; index++) {
		csrow = &mci->csrows[index];
165 166
		dimm = csrow->channels[0].dimm;

167 168
		pci_read_config_word(pdev, I82860_GBA + index * 2, &value);
		cumul_size = (value & I82860_GBA_MASK) <<
169
			(I82860_GBA_SHIFT - PAGE_SHIFT);
170 171
		debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index,
			cumul_size);
172

173 174 175 176 177
		if (cumul_size == last_cumul_size)
			continue;	/* not populated */

		csrow->first_page = last_cumul_size;
		csrow->last_page = cumul_size - 1;
178
		dimm->nr_pages = cumul_size - last_cumul_size;
179
		last_cumul_size = cumul_size;
180 181 182 183
		dimm->grain = 1 << 12;	/* I82860_EAP has 4KiB reolution */
		dimm->mtype = MEM_RMBS;
		dimm->dtype = DEV_UNKNOWN;
		dimm->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE;
184 185 186 187 188 189
	}
}

static int i82860_probe1(struct pci_dev *pdev, int dev_idx)
{
	struct mem_ctl_info *mci;
190
	struct edac_mc_layer layers[2];
191
	struct i82860_error_info discard;
192

193 194 195 196 197 198 199 200 201
	/*
	 * RDRAM has channels but these don't map onto the csrow abstraction.
	 * According with the datasheet, there are 2 Rambus channels, supporting
	 * up to 16 direct RDRAM devices.
	 * The device groups from the GRA registers seem to map reasonably
	 * well onto the notion of a chip select row.
	 * There are 16 GRA registers and since the name is associated with
	 * the channel and the GRA registers map to physical devices so we are
	 * going to make 1 channel for group.
202
	 */
203 204 205 206 207 208 209
	layers[0].type = EDAC_MC_LAYER_CHANNEL;
	layers[0].size = 2;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_SLOT;
	layers[1].size = 8;
	layers[1].is_virt_csrow = true;
	mci = new_edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
210 211 212
	if (!mci)
		return -ENOMEM;

D
Dave Peterson 已提交
213
	debugf3("%s(): init mci\n", __func__);
214
	mci->dev = &pdev->dev;
215 216 217 218
	mci->mtype_cap = MEM_FLAG_DDR;
	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
	/* I"m not sure about this but I think that all RDRAM is SECDED */
	mci->edac_cap = EDAC_FLAG_SECDED;
D
Dave Peterson 已提交
219
	mci->mod_name = EDAC_MOD_STR;
220
	mci->mod_ver = I82860_REVISION;
221
	mci->ctl_name = i82860_devs[dev_idx].ctl_name;
222
	mci->dev_name = pci_name(pdev);
223 224
	mci->edac_check = i82860_check;
	mci->ctl_page_to_phys = NULL;
225
	i82860_init_csrows(mci, pdev);
D
Dave Jiang 已提交
226
	i82860_get_error_info(mci, &discard);	/* clear counters */
227

228 229 230
	/* Here we assume that we will never see multiple instances of this
	 * type of memory controller.  The ID is therefore hardcoded to 0.
	 */
231
	if (edac_mc_add_mc(mci)) {
D
Dave Peterson 已提交
232
		debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
233
		goto fail;
234
	}
D
Dave Peterson 已提交
235

236 237 238 239 240 241 242 243 244 245 246
	/* allocating generic PCI control info */
	i82860_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
	if (!i82860_pci) {
		printk(KERN_WARNING
			"%s(): Unable to create PCI control\n",
			__func__);
		printk(KERN_WARNING
			"%s(): PCI error report via EDAC not setup\n",
			__func__);
	}

247 248 249 250 251
	/* get this far and it's successful */
	debugf3("%s(): success\n", __func__);

	return 0;

252
fail:
253 254
	edac_mc_free(mci);
	return -ENODEV;
255 256 257 258
}

/*
 * i82860_init_one() - PCI probe callback
 * @pdev: device being probed
 * @ent:  matching PCI ID table entry; ->driver_data selects the chip variant
 *
 * Returns 0 on success, -EIO when the device cannot be enabled, or the
 * negative error returned by i82860_probe1().  On success a reference to
 * @pdev is stored in mci_pdev.
 */
static int __devinit i82860_init_one(struct pci_dev *pdev,
				const struct pci_device_id *ent)
{
	int rc;

	debugf0("%s()\n", __func__);
	i82860_printk(KERN_INFO, "i82860 init one\n");

	if (pci_enable_device(pdev) < 0)
		return -EIO;

	rc = i82860_probe1(pdev, ent->driver_data);

	if (rc == 0)
		mci_pdev = pci_dev_get(pdev);

	return rc;
}

/*
 * i82860_remove_one() - PCI remove callback
 * @pdev: device being removed
 *
 * Releases the generic PCI control (if one was created), then unregisters
 * and frees the memory controller attached to @pdev, if any.
 */
static void __devexit i82860_remove_one(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	if (i82860_pci)
		edac_pci_release_generic_ctl(i82860_pci);

	mci = edac_mc_del_mc(&pdev->dev);
	if (!mci)
		return;

	edac_mc_free(mci);
}

static DEFINE_PCI_DEVICE_TABLE(i82860_pci_tbl) = {
D
Dave Peterson 已提交
293
	{
D
Dave Jiang 已提交
294 295
	 PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
	 I82860},
D
Dave Peterson 已提交
296
	{
D
Dave Jiang 已提交
297 298
	 0,
	 }			/* 0 terminated list. */
299 300 301 302 303
};

MODULE_DEVICE_TABLE(pci, i82860_pci_tbl);

static struct pci_driver i82860_driver = {
D
Dave Peterson 已提交
304
	.name = EDAC_MOD_STR,
305 306 307 308 309
	.probe = i82860_init_one,
	.remove = __devexit_p(i82860_remove_one),
	.id_table = i82860_pci_tbl,
};

A
Alan Cox 已提交
310
/*
 * i82860_init() - module entry point
 *
 * Registers the PCI driver.  If no device was bound during registration
 * (mci_pdev is still NULL), the device is looked up by ID and probed by hand.
 *
 * Returns 0 on success, the error from pci_register_driver(), or -ENODEV
 * when the device cannot be found or the manual probe fails.
 */
static int __init i82860_init(void)
{
	int pci_rc;

	debugf3("%s()\n", __func__);

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	pci_rc = pci_register_driver(&i82860_driver);
	if (pci_rc < 0)
		goto fail0;

	if (!mci_pdev) {
		mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
					PCI_DEVICE_ID_INTEL_82860_0, NULL);

		if (mci_pdev == NULL) {
			debugf0("860 pci_get_device fail\n");
			pci_rc = -ENODEV;
			goto fail1;
		}

		/*
		 * NOTE(review): on success i82860_init_one() stores
		 * pci_dev_get(pdev) into mci_pdev again; if pci_get_device()
		 * already returned a counted reference, one reference may
		 * never be dropped - confirm.
		 */
		pci_rc = i82860_init_one(mci_pdev, i82860_pci_tbl);

		if (pci_rc < 0) {
			debugf0("860 init fail\n");
			pci_rc = -ENODEV;
			goto fail1;
		}
	}

	return 0;

fail1:
	pci_unregister_driver(&i82860_driver);

fail0:
	if (mci_pdev != NULL)
		pci_dev_put(mci_pdev);

	return pci_rc;
}

/*
 * i82860_exit() - module exit point
 *
 * Unregisters the PCI driver and drops the device reference held in
 * mci_pdev, if any.
 */
static void __exit i82860_exit(void)
{
	debugf3("%s()\n", __func__);

	pci_unregister_driver(&i82860_driver);

	if (mci_pdev != NULL)
		pci_dev_put(mci_pdev);
}

module_init(i82860_init);
module_exit(i82860_exit);

MODULE_LICENSE("GPL");
D
Dave Peterson 已提交
367
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) "
368
		"Ben Woodard <woodard@redhat.com>");
369
MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers");
370 371 372

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");