i82860_edac.c 9.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Intel 82860 Memory Controller kernel module
 * (C) 2005 Red Hat (http://www.redhat.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Ben Woodard <woodard@redhat.com>
 * shamelessly copied from and based upon the edac_i82875 driver
 * by Thayne Harbaugh of Linux Networx. (http://lnxi.com)
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/edac.h>
#include "edac_module.h"

/* Version suffix and module name reported through the EDAC core */
#define I82860_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR	"i82860_edac"

/* Driver-tagged wrapper around edac_printk() */
#define i82860_printk(level, fmt, arg...) \
	edac_printk(level, "i82860", fmt, ##arg)

/* Driver-tagged wrapper around edac_mc_chipset_printk() for a given mci */
#define i82860_mc_printk(mci, level, fmt, arg...) \
	edac_mc_chipset_printk(mci, level, "i82860", fmt, ##arg)
D
Dave Peterson 已提交
27

28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
/* Fall back to a local definition of the 82860 PCI device ID when the
 * included headers do not already provide one.
 */
#ifndef PCI_DEVICE_ID_INTEL_82860_0
#define PCI_DEVICE_ID_INTEL_82860_0	0x2531
#endif				/* PCI_DEVICE_ID_INTEL_82860_0 */

/* PCI config-space offsets and field layout used by this driver */
#define I82860_MCHCFG 0x50	/* bits 0x180 are read as the DRAM data integrity mode */
#define I82860_GBA 0x60		/* first of the 16-bit cumulative boundary regs, one per csrow */
#define I82860_GBA_MASK 0x7FF	/* boundary field within a GBA register */
#define I82860_GBA_SHIFT 24	/* boundary is counted in units of 1 << 24 bytes (16MB) */
#define I82860_ERRSTS 0xC8	/* error status; bits 0x0003 flag a logged error, 0x0002 a UE */
#define I82860_EAP 0xE4		/* error address; shifted by PAGE_SHIFT to get a page number */
#define I82860_DERRCTL_STS 0xE2	/* read into derrsyn and reported as the CE syndrome */

/* Chip variants handled by this driver; the values index i82860_devs[] */
enum i82860_chips {
	I82860 = 0,
};

/* Static per-chip description */
struct i82860_dev_info {
	const char *ctl_name;	/* copied into mci->ctl_name at probe time */
};

/* Snapshot of the error registers taken by i82860_get_error_info() */
struct i82860_error_info {
	u16 errsts;	/* I82860_ERRSTS, first read */
	u32 eap;	/* I82860_EAP; converted to a page number when processed */
	u16 derrsyn;	/* I82860_DERRCTL_STS; passed on as the CE syndrome */
	u16 errsts2;	/* I82860_ERRSTS, second read taken after eap/derrsyn */
};

/* Per-chip descriptions, indexed by enum i82860_chips */
static const struct i82860_dev_info i82860_devs[] = {
	[I82860] = {
		.ctl_name = "i82860"},
};

60
static struct pci_dev *mci_pdev;	/* init dev: in case that AGP code
					 * has already registered driver
					 */

/* Generic EDAC PCI control created at probe time; NULL if creation failed */
static struct edac_pci_ctl_info *i82860_pci;
64

D
Dave Peterson 已提交
65
static void i82860_get_error_info(struct mem_ctl_info *mci,
66
				struct i82860_error_info *info)
67
{
68 69
	struct pci_dev *pdev;

70
	pdev = to_pci_dev(mci->pdev);
71

72 73 74 75 76
	/*
	 * This is a mess because there is no atomic way to read all the
	 * registers at once and the registers can transition from CE being
	 * overwritten by UE.
	 */
77 78 79 80
	pci_read_config_word(pdev, I82860_ERRSTS, &info->errsts);
	pci_read_config_dword(pdev, I82860_EAP, &info->eap);
	pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn);
	pci_read_config_word(pdev, I82860_ERRSTS, &info->errsts2);
81

82
	pci_write_bits16(pdev, I82860_ERRSTS, 0x0003, 0x0003);
83 84 85 86 87 88 89 90

	/*
	 * If the error is the same for both reads then the first set of reads
	 * is valid.  If there is a change then there is a CE no info and the
	 * second set of reads is valid and should be UE info.
	 */
	if (!(info->errsts2 & 0x0003))
		return;
D
Dave Peterson 已提交
91

92
	if ((info->errsts ^ info->errsts2) & 0x0003) {
93
		pci_read_config_dword(pdev, I82860_EAP, &info->eap);
D
Dave Jiang 已提交
94
		pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn);
95 96 97
	}
}

D
Dave Peterson 已提交
98
static int i82860_process_error_info(struct mem_ctl_info *mci,
99 100
				struct i82860_error_info *info,
				int handle_errors)
101
{
102
	struct dimm_info *dimm;
103 104 105 106 107 108 109 110 111
	int row;

	if (!(info->errsts2 & 0x0003))
		return 0;

	if (!handle_errors)
		return 1;

	if ((info->errsts ^ info->errsts2) & 0x0003) {
112
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
113
				     -1, -1, -1, "UE overwrote CE", "");
114 115 116 117 118
		info->errsts = info->errsts2;
	}

	info->eap >>= PAGE_SHIFT;
	row = edac_mc_find_csrow_by_page(mci, info->eap);
119
	dimm = mci->csrows[row]->channels[0]->dimm;
120 121

	if (info->errsts & 0x0002)
122
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
123 124
				     info->eap, 0, 0,
				     dimm->location[0], dimm->location[1], -1,
125
				     "i82860 UE", "");
126
	else
127
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
128 129
				     info->eap, 0, info->derrsyn,
				     dimm->location[0], dimm->location[1], -1,
130
				     "i82860 CE", "");
131 132 133 134 135 136 137 138

	return 1;
}

static void i82860_check(struct mem_ctl_info *mci)
{
	struct i82860_error_info info;

139
	edac_dbg(1, "MC%d\n", mci->mc_idx);
140 141 142 143
	i82860_get_error_info(mci, &info);
	i82860_process_error_info(mci, &info, 1);
}

144
static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
145 146
{
	unsigned long last_cumul_size;
D
Dave Jiang 已提交
147
	u16 mchcfg_ddim;	/* DRAM Data Integrity Mode 0=none, 2=edac */
148 149 150
	u16 value;
	u32 cumul_size;
	struct csrow_info *csrow;
151
	struct dimm_info *dimm;
152 153 154 155 156 157 158 159 160 161 162 163
	int index;

	pci_read_config_word(pdev, I82860_MCHCFG, &mchcfg_ddim);
	mchcfg_ddim = mchcfg_ddim & 0x180;
	last_cumul_size = 0;

	/* The group row boundary (GRA) reg values are boundary address
	 * for each DRAM row with a granularity of 16MB.  GRA regs are
	 * cumulative; therefore GRA15 will contain the total memory contained
	 * in all eight rows.
	 */
	for (index = 0; index < mci->nr_csrows; index++) {
164 165
		csrow = mci->csrows[index];
		dimm = csrow->channels[0]->dimm;
166

167 168
		pci_read_config_word(pdev, I82860_GBA + index * 2, &value);
		cumul_size = (value & I82860_GBA_MASK) <<
169
			(I82860_GBA_SHIFT - PAGE_SHIFT);
170
		edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
171

172 173 174 175 176
		if (cumul_size == last_cumul_size)
			continue;	/* not populated */

		csrow->first_page = last_cumul_size;
		csrow->last_page = cumul_size - 1;
177
		dimm->nr_pages = cumul_size - last_cumul_size;
178
		last_cumul_size = cumul_size;
179 180 181 182
		dimm->grain = 1 << 12;	/* I82860_EAP has 4KiB reolution */
		dimm->mtype = MEM_RMBS;
		dimm->dtype = DEV_UNKNOWN;
		dimm->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE;
183 184 185 186 187 188
	}
}

static int i82860_probe1(struct pci_dev *pdev, int dev_idx)
{
	struct mem_ctl_info *mci;
189
	struct edac_mc_layer layers[2];
190
	struct i82860_error_info discard;
191

192 193 194 195 196 197 198 199 200
	/*
	 * RDRAM has channels but these don't map onto the csrow abstraction.
	 * According with the datasheet, there are 2 Rambus channels, supporting
	 * up to 16 direct RDRAM devices.
	 * The device groups from the GRA registers seem to map reasonably
	 * well onto the notion of a chip select row.
	 * There are 16 GRA registers and since the name is associated with
	 * the channel and the GRA registers map to physical devices so we are
	 * going to make 1 channel for group.
201
	 */
202 203 204 205 206 207
	layers[0].type = EDAC_MC_LAYER_CHANNEL;
	layers[0].size = 2;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_SLOT;
	layers[1].size = 8;
	layers[1].is_virt_csrow = true;
208
	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
209 210 211
	if (!mci)
		return -ENOMEM;

212
	edac_dbg(3, "init mci\n");
213
	mci->pdev = &pdev->dev;
214 215 216 217
	mci->mtype_cap = MEM_FLAG_DDR;
	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
	/* I"m not sure about this but I think that all RDRAM is SECDED */
	mci->edac_cap = EDAC_FLAG_SECDED;
D
Dave Peterson 已提交
218
	mci->mod_name = EDAC_MOD_STR;
219
	mci->mod_ver = I82860_REVISION;
220
	mci->ctl_name = i82860_devs[dev_idx].ctl_name;
221
	mci->dev_name = pci_name(pdev);
222 223
	mci->edac_check = i82860_check;
	mci->ctl_page_to_phys = NULL;
224
	i82860_init_csrows(mci, pdev);
D
Dave Jiang 已提交
225
	i82860_get_error_info(mci, &discard);	/* clear counters */
226

227 228 229
	/* Here we assume that we will never see multiple instances of this
	 * type of memory controller.  The ID is therefore hardcoded to 0.
	 */
230
	if (edac_mc_add_mc(mci)) {
231
		edac_dbg(3, "failed edac_mc_add_mc()\n");
232
		goto fail;
233
	}
D
Dave Peterson 已提交
234

235 236 237 238 239 240 241 242 243 244 245
	/* allocating generic PCI control info */
	i82860_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
	if (!i82860_pci) {
		printk(KERN_WARNING
			"%s(): Unable to create PCI control\n",
			__func__);
		printk(KERN_WARNING
			"%s(): PCI error report via EDAC not setup\n",
			__func__);
	}

246
	/* get this far and it's successful */
247
	edac_dbg(3, "success\n");
248 249 250

	return 0;

251
fail:
252 253
	edac_mc_free(mci);
	return -ENODEV;
254 255 256
}

/* returns count (>= 0), or negative on error */
257 258
static int i82860_init_one(struct pci_dev *pdev,
			   const struct pci_device_id *ent)
259 260 261
{
	int rc;

262
	edac_dbg(0, "\n");
D
Dave Peterson 已提交
263
	i82860_printk(KERN_INFO, "i82860 init one\n");
D
Dave Peterson 已提交
264 265

	if (pci_enable_device(pdev) < 0)
266
		return -EIO;
D
Dave Peterson 已提交
267

268
	rc = i82860_probe1(pdev, ent->driver_data);
D
Dave Peterson 已提交
269 270

	if (rc == 0)
271
		mci_pdev = pci_dev_get(pdev);
D
Dave Peterson 已提交
272

273 274 275
	return rc;
}

276
static void i82860_remove_one(struct pci_dev *pdev)
277 278 279
{
	struct mem_ctl_info *mci;

280
	edac_dbg(0, "\n");
281

282 283 284
	if (i82860_pci)
		edac_pci_release_generic_ctl(i82860_pci);

285
	if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
286 287 288
		return;

	edac_mc_free(mci);
289 290
}

291
static const struct pci_device_id i82860_pci_tbl[] = {
D
Dave Peterson 已提交
292
	{
D
Dave Jiang 已提交
293 294
	 PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
	 I82860},
D
Dave Peterson 已提交
295
	{
D
Dave Jiang 已提交
296 297
	 0,
	 }			/* 0 terminated list. */
298 299 300 301 302
};

MODULE_DEVICE_TABLE(pci, i82860_pci_tbl);

static struct pci_driver i82860_driver = {
D
Dave Peterson 已提交
303
	.name = EDAC_MOD_STR,
304
	.probe = i82860_init_one,
305
	.remove = i82860_remove_one,
306 307 308
	.id_table = i82860_pci_tbl,
};

A
Alan Cox 已提交
309
/*
 * Module entry point.  Registers the PCI driver; if that did not result
 * in a probed device (mci_pdev still NULL), looks the device up by ID and
 * probes it by hand.
 *
 * Returns 0 on success, the pci_register_driver() error code, or -ENODEV
 * when the device is missing or the manual probe fails.
 */
static int __init i82860_init(void)
{
	int pci_rc;

	edac_dbg(3, "\n");

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	pci_rc = pci_register_driver(&i82860_driver);
	if (pci_rc < 0)
		goto fail0;

	if (!mci_pdev) {
		mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
					PCI_DEVICE_ID_INTEL_82860_0, NULL);

		if (mci_pdev == NULL) {
			edac_dbg(0, "860 pci_get_device fail\n");
			pci_rc = -ENODEV;
			goto fail1;
		}

		pci_rc = i82860_init_one(mci_pdev, i82860_pci_tbl);

		if (pci_rc < 0) {
			edac_dbg(0, "860 init fail\n");
			pci_rc = -ENODEV;
			goto fail1;
		}

		/*
		 * pci_get_device() and the pci_dev_get() done by
		 * i82860_init_one() each took a reference, but module exit
		 * drops only one.  Release the extra one here so the
		 * device reference is not leaked.
		 */
		pci_dev_put(mci_pdev);
	}

	return 0;

fail1:
	pci_unregister_driver(&i82860_driver);

fail0:
	/* mci_pdev may still be NULL here; pci_dev_put() accepts that */
	pci_dev_put(mci_pdev);
	return pci_rc;
}

/*
 * Module exit point: unregister the PCI driver and drop the device
 * reference held in mci_pdev.
 */
static void __exit i82860_exit(void)
{
	edac_dbg(3, "\n");
	pci_unregister_driver(&i82860_driver);
	pci_dev_put(mci_pdev);
}

module_init(i82860_init);
module_exit(i82860_exit);

MODULE_LICENSE("GPL");
D
Dave Peterson 已提交
361
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) "
362
		"Ben Woodard <woodard@redhat.com>");
363
MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers");
364 365 366

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");