Commit 9ada9fd5 — Author: Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

Pull EDAC fixes from Borislav Petkov:

 - EDAC core error path fix, from Denis Kirjanov.

 - Generalization of AMD MCE bank names and some minor error reporting
   improvements.

 - EDAC core cleanups and simplifications, from Wei Yongjun.

 - amd64_edac fixes for sysfs-reported values, from Josh Hunt.

 - Some heavy amd64_edac error reporting path shaving, leading to the
   removal of a bunch of code.

 - amd64_edac error injection method improvements.

 - EDAC core cleanups and fixes.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp: (24 commits)
  EDAC, pci_sysfs: Use for_each_pci_dev to simplify the code
  EDAC: Handle error path in edac_mc_sysfs_init() properly
  MCE, AMD: Dump error status
  MCE, AMD: Report decoded error type first
  MCE, AMD: Dump CPU f/m/s triple with the error
  MCE, AMD: Remove functional unit references
  EDAC: Convert to use simple_open()
  EDAC, Calxeda highbank: Convert to use simple_open()
  EDAC: Fix mc size reported in sysfs
  EDAC: Fix csrow size reported in sysfs
  EDAC: Pass mci parent
  EDAC: Add memory controller flags
  amd64_edac: Fix csrows size and pages computation
  amd64_edac: Use DBAM_DIMM macro
  amd64_edac: Fix K8 chip select reporting
  amd64_edac: Reorganize error reporting path
  amd64_edac: Do not check whether error address is valid
  amd64_edac: Improve error injection
  amd64_edac: Cleanup error injection code
  amd64_edac: Small fixlets and cleanups
  ...
...@@ -42,10 +42,10 @@ config EDAC_LEGACY_SYSFS ...@@ -42,10 +42,10 @@ config EDAC_LEGACY_SYSFS
config EDAC_DEBUG config EDAC_DEBUG
bool "Debugging" bool "Debugging"
help help
This turns on debugging information for the entire EDAC This turns on debugging information for the entire EDAC subsystem.
sub-system. You can insert module with "debug_level=x", current You do so by inserting edac_module with "edac_debug_level=x." Valid
there're four debug levels (x=0,1,2,3 from low to high). levels are 0-4 (from low to high) and by default it is set to 2.
Usually you should select 'N'. Usually you should select 'N' here.
config EDAC_DECODE_MCE config EDAC_DECODE_MCE
tristate "Decode MCEs in human-readable form (only on AMD for now)" tristate "Decode MCEs in human-readable form (only on AMD for now)"
......
...@@ -60,8 +60,8 @@ struct scrubrate { ...@@ -60,8 +60,8 @@ struct scrubrate {
{ 0x00, 0UL}, /* scrubbing off */ { 0x00, 0UL}, /* scrubbing off */
}; };
static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
u32 *val, const char *func) u32 *val, const char *func)
{ {
int err = 0; int err = 0;
...@@ -423,7 +423,6 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, ...@@ -423,7 +423,6 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
u64 *hole_offset, u64 *hole_size) u64 *hole_offset, u64 *hole_size)
{ {
struct amd64_pvt *pvt = mci->pvt_info; struct amd64_pvt *pvt = mci->pvt_info;
u64 base;
/* only revE and later have the DRAM Hole Address Register */ /* only revE and later have the DRAM Hole Address Register */
if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) { if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) {
...@@ -462,10 +461,8 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, ...@@ -462,10 +461,8 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
* addresses in the hole so that they start at 0x100000000. * addresses in the hole so that they start at 0x100000000.
*/ */
base = dhar_base(pvt); *hole_base = dhar_base(pvt);
*hole_size = (1ULL << 32) - *hole_base;
*hole_base = base;
*hole_size = (0x1ull << 32) - base;
if (boot_cpu_data.x86 > 0xf) if (boot_cpu_data.x86 > 0xf)
*hole_offset = f10_dhar_offset(pvt); *hole_offset = f10_dhar_offset(pvt);
...@@ -513,15 +510,15 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr) ...@@ -513,15 +510,15 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
{ {
struct amd64_pvt *pvt = mci->pvt_info; struct amd64_pvt *pvt = mci->pvt_info;
u64 dram_base, hole_base, hole_offset, hole_size, dram_addr; u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
int ret = 0; int ret;
dram_base = get_dram_base(pvt, pvt->mc_node_id); dram_base = get_dram_base(pvt, pvt->mc_node_id);
ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
&hole_size); &hole_size);
if (!ret) { if (!ret) {
if ((sys_addr >= (1ull << 32)) && if ((sys_addr >= (1ULL << 32)) &&
(sys_addr < ((1ull << 32) + hole_size))) { (sys_addr < ((1ULL << 32) + hole_size))) {
/* use DHAR to translate SysAddr to DramAddr */ /* use DHAR to translate SysAddr to DramAddr */
dram_addr = sys_addr - hole_offset; dram_addr = sys_addr - hole_offset;
...@@ -712,10 +709,10 @@ static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci, ...@@ -712,10 +709,10 @@ static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
/* Map the Error address to a PAGE and PAGE OFFSET. */ /* Map the Error address to a PAGE and PAGE OFFSET. */
static inline void error_address_to_page_and_offset(u64 error_address, static inline void error_address_to_page_and_offset(u64 error_address,
u32 *page, u32 *offset) struct err_info *err)
{ {
*page = (u32) (error_address >> PAGE_SHIFT); err->page = (u32) (error_address >> PAGE_SHIFT);
*offset = ((u32) error_address) & ~PAGE_MASK; err->offset = ((u32) error_address) & ~PAGE_MASK;
} }
/* /*
...@@ -1026,59 +1023,44 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) ...@@ -1026,59 +1023,44 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
} }
static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
u16 syndrome) struct err_info *err)
{ {
struct mem_ctl_info *src_mci;
struct amd64_pvt *pvt = mci->pvt_info; struct amd64_pvt *pvt = mci->pvt_info;
int channel, csrow;
u32 page, offset;
error_address_to_page_and_offset(sys_addr, &page, &offset); error_address_to_page_and_offset(sys_addr, err);
/* /*
* Find out which node the error address belongs to. This may be * Find out which node the error address belongs to. This may be
* different from the node that detected the error. * different from the node that detected the error.
*/ */
src_mci = find_mc_by_sys_addr(mci, sys_addr); err->src_mci = find_mc_by_sys_addr(mci, sys_addr);
if (!src_mci) { if (!err->src_mci) {
amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n", amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
(unsigned long)sys_addr); (unsigned long)sys_addr);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, err->err_code = ERR_NODE;
page, offset, syndrome,
-1, -1, -1,
"failed to map error addr to a node",
"");
return; return;
} }
/* Now map the sys_addr to a CSROW */ /* Now map the sys_addr to a CSROW */
csrow = sys_addr_to_csrow(src_mci, sys_addr); err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr);
if (csrow < 0) { if (err->csrow < 0) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, err->err_code = ERR_CSROW;
page, offset, syndrome,
-1, -1, -1,
"failed to map error addr to a csrow",
"");
return; return;
} }
/* CHIPKILL enabled */ /* CHIPKILL enabled */
if (pvt->nbcfg & NBCFG_CHIPKILL) { if (pvt->nbcfg & NBCFG_CHIPKILL) {
channel = get_channel_from_ecc_syndrome(mci, syndrome); err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
if (channel < 0) { if (err->channel < 0) {
/* /*
* Syndrome didn't map, so we don't know which of the * Syndrome didn't map, so we don't know which of the
* 2 DIMMs is in error. So we need to ID 'both' of them * 2 DIMMs is in error. So we need to ID 'both' of them
* as suspect. * as suspect.
*/ */
amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - " amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - "
"possible error reporting race\n", "possible error reporting race\n",
syndrome); err->syndrome);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, err->err_code = ERR_CHANNEL;
page, offset, syndrome,
csrow, -1, -1,
"unknown syndrome - possible error reporting race",
"");
return; return;
} }
} else { } else {
...@@ -1090,13 +1072,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, ...@@ -1090,13 +1072,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
* was obtained from email communication with someone at AMD. * was obtained from email communication with someone at AMD.
* (Wish the email was placed in this comment - norsk) * (Wish the email was placed in this comment - norsk)
*/ */
channel = ((sys_addr & BIT(3)) != 0); err->channel = ((sys_addr & BIT(3)) != 0);
} }
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, 1,
page, offset, syndrome,
csrow, channel, -1,
"", "");
} }
static int ddr2_cs_size(unsigned i, bool dct_width) static int ddr2_cs_size(unsigned i, bool dct_width)
...@@ -1482,7 +1459,7 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr) ...@@ -1482,7 +1459,7 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
/* For a given @dram_range, check if @sys_addr falls within it. */ /* For a given @dram_range, check if @sys_addr falls within it. */
static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
u64 sys_addr, int *nid, int *chan_sel) u64 sys_addr, int *chan_sel)
{ {
int cs_found = -EINVAL; int cs_found = -EINVAL;
u64 chan_addr; u64 chan_addr;
...@@ -1555,15 +1532,14 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, ...@@ -1555,15 +1532,14 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel); cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
if (cs_found >= 0) { if (cs_found >= 0)
*nid = node_id;
*chan_sel = channel; *chan_sel = channel;
}
return cs_found; return cs_found;
} }
static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
int *node, int *chan_sel) int *chan_sel)
{ {
int cs_found = -EINVAL; int cs_found = -EINVAL;
unsigned range; unsigned range;
...@@ -1577,8 +1553,7 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, ...@@ -1577,8 +1553,7 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
(get_dram_limit(pvt, range) >= sys_addr)) { (get_dram_limit(pvt, range) >= sys_addr)) {
cs_found = f1x_match_to_this_node(pvt, range, cs_found = f1x_match_to_this_node(pvt, range,
sys_addr, node, sys_addr, chan_sel);
chan_sel);
if (cs_found >= 0) if (cs_found >= 0)
break; break;
} }
...@@ -1594,22 +1569,15 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, ...@@ -1594,22 +1569,15 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
* (MCX_ADDR). * (MCX_ADDR).
*/ */
static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
u16 syndrome) struct err_info *err)
{ {
struct amd64_pvt *pvt = mci->pvt_info; struct amd64_pvt *pvt = mci->pvt_info;
u32 page, offset;
int nid, csrow, chan = 0;
error_address_to_page_and_offset(sys_addr, &page, &offset); error_address_to_page_and_offset(sys_addr, err);
csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel);
if (err->csrow < 0) {
if (csrow < 0) { err->err_code = ERR_CSROW;
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
page, offset, syndrome,
-1, -1, -1,
"failed to map error addr to a csrow",
"");
return; return;
} }
...@@ -1619,12 +1587,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, ...@@ -1619,12 +1587,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
* this point. * this point.
*/ */
if (dct_ganging_enabled(pvt)) if (dct_ganging_enabled(pvt))
chan = get_channel_from_ecc_syndrome(mci, syndrome); err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
page, offset, syndrome,
csrow, chan, -1,
"", "");
} }
/* /*
...@@ -1633,14 +1596,11 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, ...@@ -1633,14 +1596,11 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
*/ */
static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
{ {
int dimm, size0, size1, factor = 0; int dimm, size0, size1;
u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
if (boot_cpu_data.x86 == 0xf) { if (boot_cpu_data.x86 == 0xf) {
if (pvt->dclr0 & WIDTH_128)
factor = 1;
/* K8 families < revF not supported yet */ /* K8 families < revF not supported yet */
if (pvt->ext_model < K8_REV_F) if (pvt->ext_model < K8_REV_F)
return; return;
...@@ -1671,8 +1631,8 @@ static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) ...@@ -1671,8 +1631,8 @@ static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
DBAM_DIMM(dimm, dbam)); DBAM_DIMM(dimm, dbam));
amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
dimm * 2, size0 << factor, dimm * 2, size0,
dimm * 2 + 1, size1 << factor); dimm * 2 + 1, size1);
} }
} }
...@@ -1893,101 +1853,56 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) ...@@ -1893,101 +1853,56 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz); return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
} }
/* static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err,
* Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR u8 ecc_type)
* ADDRESS and process.
*/
static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
{
struct amd64_pvt *pvt = mci->pvt_info;
u64 sys_addr;
u16 syndrome;
/* Ensure that the Error Address is VALID */
if (!(m->status & MCI_STATUS_ADDRV)) {
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
0, 0, 0,
-1, -1, -1,
"HW has no ERROR_ADDRESS available",
"");
return;
}
sys_addr = get_error_address(m);
syndrome = extract_syndrome(m->status);
amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);
pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, syndrome);
}
/* Handle any Un-correctable Errors (UEs) */
static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
{ {
struct mem_ctl_info *log_mci, *src_mci = NULL; enum hw_event_mc_err_type err_type;
int csrow; const char *string;
u64 sys_addr;
u32 page, offset;
log_mci = mci;
if (!(m->status & MCI_STATUS_ADDRV)) { if (ecc_type == 2)
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); err_type = HW_EVENT_ERR_CORRECTED;
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, else if (ecc_type == 1)
0, 0, 0, err_type = HW_EVENT_ERR_UNCORRECTED;
-1, -1, -1, else {
"HW has no ERROR_ADDRESS available", WARN(1, "Something is rotten in the state of Denmark.\n");
"");
return; return;
} }
sys_addr = get_error_address(m); switch (err->err_code) {
error_address_to_page_and_offset(sys_addr, &page, &offset); case DECODE_OK:
string = "";
/* break;
* Find out which node the error address belongs to. This may be case ERR_NODE:
* different from the node that detected the error. string = "Failed to map error addr to a node";
*/ break;
src_mci = find_mc_by_sys_addr(mci, sys_addr); case ERR_CSROW:
if (!src_mci) { string = "Failed to map error addr to a csrow";
amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n", break;
(unsigned long)sys_addr); case ERR_CHANNEL:
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, string = "unknown syndrome - possible error reporting race";
page, offset, 0, break;
-1, -1, -1, default:
"ERROR ADDRESS NOT mapped to a MC", string = "WTF error";
""); break;
return;
} }
log_mci = src_mci; edac_mc_handle_error(err_type, mci, 1,
err->page, err->offset, err->syndrome,
csrow = sys_addr_to_csrow(log_mci, sys_addr); err->csrow, err->channel, -1,
if (csrow < 0) { string, "");
amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
(unsigned long)sys_addr);
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
page, offset, 0,
-1, -1, -1,
"ERROR ADDRESS NOT mapped to CS",
"");
} else {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
page, offset, 0,
csrow, -1, -1,
"", "");
}
} }
static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
struct mce *m) struct mce *m)
{ {
u16 ec = EC(m->status); struct amd64_pvt *pvt = mci->pvt_info;
u8 xec = XEC(m->status, 0x1f);
u8 ecc_type = (m->status >> 45) & 0x3; u8 ecc_type = (m->status >> 45) & 0x3;
u8 xec = XEC(m->status, 0x1f);
u16 ec = EC(m->status);
u64 sys_addr;
struct err_info err;
/* Bail early out if this was an 'observed' error */ /* Bail out early if this was an 'observed' error */
if (PP(ec) == NBSL_PP_OBS) if (PP(ec) == NBSL_PP_OBS)
return; return;
...@@ -1995,10 +1910,16 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, ...@@ -1995,10 +1910,16 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
if (xec && xec != F10_NBSL_EXT_ERR_ECC) if (xec && xec != F10_NBSL_EXT_ERR_ECC)
return; return;
memset(&err, 0, sizeof(err));
sys_addr = get_error_address(m);
if (ecc_type == 2) if (ecc_type == 2)
amd64_handle_ce(mci, m); err.syndrome = extract_syndrome(m->status);
else if (ecc_type == 1)
amd64_handle_ue(mci, m); pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);
__log_bus_error(mci, &err, ecc_type);
} }
void amd64_decode_bus_error(int node_id, struct mce *m) void amd64_decode_bus_error(int node_id, struct mce *m)
...@@ -2166,6 +2087,7 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) ...@@ -2166,6 +2087,7 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
u32 cs_mode, nr_pages; u32 cs_mode, nr_pages;
u32 dbam = dct ? pvt->dbam1 : pvt->dbam0; u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
/* /*
* The math on this doesn't look right on the surface because x/2*4 can * The math on this doesn't look right on the surface because x/2*4 can
* be simplified to x*2 but this expression makes use of the fact that * be simplified to x*2 but this expression makes use of the fact that
...@@ -2173,13 +2095,13 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) ...@@ -2173,13 +2095,13 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
* number of bits to shift the DBAM register to extract the proper CSROW * number of bits to shift the DBAM register to extract the proper CSROW
* field. * field.
*/ */
cs_mode = (dbam >> ((csrow_nr / 2) * 4)) & 0xF; cs_mode = DBAM_DIMM(csrow_nr / 2, dbam);
nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT); nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);
edac_dbg(0, " (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode); edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
edac_dbg(0, " nr_pages/channel= %u channel-count = %d\n", csrow_nr, dct, cs_mode);
nr_pages, pvt->channel_count); edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
return nr_pages; return nr_pages;
} }
...@@ -2190,15 +2112,14 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) ...@@ -2190,15 +2112,14 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
*/ */
static int init_csrows(struct mem_ctl_info *mci) static int init_csrows(struct mem_ctl_info *mci)
{ {
struct amd64_pvt *pvt = mci->pvt_info;
struct csrow_info *csrow; struct csrow_info *csrow;
struct dimm_info *dimm; struct dimm_info *dimm;
struct amd64_pvt *pvt = mci->pvt_info;
u64 base, mask;
u32 val;
int i, j, empty = 1;
enum mem_type mtype;
enum edac_type edac_mode; enum edac_type edac_mode;
enum mem_type mtype;
int i, j, empty = 1;
int nr_pages = 0; int nr_pages = 0;
u32 val;
amd64_read_pci_cfg(pvt->F3, NBCFG, &val); amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
...@@ -2208,29 +2129,35 @@ static int init_csrows(struct mem_ctl_info *mci) ...@@ -2208,29 +2129,35 @@ static int init_csrows(struct mem_ctl_info *mci)
pvt->mc_node_id, val, pvt->mc_node_id, val,
!!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
/*
* We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
*/
for_each_chip_select(i, 0, pvt) { for_each_chip_select(i, 0, pvt) {
csrow = mci->csrows[i]; bool row_dct0 = !!csrow_enabled(i, 0, pvt);
bool row_dct1 = false;
if (boot_cpu_data.x86 != 0xf)
row_dct1 = !!csrow_enabled(i, 1, pvt);
if (!csrow_enabled(i, 0, pvt) && !csrow_enabled(i, 1, pvt)) { if (!row_dct0 && !row_dct1)
edac_dbg(1, "----CSROW %d VALID for MC node %d\n",
i, pvt->mc_node_id);
continue; continue;
}
csrow = mci->csrows[i];
empty = 0; empty = 0;
if (csrow_enabled(i, 0, pvt))
edac_dbg(1, "MC node: %d, csrow: %d\n",
pvt->mc_node_id, i);
if (row_dct0)
nr_pages = amd64_csrow_nr_pages(pvt, 0, i); nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
if (csrow_enabled(i, 1, pvt))
nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
get_cs_base_and_mask(pvt, i, 0, &base, &mask); /* K8 has only one DCT */
/* 8 bytes of resolution */ if (boot_cpu_data.x86 != 0xf && row_dct1)
nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
mtype = amd64_determine_memory_type(pvt, i); mtype = amd64_determine_memory_type(pvt, i);
edac_dbg(1, " for MC node %d csrow %d:\n", pvt->mc_node_id, i); edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
edac_dbg(1, " nr_pages: %u\n",
nr_pages * pvt->channel_count);
/* /*
* determine whether CHIPKILL or JUST ECC or NO ECC is operating * determine whether CHIPKILL or JUST ECC or NO ECC is operating
...@@ -2247,6 +2174,7 @@ static int init_csrows(struct mem_ctl_info *mci) ...@@ -2247,6 +2174,7 @@ static int init_csrows(struct mem_ctl_info *mci)
dimm->edac_mode = edac_mode; dimm->edac_mode = edac_mode;
dimm->nr_pages = nr_pages; dimm->nr_pages = nr_pages;
} }
csrow->nr_pages = nr_pages;
} }
return empty; return empty;
...@@ -2591,6 +2519,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) ...@@ -2591,6 +2519,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
mci->pvt_info = pvt; mci->pvt_info = pvt;
mci->pdev = &pvt->F2->dev; mci->pdev = &pvt->F2->dev;
mci->csbased = 1;
setup_mci_misc_attrs(mci, fam_type); setup_mci_misc_attrs(mci, fam_type);
......
...@@ -219,7 +219,7 @@ ...@@ -219,7 +219,7 @@
#define DBAM1 0x180 #define DBAM1 0x180
/* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */ /* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */
#define DBAM_DIMM(i, reg) ((((reg) >> (4*i))) & 0xF) #define DBAM_DIMM(i, reg) ((((reg) >> (4*(i)))) & 0xF)
#define DBAM_MAX_VALUE 11 #define DBAM_MAX_VALUE 11
...@@ -267,18 +267,20 @@ ...@@ -267,18 +267,20 @@
#define online_spare_bad_dramcs(pvt, c) (((pvt)->online_spare >> (4 + 4 * (c))) & 0x7) #define online_spare_bad_dramcs(pvt, c) (((pvt)->online_spare >> (4 + 4 * (c))) & 0x7)
#define F10_NB_ARRAY_ADDR 0xB8 #define F10_NB_ARRAY_ADDR 0xB8
#define F10_NB_ARRAY_DRAM_ECC BIT(31) #define F10_NB_ARRAY_DRAM BIT(31)
/* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline */ /* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline */
#define SET_NB_ARRAY_ADDRESS(section) (((section) & 0x3) << 1) #define SET_NB_ARRAY_ADDR(section) (((section) & 0x3) << 1)
#define F10_NB_ARRAY_DATA 0xBC #define F10_NB_ARRAY_DATA 0xBC
#define SET_NB_DRAM_INJECTION_WRITE(word, bits) \ #define F10_NB_ARR_ECC_WR_REQ BIT(17)
(BIT(((word) & 0xF) + 20) | \ #define SET_NB_DRAM_INJECTION_WRITE(inj) \
BIT(17) | bits) (BIT(((inj.word) & 0xF) + 20) | \
#define SET_NB_DRAM_INJECTION_READ(word, bits) \ F10_NB_ARR_ECC_WR_REQ | inj.bit_map)
(BIT(((word) & 0xF) + 20) | \ #define SET_NB_DRAM_INJECTION_READ(inj) \
BIT(16) | bits) (BIT(((inj.word) & 0xF) + 20) | \
BIT(16) | inj.bit_map)
#define NBCAP 0xE8 #define NBCAP 0xE8
#define NBCAP_CHIPKILL BIT(4) #define NBCAP_CHIPKILL BIT(4)
...@@ -305,9 +307,9 @@ enum amd_families { ...@@ -305,9 +307,9 @@ enum amd_families {
/* Error injection control structure */ /* Error injection control structure */
struct error_injection { struct error_injection {
u32 section; u32 section;
u32 word; u32 word;
u32 bit_map; u32 bit_map;
}; };
/* low and high part of PCI config space regs */ /* low and high part of PCI config space regs */
...@@ -374,6 +376,23 @@ struct amd64_pvt { ...@@ -374,6 +376,23 @@ struct amd64_pvt {
struct error_injection injection; struct error_injection injection;
}; };
enum err_codes {
DECODE_OK = 0,
ERR_NODE = -1,
ERR_CSROW = -2,
ERR_CHANNEL = -3,
};
struct err_info {
int err_code;
struct mem_ctl_info *src_mci;
int csrow;
int channel;
u16 syndrome;
u32 page;
u32 offset;
};
static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i) static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i)
{ {
u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8; u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8;
...@@ -447,7 +466,7 @@ static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci) ...@@ -447,7 +466,7 @@ static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
struct low_ops { struct low_ops {
int (*early_channel_count) (struct amd64_pvt *pvt); int (*early_channel_count) (struct amd64_pvt *pvt);
void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr, void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr,
u16 syndrome); struct err_info *);
int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode); int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode);
int (*read_dct_pci_cfg) (struct amd64_pvt *pvt, int offset, int (*read_dct_pci_cfg) (struct amd64_pvt *pvt, int offset,
u32 *val, const char *func); u32 *val, const char *func);
...@@ -459,6 +478,8 @@ struct amd64_family_type { ...@@ -459,6 +478,8 @@ struct amd64_family_type {
struct low_ops ops; struct low_ops ops;
}; };
int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
u32 *val, const char *func);
int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
u32 val, const char *func); u32 val, const char *func);
...@@ -475,3 +496,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, ...@@ -475,3 +496,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
u64 *hole_offset, u64 *hole_size); u64 *hole_offset, u64 *hole_size);
#define to_mci(k) container_of(k, struct mem_ctl_info, dev) #define to_mci(k) container_of(k, struct mem_ctl_info, dev)
/* Injection helpers */
static inline void disable_caches(void *dummy)
{
write_cr0(read_cr0() | X86_CR0_CD);
wbinvd();
}
static inline void enable_caches(void *dummy)
{
write_cr0(read_cr0() & ~X86_CR0_CD);
}
...@@ -22,20 +22,19 @@ static ssize_t amd64_inject_section_store(struct device *dev, ...@@ -22,20 +22,19 @@ static ssize_t amd64_inject_section_store(struct device *dev,
struct mem_ctl_info *mci = to_mci(dev); struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info; struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value; unsigned long value;
int ret = 0; int ret;
ret = strict_strtoul(data, 10, &value); ret = strict_strtoul(data, 10, &value);
if (ret != -EINVAL) { if (ret < 0)
return ret;
if (value > 3) { if (value > 3) {
amd64_warn("%s: invalid section 0x%lx\n", __func__, value); amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
return -EINVAL; return -EINVAL;
}
pvt->injection.section = (u32) value;
return count;
} }
return ret;
pvt->injection.section = (u32) value;
return count;
} }
static ssize_t amd64_inject_word_show(struct device *dev, static ssize_t amd64_inject_word_show(struct device *dev,
...@@ -60,20 +59,19 @@ static ssize_t amd64_inject_word_store(struct device *dev, ...@@ -60,20 +59,19 @@ static ssize_t amd64_inject_word_store(struct device *dev,
struct mem_ctl_info *mci = to_mci(dev); struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info; struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value; unsigned long value;
int ret = 0; int ret;
ret = strict_strtoul(data, 10, &value); ret = strict_strtoul(data, 10, &value);
if (ret != -EINVAL) { if (ret < 0)
return ret;
if (value > 8) { if (value > 8) {
amd64_warn("%s: invalid word 0x%lx\n", __func__, value); amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
return -EINVAL; return -EINVAL;
}
pvt->injection.word = (u32) value;
return count;
} }
return ret;
pvt->injection.word = (u32) value;
return count;
} }
static ssize_t amd64_inject_ecc_vector_show(struct device *dev, static ssize_t amd64_inject_ecc_vector_show(struct device *dev,
...@@ -97,21 +95,19 @@ static ssize_t amd64_inject_ecc_vector_store(struct device *dev, ...@@ -97,21 +95,19 @@ static ssize_t amd64_inject_ecc_vector_store(struct device *dev,
struct mem_ctl_info *mci = to_mci(dev); struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info; struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value; unsigned long value;
int ret = 0; int ret;
ret = strict_strtoul(data, 16, &value); ret = strict_strtoul(data, 16, &value);
if (ret != -EINVAL) { if (ret < 0)
return ret;
if (value & 0xFFFF0000) { if (value & 0xFFFF0000) {
amd64_warn("%s: invalid EccVector: 0x%lx\n", amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value);
__func__, value); return -EINVAL;
return -EINVAL;
}
pvt->injection.bit_map = (u32) value;
return count;
} }
return ret;
pvt->injection.bit_map = (u32) value;
return count;
} }
/* /*
...@@ -126,28 +122,25 @@ static ssize_t amd64_inject_read_store(struct device *dev, ...@@ -126,28 +122,25 @@ static ssize_t amd64_inject_read_store(struct device *dev,
struct amd64_pvt *pvt = mci->pvt_info; struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value; unsigned long value;
u32 section, word_bits; u32 section, word_bits;
int ret = 0; int ret;
ret = strict_strtoul(data, 10, &value); ret = strict_strtoul(data, 10, &value);
if (ret != -EINVAL) { if (ret < 0)
return ret;
/* Form value to choose 16-byte section of cacheline */ /* Form value to choose 16-byte section of cacheline */
section = F10_NB_ARRAY_DRAM_ECC | section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
SET_NB_ARRAY_ADDRESS(pvt->injection.section);
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection.word, amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
pvt->injection.bit_map);
/* Issue 'word' and 'bit' along with the READ request */ word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection);
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
edac_dbg(0, "section=0x%x word_bits=0x%x\n", /* Issue 'word' and 'bit' along with the READ request */
section, word_bits); amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
return count; edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
}
return ret; return count;
} }
/* /*
...@@ -160,30 +153,43 @@ static ssize_t amd64_inject_write_store(struct device *dev, ...@@ -160,30 +153,43 @@ static ssize_t amd64_inject_write_store(struct device *dev,
{ {
struct mem_ctl_info *mci = to_mci(dev); struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info; struct amd64_pvt *pvt = mci->pvt_info;
u32 section, word_bits, tmp;
unsigned long value; unsigned long value;
u32 section, word_bits; int ret;
int ret = 0;
ret = strict_strtoul(data, 10, &value); ret = strict_strtoul(data, 10, &value);
if (ret != -EINVAL) { if (ret < 0)
return ret;
/* Form value to choose 16-byte section of cacheline */
section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
/* Form value to choose 16-byte section of cacheline */ word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection);
section = F10_NB_ARRAY_DRAM_ECC |
SET_NB_ARRAY_ADDRESS(pvt->injection.section);
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection.word, pr_notice_once("Don't forget to decrease MCE polling interval in\n"
pvt->injection.bit_map); "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n"
"so that you can get the error report faster.\n");
/* Issue 'word' and 'bit' along with the READ request */ on_each_cpu(disable_caches, NULL, 1);
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
edac_dbg(0, "section=0x%x word_bits=0x%x\n", /* Issue 'word' and 'bit' along with the READ request */
section, word_bits); amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
return count; retry:
/* wait until injection happens */
amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp);
if (tmp & F10_NB_ARR_ECC_WR_REQ) {
cpu_relax();
goto retry;
} }
return ret;
on_each_cpu(enable_caches, NULL, 1);
edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
return count;
} }
/* /*
......
...@@ -974,20 +974,22 @@ static void edac_ce_error(struct mem_ctl_info *mci, ...@@ -974,20 +974,22 @@ static void edac_ce_error(struct mem_ctl_info *mci,
long grain) long grain)
{ {
unsigned long remapped_page; unsigned long remapped_page;
char *msg_aux = "";
if (*msg)
msg_aux = " ";
if (edac_mc_get_log_ce()) { if (edac_mc_get_log_ce()) {
if (other_detail && *other_detail) if (other_detail && *other_detail)
edac_mc_printk(mci, KERN_WARNING, edac_mc_printk(mci, KERN_WARNING,
"%d CE %s on %s (%s %s - %s)\n", "%d CE %s%son %s (%s %s - %s)\n",
error_count, error_count, msg, msg_aux, label,
msg, label, location, location, detail, other_detail);
detail, other_detail);
else else
edac_mc_printk(mci, KERN_WARNING, edac_mc_printk(mci, KERN_WARNING,
"%d CE %s on %s (%s %s)\n", "%d CE %s%son %s (%s %s)\n",
error_count, error_count, msg, msg_aux, label,
msg, label, location, location, detail);
detail);
} }
edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count); edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
...@@ -1022,27 +1024,31 @@ static void edac_ue_error(struct mem_ctl_info *mci, ...@@ -1022,27 +1024,31 @@ static void edac_ue_error(struct mem_ctl_info *mci,
const char *other_detail, const char *other_detail,
const bool enable_per_layer_report) const bool enable_per_layer_report)
{ {
char *msg_aux = "";
if (*msg)
msg_aux = " ";
if (edac_mc_get_log_ue()) { if (edac_mc_get_log_ue()) {
if (other_detail && *other_detail) if (other_detail && *other_detail)
edac_mc_printk(mci, KERN_WARNING, edac_mc_printk(mci, KERN_WARNING,
"%d UE %s on %s (%s %s - %s)\n", "%d UE %s%son %s (%s %s - %s)\n",
error_count, error_count, msg, msg_aux, label,
msg, label, location, detail, location, detail, other_detail);
other_detail);
else else
edac_mc_printk(mci, KERN_WARNING, edac_mc_printk(mci, KERN_WARNING,
"%d UE %s on %s (%s %s)\n", "%d UE %s%son %s (%s %s)\n",
error_count, error_count, msg, msg_aux, label,
msg, label, location, detail); location, detail);
} }
if (edac_mc_get_panic_on_ue()) { if (edac_mc_get_panic_on_ue()) {
if (other_detail && *other_detail) if (other_detail && *other_detail)
panic("UE %s on %s (%s%s - %s)\n", panic("UE %s%son %s (%s%s - %s)\n",
msg, label, location, detail, other_detail); msg, msg_aux, label, location, detail, other_detail);
else else
panic("UE %s on %s (%s%s)\n", panic("UE %s%son %s (%s%s)\n",
msg, label, location, detail); msg, msg_aux, label, location, detail);
} }
edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
...@@ -1101,10 +1107,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, ...@@ -1101,10 +1107,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
*/ */
for (i = 0; i < mci->n_layers; i++) { for (i = 0; i < mci->n_layers; i++) {
if (pos[i] >= (int)mci->layers[i].size) { if (pos[i] >= (int)mci->layers[i].size) {
if (type == HW_EVENT_ERR_CORRECTED)
p = "CE";
else
p = "UE";
edac_mc_printk(mci, KERN_ERR, edac_mc_printk(mci, KERN_ERR,
"INTERNAL ERROR: %s value is out of range (%d >= %d)\n", "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
...@@ -1136,6 +1138,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, ...@@ -1136,6 +1138,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
grain = 0; grain = 0;
p = label; p = label;
*p = '\0'; *p = '\0';
for (i = 0; i < mci->tot_dimms; i++) { for (i = 0; i < mci->tot_dimms; i++) {
struct dimm_info *dimm = mci->dimms[i]; struct dimm_info *dimm = mci->dimms[i];
...@@ -1203,6 +1206,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, ...@@ -1203,6 +1206,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
/* Fill the RAM location data */ /* Fill the RAM location data */
p = location; p = location;
for (i = 0; i < mci->n_layers; i++) { for (i = 0; i < mci->n_layers; i++) {
if (pos[i] < 0) if (pos[i] < 0)
continue; continue;
...@@ -1215,7 +1219,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, ...@@ -1215,7 +1219,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
*(p - 1) = '\0'; *(p - 1) = '\0';
/* Report the error via the trace interface */ /* Report the error via the trace interface */
grain_bits = fls_long(grain) + 1; grain_bits = fls_long(grain) + 1;
trace_mc_event(type, msg, label, error_count, trace_mc_event(type, msg, label, error_count,
mci->mc_idx, top_layer, mid_layer, low_layer, mci->mc_idx, top_layer, mid_layer, low_layer,
......
...@@ -180,6 +180,9 @@ static ssize_t csrow_size_show(struct device *dev, ...@@ -180,6 +180,9 @@ static ssize_t csrow_size_show(struct device *dev,
int i; int i;
u32 nr_pages = 0; u32 nr_pages = 0;
if (csrow->mci->csbased)
return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages));
for (i = 0; i < csrow->nr_channels; i++) for (i = 0; i < csrow->nr_channels; i++)
nr_pages += csrow->channels[i]->dimm->nr_pages; nr_pages += csrow->channels[i]->dimm->nr_pages;
return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages)); return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages));
...@@ -373,6 +376,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci, ...@@ -373,6 +376,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
csrow->dev.bus = &mci->bus; csrow->dev.bus = &mci->bus;
device_initialize(&csrow->dev); device_initialize(&csrow->dev);
csrow->dev.parent = &mci->dev; csrow->dev.parent = &mci->dev;
csrow->mci = mci;
dev_set_name(&csrow->dev, "csrow%d", index); dev_set_name(&csrow->dev, "csrow%d", index);
dev_set_drvdata(&csrow->dev, csrow); dev_set_drvdata(&csrow->dev, csrow);
...@@ -777,10 +781,14 @@ static ssize_t mci_size_mb_show(struct device *dev, ...@@ -777,10 +781,14 @@ static ssize_t mci_size_mb_show(struct device *dev,
for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) { for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) {
struct csrow_info *csrow = mci->csrows[csrow_idx]; struct csrow_info *csrow = mci->csrows[csrow_idx];
for (j = 0; j < csrow->nr_channels; j++) { if (csrow->mci->csbased) {
struct dimm_info *dimm = csrow->channels[j]->dimm; total_pages += csrow->nr_pages;
} else {
for (j = 0; j < csrow->nr_channels; j++) {
struct dimm_info *dimm = csrow->channels[j]->dimm;
total_pages += dimm->nr_pages; total_pages += dimm->nr_pages;
}
} }
} }
...@@ -838,14 +846,8 @@ static ssize_t edac_fake_inject_write(struct file *file, ...@@ -838,14 +846,8 @@ static ssize_t edac_fake_inject_write(struct file *file,
return count; return count;
} }
/*
 * debugfs ->open() hook for the fake-inject file: copy the inode's private
 * pointer into file->private_data and report success (always returns 0).
 */
static int debugfs_open(struct inode *inode, struct file *file)
{
file->private_data = inode->i_private;
return 0;
}
static const struct file_operations debug_fake_inject_fops = { static const struct file_operations debug_fake_inject_fops = {
.open = debugfs_open, .open = simple_open,
.write = edac_fake_inject_write, .write = edac_fake_inject_write,
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
}; };
...@@ -1124,10 +1126,15 @@ int __init edac_mc_sysfs_init(void) ...@@ -1124,10 +1126,15 @@ int __init edac_mc_sysfs_init(void)
edac_subsys = edac_get_sysfs_subsys(); edac_subsys = edac_get_sysfs_subsys();
if (edac_subsys == NULL) { if (edac_subsys == NULL) {
edac_dbg(1, "no edac_subsys\n"); edac_dbg(1, "no edac_subsys\n");
return -EINVAL; err = -EINVAL;
goto out;
} }
mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL); mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL);
if (!mci_pdev) {
err = -ENOMEM;
goto out_put_sysfs;
}
mci_pdev->bus = edac_subsys; mci_pdev->bus = edac_subsys;
mci_pdev->type = &mc_attr_type; mci_pdev->type = &mc_attr_type;
...@@ -1136,11 +1143,18 @@ int __init edac_mc_sysfs_init(void) ...@@ -1136,11 +1143,18 @@ int __init edac_mc_sysfs_init(void)
err = device_add(mci_pdev); err = device_add(mci_pdev);
if (err < 0) if (err < 0)
return err; goto out_dev_free;
edac_dbg(0, "device %s created\n", dev_name(mci_pdev)); edac_dbg(0, "device %s created\n", dev_name(mci_pdev));
return 0; return 0;
out_dev_free:
kfree(mci_pdev);
out_put_sysfs:
edac_put_sysfs_subsys();
out:
return err;
} }
void __exit edac_mc_sysfs_exit(void) void __exit edac_mc_sysfs_exit(void)
...@@ -1148,4 +1162,5 @@ void __exit edac_mc_sysfs_exit(void) ...@@ -1148,4 +1162,5 @@ void __exit edac_mc_sysfs_exit(void)
put_device(mci_pdev); put_device(mci_pdev);
device_del(mci_pdev); device_del(mci_pdev);
edac_put_sysfs_subsys(); edac_put_sysfs_subsys();
kfree(mci_pdev);
} }
...@@ -18,9 +18,29 @@ ...@@ -18,9 +18,29 @@
#define EDAC_VERSION "Ver: 3.0.0" #define EDAC_VERSION "Ver: 3.0.0"
#ifdef CONFIG_EDAC_DEBUG #ifdef CONFIG_EDAC_DEBUG
/*
 * Setter for the edac_debug_level module parameter.
 *
 * @buf: textual value supplied by the user
 * @kp:  kernel parameter descriptor handed on to param_set_int()
 *
 * The string is first parsed with kstrtoul() so that it can be range-checked;
 * only values up to 4 are accepted. On success the actual store is delegated
 * to param_set_int().
 *
 * Returns the kstrtoul() error if @buf does not parse, -EINVAL if the value
 * is larger than 4, otherwise whatever param_set_int() returns.
 */
static int edac_set_debug_level(const char *buf, struct kernel_param *kp)
{
	unsigned long val;
	int ret;

	ret = kstrtoul(buf, 0, &val);
	if (ret)
		return ret;

	/*
	 * val is unsigned, so a "val < 0" test can never fire; negative input
	 * is already rejected by kstrtoul() above. Only the upper bound needs
	 * checking here.
	 */
	if (val > 4)
		return -EINVAL;

	return param_set_int(buf, kp);
}
/* Values of 0 to 4 will generate output */ /* Values of 0 to 4 will generate output */
int edac_debug_level = 2; int edac_debug_level = 2;
EXPORT_SYMBOL_GPL(edac_debug_level); EXPORT_SYMBOL_GPL(edac_debug_level);
module_param_call(edac_debug_level, edac_set_debug_level, param_get_int,
&edac_debug_level, 0644);
MODULE_PARM_DESC(edac_debug_level, "EDAC debug level: [0-4], default: 2");
#endif #endif
/* scope is to module level only */ /* scope is to module level only */
...@@ -132,10 +152,3 @@ module_exit(edac_exit); ...@@ -132,10 +152,3 @@ module_exit(edac_exit);
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al"); MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al");
MODULE_DESCRIPTION("Core library routines for EDAC reporting"); MODULE_DESCRIPTION("Core library routines for EDAC reporting");
/* refer to *_sysfs.c files for parameters that are exported via sysfs */
#ifdef CONFIG_EDAC_DEBUG
module_param(edac_debug_level, int, 0644);
MODULE_PARM_DESC(edac_debug_level, "Debug level");
#endif
...@@ -470,7 +470,8 @@ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev, ...@@ -470,7 +470,8 @@ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
pci->mod_name = mod_name; pci->mod_name = mod_name;
pci->ctl_name = EDAC_PCI_GENCTL_NAME; pci->ctl_name = EDAC_PCI_GENCTL_NAME;
pci->edac_check = edac_pci_generic_check; if (edac_op_state == EDAC_OPSTATE_POLL)
pci->edac_check = edac_pci_generic_check;
pdata->edac_idx = edac_pci_idx++; pdata->edac_idx = edac_pci_idx++;
......
...@@ -645,20 +645,16 @@ typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev); ...@@ -645,20 +645,16 @@ typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
/* /*
* pci_dev parity list iterator * pci_dev parity list iterator
* Scan the PCI device list for one pass, looking for SERRORs *
* Master Parity ERRORS or Parity ERRORs on primary or secondary devices * Scan the PCI device list looking for SERRORs, Master Parity ERRORS or
* Parity ERRORs on primary or secondary devices.
*/ */
static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
{ {
struct pci_dev *dev = NULL; struct pci_dev *dev = NULL;
/* request for kernel access to the next PCI device, if any, for_each_pci_dev(dev)
* and while we are looking at it have its reference count
* bumped until we are done with it
*/
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
fn(dev); fn(dev);
}
} }
/* /*
......
...@@ -113,14 +113,8 @@ static ssize_t highbank_mc_err_inject_write(struct file *file, ...@@ -113,14 +113,8 @@ static ssize_t highbank_mc_err_inject_write(struct file *file,
return count; return count;
} }
/*
 * debugfs ->open() hook for the error-injection file: copy the inode's
 * private pointer into file->private_data and report success (always
 * returns 0).
 */
static int debugfs_open(struct inode *inode, struct file *file)
{
file->private_data = inode->i_private;
return 0;
}
static const struct file_operations highbank_mc_debug_inject_fops = { static const struct file_operations highbank_mc_debug_inject_fops = {
.open = debugfs_open, .open = simple_open,
.write = highbank_mc_err_inject_write, .write = highbank_mc_err_inject_write,
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
}; };
......
...@@ -64,7 +64,7 @@ EXPORT_SYMBOL_GPL(to_msgs); ...@@ -64,7 +64,7 @@ EXPORT_SYMBOL_GPL(to_msgs);
const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
EXPORT_SYMBOL_GPL(ii_msgs); EXPORT_SYMBOL_GPL(ii_msgs);
static const char * const f15h_ic_mce_desc[] = { static const char * const f15h_mc1_mce_desc[] = {
"UC during a demand linefill from L2", "UC during a demand linefill from L2",
"Parity error during data load from IC", "Parity error during data load from IC",
"Parity error for IC valid bit", "Parity error for IC valid bit",
...@@ -84,7 +84,7 @@ static const char * const f15h_ic_mce_desc[] = { ...@@ -84,7 +84,7 @@ static const char * const f15h_ic_mce_desc[] = {
"fetch address FIFO" "fetch address FIFO"
}; };
static const char * const f15h_cu_mce_desc[] = { static const char * const f15h_mc2_mce_desc[] = {
"Fill ECC error on data fills", /* xec = 0x4 */ "Fill ECC error on data fills", /* xec = 0x4 */
"Fill parity error on insn fills", "Fill parity error on insn fills",
"Prefetcher request FIFO parity error", "Prefetcher request FIFO parity error",
...@@ -101,7 +101,7 @@ static const char * const f15h_cu_mce_desc[] = { ...@@ -101,7 +101,7 @@ static const char * const f15h_cu_mce_desc[] = {
"PRB address parity error" "PRB address parity error"
}; };
static const char * const nb_mce_desc[] = { static const char * const mc4_mce_desc[] = {
"DRAM ECC error detected on the NB", "DRAM ECC error detected on the NB",
"CRC error detected on HT link", "CRC error detected on HT link",
"Link-defined sync error packets detected on HT link", "Link-defined sync error packets detected on HT link",
...@@ -123,7 +123,7 @@ static const char * const nb_mce_desc[] = { ...@@ -123,7 +123,7 @@ static const char * const nb_mce_desc[] = {
"ECC Error in the Probe Filter directory" "ECC Error in the Probe Filter directory"
}; };
static const char * const fr_ex_mce_desc[] = { static const char * const mc5_mce_desc[] = {
"CPU Watchdog timer expire", "CPU Watchdog timer expire",
"Wakeup array dest tag", "Wakeup array dest tag",
"AG payload array", "AG payload array",
...@@ -139,7 +139,7 @@ static const char * const fr_ex_mce_desc[] = { ...@@ -139,7 +139,7 @@ static const char * const fr_ex_mce_desc[] = {
"DE error occurred" "DE error occurred"
}; };
static bool f12h_dc_mce(u16 ec, u8 xec) static bool f12h_mc0_mce(u16 ec, u8 xec)
{ {
bool ret = false; bool ret = false;
...@@ -157,26 +157,26 @@ static bool f12h_dc_mce(u16 ec, u8 xec) ...@@ -157,26 +157,26 @@ static bool f12h_dc_mce(u16 ec, u8 xec)
return ret; return ret;
} }
static bool f10h_dc_mce(u16 ec, u8 xec) static bool f10h_mc0_mce(u16 ec, u8 xec)
{ {
if (R4(ec) == R4_GEN && LL(ec) == LL_L1) { if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
pr_cont("during data scrub.\n"); pr_cont("during data scrub.\n");
return true; return true;
} }
return f12h_dc_mce(ec, xec); return f12h_mc0_mce(ec, xec);
} }
static bool k8_dc_mce(u16 ec, u8 xec) static bool k8_mc0_mce(u16 ec, u8 xec)
{ {
if (BUS_ERROR(ec)) { if (BUS_ERROR(ec)) {
pr_cont("during system linefill.\n"); pr_cont("during system linefill.\n");
return true; return true;
} }
return f10h_dc_mce(ec, xec); return f10h_mc0_mce(ec, xec);
} }
static bool f14h_dc_mce(u16 ec, u8 xec) static bool f14h_mc0_mce(u16 ec, u8 xec)
{ {
u8 r4 = R4(ec); u8 r4 = R4(ec);
bool ret = true; bool ret = true;
...@@ -228,7 +228,7 @@ static bool f14h_dc_mce(u16 ec, u8 xec) ...@@ -228,7 +228,7 @@ static bool f14h_dc_mce(u16 ec, u8 xec)
return ret; return ret;
} }
static bool f15h_dc_mce(u16 ec, u8 xec) static bool f15h_mc0_mce(u16 ec, u8 xec)
{ {
bool ret = true; bool ret = true;
...@@ -275,12 +275,12 @@ static bool f15h_dc_mce(u16 ec, u8 xec) ...@@ -275,12 +275,12 @@ static bool f15h_dc_mce(u16 ec, u8 xec)
return ret; return ret;
} }
static void amd_decode_dc_mce(struct mce *m) static void decode_mc0_mce(struct mce *m)
{ {
u16 ec = EC(m->status); u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask); u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "Data Cache Error: "); pr_emerg(HW_ERR "MC0 Error: ");
/* TLB error signatures are the same across families */ /* TLB error signatures are the same across families */
if (TLB_ERROR(ec)) { if (TLB_ERROR(ec)) {
...@@ -290,13 +290,13 @@ static void amd_decode_dc_mce(struct mce *m) ...@@ -290,13 +290,13 @@ static void amd_decode_dc_mce(struct mce *m)
: (xec ? "multimatch" : "parity"))); : (xec ? "multimatch" : "parity")));
return; return;
} }
} else if (fam_ops->dc_mce(ec, xec)) } else if (fam_ops->mc0_mce(ec, xec))
; ;
else else
pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
} }
static bool k8_ic_mce(u16 ec, u8 xec) static bool k8_mc1_mce(u16 ec, u8 xec)
{ {
u8 ll = LL(ec); u8 ll = LL(ec);
bool ret = true; bool ret = true;
...@@ -330,7 +330,7 @@ static bool k8_ic_mce(u16 ec, u8 xec) ...@@ -330,7 +330,7 @@ static bool k8_ic_mce(u16 ec, u8 xec)
return ret; return ret;
} }
static bool f14h_ic_mce(u16 ec, u8 xec) static bool f14h_mc1_mce(u16 ec, u8 xec)
{ {
u8 r4 = R4(ec); u8 r4 = R4(ec);
bool ret = true; bool ret = true;
...@@ -349,7 +349,7 @@ static bool f14h_ic_mce(u16 ec, u8 xec) ...@@ -349,7 +349,7 @@ static bool f14h_ic_mce(u16 ec, u8 xec)
return ret; return ret;
} }
static bool f15h_ic_mce(u16 ec, u8 xec) static bool f15h_mc1_mce(u16 ec, u8 xec)
{ {
bool ret = true; bool ret = true;
...@@ -358,19 +358,19 @@ static bool f15h_ic_mce(u16 ec, u8 xec) ...@@ -358,19 +358,19 @@ static bool f15h_ic_mce(u16 ec, u8 xec)
switch (xec) { switch (xec) {
case 0x0 ... 0xa: case 0x0 ... 0xa:
pr_cont("%s.\n", f15h_ic_mce_desc[xec]); pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
break; break;
case 0xd: case 0xd:
pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]); pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
break; break;
case 0x10: case 0x10:
pr_cont("%s.\n", f15h_ic_mce_desc[xec-4]); pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
break; break;
case 0x11 ... 0x14: case 0x11 ... 0x14:
pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]); pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
break; break;
default: default:
...@@ -379,12 +379,12 @@ static bool f15h_ic_mce(u16 ec, u8 xec) ...@@ -379,12 +379,12 @@ static bool f15h_ic_mce(u16 ec, u8 xec)
return ret; return ret;
} }
static void amd_decode_ic_mce(struct mce *m) static void decode_mc1_mce(struct mce *m)
{ {
u16 ec = EC(m->status); u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask); u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "Instruction Cache Error: "); pr_emerg(HW_ERR "MC1 Error: ");
if (TLB_ERROR(ec)) if (TLB_ERROR(ec))
pr_cont("%s TLB %s.\n", LL_MSG(ec), pr_cont("%s TLB %s.\n", LL_MSG(ec),
...@@ -393,18 +393,18 @@ static void amd_decode_ic_mce(struct mce *m) ...@@ -393,18 +393,18 @@ static void amd_decode_ic_mce(struct mce *m)
bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
} else if (fam_ops->ic_mce(ec, xec)) } else if (fam_ops->mc1_mce(ec, xec))
; ;
else else
pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
} }
static void amd_decode_bu_mce(struct mce *m) static void decode_mc2_mce(struct mce *m)
{ {
u16 ec = EC(m->status); u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask); u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "Bus Unit Error"); pr_emerg(HW_ERR "MC2 Error");
if (xec == 0x1) if (xec == 0x1)
pr_cont(" in the write data buffers.\n"); pr_cont(" in the write data buffers.\n");
...@@ -429,24 +429,24 @@ static void amd_decode_bu_mce(struct mce *m) ...@@ -429,24 +429,24 @@ static void amd_decode_bu_mce(struct mce *m)
pr_cont(": %s parity/ECC error during data " pr_cont(": %s parity/ECC error during data "
"access from L2.\n", R4_MSG(ec)); "access from L2.\n", R4_MSG(ec));
else else
goto wrong_bu_mce; goto wrong_mc2_mce;
} else } else
goto wrong_bu_mce; goto wrong_mc2_mce;
} else } else
goto wrong_bu_mce; goto wrong_mc2_mce;
return; return;
wrong_bu_mce: wrong_mc2_mce:
pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
} }
static void amd_decode_cu_mce(struct mce *m) static void decode_f15_mc2_mce(struct mce *m)
{ {
u16 ec = EC(m->status); u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask); u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "Combined Unit Error: "); pr_emerg(HW_ERR "MC2 Error: ");
if (TLB_ERROR(ec)) { if (TLB_ERROR(ec)) {
if (xec == 0x0) if (xec == 0x0)
...@@ -454,63 +454,63 @@ static void amd_decode_cu_mce(struct mce *m) ...@@ -454,63 +454,63 @@ static void amd_decode_cu_mce(struct mce *m)
else if (xec == 0x1) else if (xec == 0x1)
pr_cont("Poison data provided for TLB fill.\n"); pr_cont("Poison data provided for TLB fill.\n");
else else
goto wrong_cu_mce; goto wrong_f15_mc2_mce;
} else if (BUS_ERROR(ec)) { } else if (BUS_ERROR(ec)) {
if (xec > 2) if (xec > 2)
goto wrong_cu_mce; goto wrong_f15_mc2_mce;
pr_cont("Error during attempted NB data read.\n"); pr_cont("Error during attempted NB data read.\n");
} else if (MEM_ERROR(ec)) { } else if (MEM_ERROR(ec)) {
switch (xec) { switch (xec) {
case 0x4 ... 0xc: case 0x4 ... 0xc:
pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]); pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
break; break;
case 0x10 ... 0x14: case 0x10 ... 0x14:
pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]); pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
break; break;
default: default:
goto wrong_cu_mce; goto wrong_f15_mc2_mce;
} }
} }
return; return;
wrong_cu_mce: wrong_f15_mc2_mce:
pr_emerg(HW_ERR "Corrupted CU MCE info?\n"); pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
} }
static void amd_decode_ls_mce(struct mce *m) static void decode_mc3_mce(struct mce *m)
{ {
u16 ec = EC(m->status); u16 ec = EC(m->status);
u8 xec = XEC(m->status, xec_mask); u8 xec = XEC(m->status, xec_mask);
if (boot_cpu_data.x86 >= 0x14) { if (boot_cpu_data.x86 >= 0x14) {
pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
" please report on LKML.\n"); " please report on LKML.\n");
return; return;
} }
pr_emerg(HW_ERR "Load Store Error"); pr_emerg(HW_ERR "MC3 Error");
if (xec == 0x0) { if (xec == 0x0) {
u8 r4 = R4(ec); u8 r4 = R4(ec);
if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
goto wrong_ls_mce; goto wrong_mc3_mce;
pr_cont(" during %s.\n", R4_MSG(ec)); pr_cont(" during %s.\n", R4_MSG(ec));
} else } else
goto wrong_ls_mce; goto wrong_mc3_mce;
return; return;
wrong_ls_mce: wrong_mc3_mce:
pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
} }
void amd_decode_nb_mce(struct mce *m) static void decode_mc4_mce(struct mce *m)
{ {
struct cpuinfo_x86 *c = &boot_cpu_data; struct cpuinfo_x86 *c = &boot_cpu_data;
int node_id = amd_get_nb_id(m->extcpu); int node_id = amd_get_nb_id(m->extcpu);
...@@ -518,7 +518,7 @@ void amd_decode_nb_mce(struct mce *m) ...@@ -518,7 +518,7 @@ void amd_decode_nb_mce(struct mce *m)
u8 xec = XEC(m->status, 0x1f); u8 xec = XEC(m->status, 0x1f);
u8 offset = 0; u8 offset = 0;
pr_emerg(HW_ERR "Northbridge Error (node %d): ", node_id); pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
switch (xec) { switch (xec) {
case 0x0 ... 0xe: case 0x0 ... 0xe:
...@@ -527,9 +527,9 @@ void amd_decode_nb_mce(struct mce *m) ...@@ -527,9 +527,9 @@ void amd_decode_nb_mce(struct mce *m)
if (xec == 0x0 || xec == 0x8) { if (xec == 0x0 || xec == 0x8) {
/* no ECCs on F11h */ /* no ECCs on F11h */
if (c->x86 == 0x11) if (c->x86 == 0x11)
goto wrong_nb_mce; goto wrong_mc4_mce;
pr_cont("%s.\n", nb_mce_desc[xec]); pr_cont("%s.\n", mc4_mce_desc[xec]);
if (nb_bus_decoder) if (nb_bus_decoder)
nb_bus_decoder(node_id, m); nb_bus_decoder(node_id, m);
...@@ -543,14 +543,14 @@ void amd_decode_nb_mce(struct mce *m) ...@@ -543,14 +543,14 @@ void amd_decode_nb_mce(struct mce *m)
else if (BUS_ERROR(ec)) else if (BUS_ERROR(ec))
pr_cont("DMA Exclusion Vector Table Walk error.\n"); pr_cont("DMA Exclusion Vector Table Walk error.\n");
else else
goto wrong_nb_mce; goto wrong_mc4_mce;
return; return;
case 0x19: case 0x19:
if (boot_cpu_data.x86 == 0x15) if (boot_cpu_data.x86 == 0x15)
pr_cont("Compute Unit Data Error.\n"); pr_cont("Compute Unit Data Error.\n");
else else
goto wrong_nb_mce; goto wrong_mc4_mce;
return; return;
case 0x1c ... 0x1f: case 0x1c ... 0x1f:
...@@ -558,46 +558,44 @@ void amd_decode_nb_mce(struct mce *m) ...@@ -558,46 +558,44 @@ void amd_decode_nb_mce(struct mce *m)
break; break;
default: default:
goto wrong_nb_mce; goto wrong_mc4_mce;
} }
pr_cont("%s.\n", nb_mce_desc[xec - offset]); pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
return; return;
wrong_nb_mce: wrong_mc4_mce:
pr_emerg(HW_ERR "Corrupted NB MCE info?\n"); pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
} }
EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
static void amd_decode_fr_mce(struct mce *m) static void decode_mc5_mce(struct mce *m)
{ {
struct cpuinfo_x86 *c = &boot_cpu_data; struct cpuinfo_x86 *c = &boot_cpu_data;
u8 xec = XEC(m->status, xec_mask); u8 xec = XEC(m->status, xec_mask);
if (c->x86 == 0xf || c->x86 == 0x11) if (c->x86 == 0xf || c->x86 == 0x11)
goto wrong_fr_mce; goto wrong_mc5_mce;
pr_emerg(HW_ERR "%s Error: ", pr_emerg(HW_ERR "MC5 Error: ");
(c->x86 == 0x15 ? "Execution Unit" : "FIROB"));
if (xec == 0x0 || xec == 0xc) if (xec == 0x0 || xec == 0xc)
pr_cont("%s.\n", fr_ex_mce_desc[xec]); pr_cont("%s.\n", mc5_mce_desc[xec]);
else if (xec < 0xd) else if (xec < 0xd)
pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]); pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
else else
goto wrong_fr_mce; goto wrong_mc5_mce;
return; return;
wrong_fr_mce: wrong_mc5_mce:
pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
} }
static void amd_decode_fp_mce(struct mce *m) static void decode_mc6_mce(struct mce *m)
{ {
u8 xec = XEC(m->status, xec_mask); u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "Floating Point Unit Error: "); pr_emerg(HW_ERR "MC6 Error: ");
switch (xec) { switch (xec) {
case 0x1: case 0x1:
...@@ -621,7 +619,7 @@ static void amd_decode_fp_mce(struct mce *m) ...@@ -621,7 +619,7 @@ static void amd_decode_fp_mce(struct mce *m)
break; break;
default: default:
goto wrong_fp_mce; goto wrong_mc6_mce;
break; break;
} }
...@@ -629,8 +627,8 @@ static void amd_decode_fp_mce(struct mce *m) ...@@ -629,8 +627,8 @@ static void amd_decode_fp_mce(struct mce *m)
return; return;
wrong_fp_mce: wrong_mc6_mce:
pr_emerg(HW_ERR "Corrupted FP MCE info?\n"); pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
} }
static inline void amd_decode_err_code(u16 ec) static inline void amd_decode_err_code(u16 ec)
...@@ -669,74 +667,94 @@ static bool amd_filter_mce(struct mce *m) ...@@ -669,74 +667,94 @@ static bool amd_filter_mce(struct mce *m)
return false; return false;
} }
/*
 * Map the severity-related bits of a machine check record to a short,
 * human-readable summary string.
 *
 * @m: the MCE record; m->status and m->mcgstatus are inspected
 *
 * Returns a pointer to a constant string; the caller must not free it.
 */
static const char *decode_error_status(struct mce *m)
{
	/* UC clear: the error is either deferred or was corrected. */
	if (!(m->status & MCI_STATUS_UC))
		return (m->status & MCI_STATUS_DEFERRED) ?
			"Deferred error." :
			"Corrected error, no action required.";

	/* UC together with PCC is reported as fatal. */
	if (m->status & MCI_STATUS_PCC)
		return "System Fatal error.";

	/* Remaining UC cases are told apart by RIPV in MCG_STATUS. */
	return (m->mcgstatus & MCG_STATUS_RIPV) ?
		"Uncorrected, software restartable error." :
		"Uncorrected, software containable error.";
}
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{ {
struct mce *m = (struct mce *)data; struct mce *m = (struct mce *)data;
struct cpuinfo_x86 *c = &boot_cpu_data; struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
int ecc; int ecc;
if (amd_filter_mce(m)) if (amd_filter_mce(m))
return NOTIFY_STOP; return NOTIFY_STOP;
pr_emerg(HW_ERR "CPU:%d\tMC%d_STATUS[%s|%s|%s|%s|%s",
m->extcpu, m->bank,
((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
if (c->x86 == 0x15)
pr_cont("|%s|%s",
((m->status & BIT_64(44)) ? "Deferred" : "-"),
((m->status & BIT_64(43)) ? "Poison" : "-"));
/* do the two bits[14:13] together */
ecc = (m->status >> 45) & 0x3;
if (ecc)
pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
pr_cont("]: 0x%016llx\n", m->status);
if (m->status & MCI_STATUS_ADDRV)
pr_emerg(HW_ERR "\tMC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
switch (m->bank) { switch (m->bank) {
case 0: case 0:
amd_decode_dc_mce(m); decode_mc0_mce(m);
break; break;
case 1: case 1:
amd_decode_ic_mce(m); decode_mc1_mce(m);
break; break;
case 2: case 2:
if (c->x86 == 0x15) if (c->x86 == 0x15)
amd_decode_cu_mce(m); decode_f15_mc2_mce(m);
else else
amd_decode_bu_mce(m); decode_mc2_mce(m);
break; break;
case 3: case 3:
amd_decode_ls_mce(m); decode_mc3_mce(m);
break; break;
case 4: case 4:
amd_decode_nb_mce(m); decode_mc4_mce(m);
break; break;
case 5: case 5:
amd_decode_fr_mce(m); decode_mc5_mce(m);
break; break;
case 6: case 6:
amd_decode_fp_mce(m); decode_mc6_mce(m);
break; break;
default: default:
break; break;
} }
pr_emerg(HW_ERR "Error Status: %s\n", decode_error_status(m));
pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
m->extcpu,
c->x86, c->x86_model, c->x86_mask,
m->bank,
((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
if (c->x86 == 0x15)
pr_cont("|%s|%s",
((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
/* do the two bits[14:13] together */
ecc = (m->status >> 45) & 0x3;
if (ecc)
pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
pr_cont("]: 0x%016llx\n", m->status);
if (m->status & MCI_STATUS_ADDRV)
pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
amd_decode_err_code(m->status & 0xffff); amd_decode_err_code(m->status & 0xffff);
return NOTIFY_STOP; return NOTIFY_STOP;
...@@ -763,35 +781,35 @@ static int __init mce_amd_init(void) ...@@ -763,35 +781,35 @@ static int __init mce_amd_init(void)
switch (c->x86) { switch (c->x86) {
case 0xf: case 0xf:
fam_ops->dc_mce = k8_dc_mce; fam_ops->mc0_mce = k8_mc0_mce;
fam_ops->ic_mce = k8_ic_mce; fam_ops->mc1_mce = k8_mc1_mce;
break; break;
case 0x10: case 0x10:
fam_ops->dc_mce = f10h_dc_mce; fam_ops->mc0_mce = f10h_mc0_mce;
fam_ops->ic_mce = k8_ic_mce; fam_ops->mc1_mce = k8_mc1_mce;
break; break;
case 0x11: case 0x11:
fam_ops->dc_mce = k8_dc_mce; fam_ops->mc0_mce = k8_mc0_mce;
fam_ops->ic_mce = k8_ic_mce; fam_ops->mc1_mce = k8_mc1_mce;
break; break;
case 0x12: case 0x12:
fam_ops->dc_mce = f12h_dc_mce; fam_ops->mc0_mce = f12h_mc0_mce;
fam_ops->ic_mce = k8_ic_mce; fam_ops->mc1_mce = k8_mc1_mce;
break; break;
case 0x14: case 0x14:
nb_err_cpumask = 0x3; nb_err_cpumask = 0x3;
fam_ops->dc_mce = f14h_dc_mce; fam_ops->mc0_mce = f14h_mc0_mce;
fam_ops->ic_mce = f14h_ic_mce; fam_ops->mc1_mce = f14h_mc1_mce;
break; break;
case 0x15: case 0x15:
xec_mask = 0x1f; xec_mask = 0x1f;
fam_ops->dc_mce = f15h_dc_mce; fam_ops->mc0_mce = f15h_mc0_mce;
fam_ops->ic_mce = f15h_ic_mce; fam_ops->mc1_mce = f15h_mc1_mce;
break; break;
default: default:
......
...@@ -29,10 +29,8 @@ ...@@ -29,10 +29,8 @@
#define R4(x) (((x) >> 4) & 0xf) #define R4(x) (((x) >> 4) & 0xf)
#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") #define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!")
/* #define MCI_STATUS_DEFERRED BIT_64(44)
* F3x4C bits (MCi_STATUS' high half) #define MCI_STATUS_POISON BIT_64(43)
*/
#define NBSH_ERR_CPU_VAL BIT(24)
enum tt_ids { enum tt_ids {
TT_INSTR = 0, TT_INSTR = 0,
...@@ -78,14 +76,13 @@ extern const char * const ii_msgs[]; ...@@ -78,14 +76,13 @@ extern const char * const ii_msgs[];
* per-family decoder ops * per-family decoder ops
*/ */
struct amd_decoder_ops { struct amd_decoder_ops {
bool (*dc_mce)(u16, u8); bool (*mc0_mce)(u16, u8);
bool (*ic_mce)(u16, u8); bool (*mc1_mce)(u16, u8);
}; };
void amd_report_gart_errors(bool); void amd_report_gart_errors(bool);
void amd_register_ecc_decoder(void (*f)(int, struct mce *)); void amd_register_ecc_decoder(void (*f)(int, struct mce *));
void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)); void amd_unregister_ecc_decoder(void (*f)(int, struct mce *));
void amd_decode_nb_mce(struct mce *);
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data); int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
#endif /* _EDAC_MCE_AMD_H */ #endif /* _EDAC_MCE_AMD_H */
...@@ -533,6 +533,7 @@ struct csrow_info { ...@@ -533,6 +533,7 @@ struct csrow_info {
u32 ue_count; /* Uncorrectable Errors for this csrow */ u32 ue_count; /* Uncorrectable Errors for this csrow */
u32 ce_count; /* Correctable Errors for this csrow */ u32 ce_count; /* Correctable Errors for this csrow */
u32 nr_pages; /* combined pages count of all channels */
struct mem_ctl_info *mci; /* the parent */ struct mem_ctl_info *mci; /* the parent */
...@@ -667,6 +668,8 @@ struct mem_ctl_info { ...@@ -667,6 +668,8 @@ struct mem_ctl_info {
u32 fake_inject_ue; u32 fake_inject_ue;
u16 fake_inject_count; u16 fake_inject_count;
#endif #endif
__u8 csbased : 1, /* csrow-based memory controller */
__resv : 7;
}; };
#endif #endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册