提交 36168d71 编写于 作者: L Linus Torvalds

Merge tag 'edac_for_4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

Pull EDAC updates from Borislav Petkov:
 "The EDAC tree was busier than usual this cycle as the shortlog below
  shows.

  Also, this pull request is carrying an ACPI DSM driver which is used
  to ask the platform to supply the DIMM location of a reported hardware
  error and thus simplify all the EDAC logic when trying to map the
  error address to the respective DIMM.

  Core EDAC updates:

   - amd64_edac: AMD family 0x17, models 0x10-0x2f support (Michael Jin)
     Hygon Dhyana support (Pu Wen)

   - sb_edac: New maintainer + fixes (Tony Luck) Error reporting
     improvements and fixes (Qiuxu Zhuo)

   - ghes_edac: SMBIOS handle type 17 for DIMM locating and per-DIMM
     error accounting (Fan Wu)

   - altera_edac: Stratix10 support and refactoring (Thor Thayer)

  Out of tree addition:

   - acpi_adxl: Address Translation interface using an ACPI DSM (Tony
     Luck)

   - the usual amount of other misc fixes and cleanups all over"

* tag 'edac_for_4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp: (22 commits)
  ACPI/ADXL: Add address translation interface using an ACPI DSM
  EDAC, thunderx: Fix memory leak in thunderx_l2c_threaded_isr()
  EDAC, skx_edac: Fix logical channel intermediate decoding
  EDAC, {i7core,sb,skx}_edac: Fix uncorrected error counting
  EDAC, altera: Work around int-to-pointer-cast warnings
  EDAC, amd64: Add Hygon Dhyana support
  EDAC: Raise the maximum number of memory controllers
  arm64: dts: stratix10: Add peripheral EDAC nodes
  EDAC, altera: Add Stratix10 peripheral support
  EDAC, altera: Merge Stratix10 into the Arria10 SDRAM probe routine
  arm64: dts: stratix10: Add SDRAM node
  EDAC, altera: Combine Stratix10 and Arria10 probe functions
  arm64: dts: stratix10: Additions to EDAC System Manager
  EDAC, i7core: Remove set but not used variable pvt
  EDAC, ghes: Use CPER module handles to locate DIMMs
  EDAC: Correct DIMM capacity unit symbol
  EDAC, sb_edac: Fix signedness bugs in *_get_ha() functions
  EDAC, sb_edac: Fix reporting for patrol scrubber errors
  EDAC, sb_edac: Return early on ADDRV bit and address type test
  MAINTAINERS: Update maintainer for drivers/edac/sb_edac.c
  ...
......@@ -5364,7 +5364,8 @@ S: Maintained
F: drivers/edac/r82600_edac.c
EDAC-SBRIDGE
M: Mauro Carvalho Chehab <mchehab@kernel.org>
M: Tony Luck <tony.luck@intel.com>
R: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
L: linux-edac@vger.kernel.org
S: Maintained
F: drivers/edac/sb_edac.c
......
......@@ -473,16 +473,51 @@ watchdog3: watchdog@ffd00500 {
status = "disabled";
};
sdr: sdr@f8011100 {
compatible = "altr,sdr-ctl", "syscon";
reg = <0xf8011100 0xc0>;
};
eccmgr {
compatible = "altr,socfpga-s10-ecc-manager";
compatible = "altr,socfpga-a10-ecc-manager";
altr,sysmgr-syscon = <&sysmgr>;
#address-cells = <1>;
#size-cells = <1>;
interrupts = <0 15 4>, <0 95 4>;
interrupt-controller;
#interrupt-cells = <2>;
ranges;
sdramedac {
compatible = "altr,sdram-edac-s10";
altr,sdr-syscon = <&sdr>;
interrupts = <16 4>, <48 4>;
};
usb0-ecc@ff8c4000 {
compatible = "altr,socfpga-usb-ecc";
reg = <0xff8c4000 0x100>;
altr,ecc-parent = <&usb0>;
interrupts = <2 4>,
<34 4>;
};
emac0-rx-ecc@ff8c0000 {
compatible = "altr,socfpga-eth-mac-ecc";
reg = <0xff8c0000 0x100>;
altr,ecc-parent = <&gmac0>;
interrupts = <4 4>,
<36 4>;
};
emac0-tx-ecc@ff8c0400 {
compatible = "altr,socfpga-eth-mac-ecc";
reg = <0xff8c0400 0x100>;
altr,ecc-parent = <&gmac0>;
interrupts = <5 4>,
<37 4>;
};
};
qspi: spi@ff8d2000 {
......
......@@ -492,6 +492,9 @@ config ACPI_EXTLOG
driver adds support for that functionality with corresponding
tracepoint which carries that information to userspace.
config ACPI_ADXL
bool
menuconfig PMIC_OPREGION
bool "PMIC (Power Management Integrated Circuit) operation region support"
help
......
......@@ -61,6 +61,9 @@ acpi-$(CONFIG_ACPI_LPIT) += acpi_lpit.o
acpi-$(CONFIG_ACPI_GENERIC_GSI) += irq.o
acpi-$(CONFIG_ACPI_WATCHDOG) += acpi_watchdog.o
# Address translation
acpi-$(CONFIG_ACPI_ADXL) += acpi_adxl.o
# These are (potentially) separate modules
# IPMI may be used by other drivers, so it has to initialise before them
......
// SPDX-License-Identifier: GPL-2.0
/*
* Address translation interface via ACPI DSM.
* Copyright (C) 2018 Intel Corporation
*
* Specification for this interface is available at:
*
* https://cdrdv2.intel.com/v1/dl/getContent/603354
*/
#include <linux/acpi.h>
#include <linux/adxl.h>
#define ADXL_REVISION 0x1
#define ADXL_IDX_GET_ADDR_PARAMS 0x1
#define ADXL_IDX_FORWARD_TRANSLATE 0x2
#define ACPI_ADXL_PATH "\\_SB.ADXL"
/*
* The specification doesn't provide a limit on how many
* components are in a memory address. But since we allocate
* memory based on the number the BIOS tells us, we should
* defend against insane values.
*/
#define ADXL_MAX_COMPONENTS 500
#undef pr_fmt
#define pr_fmt(fmt) "ADXL: " fmt
static acpi_handle handle;
static union acpi_object *params;
static const guid_t adxl_guid =
GUID_INIT(0xAA3C050A, 0x7EA4, 0x4C1F,
0xAF, 0xDA, 0x12, 0x67, 0xDF, 0xD3, 0xD4, 0x8D);
static int adxl_count;
static char **adxl_component_names;
static union acpi_object *adxl_dsm(int cmd, union acpi_object argv[])
{
union acpi_object *obj, *o;
obj = acpi_evaluate_dsm_typed(handle, &adxl_guid, ADXL_REVISION,
cmd, argv, ACPI_TYPE_PACKAGE);
if (!obj) {
pr_info("DSM call failed for cmd=%d\n", cmd);
return NULL;
}
if (obj->package.count != 2) {
pr_info("Bad pkg count %d\n", obj->package.count);
goto err;
}
o = obj->package.elements;
if (o->type != ACPI_TYPE_INTEGER) {
pr_info("Bad 1st element type %d\n", o->type);
goto err;
}
if (o->integer.value) {
pr_info("Bad ret val %llu\n", o->integer.value);
goto err;
}
o = obj->package.elements + 1;
if (o->type != ACPI_TYPE_PACKAGE) {
pr_info("Bad 2nd element type %d\n", o->type);
goto err;
}
return obj;
err:
ACPI_FREE(obj);
return NULL;
}
/**
* adxl_get_component_names - get list of memory component names
* Returns NULL terminated list of string names
*
* Give the caller a pointer to the list of memory component names
* e.g. { "SystemAddress", "ProcessorSocketId", "ChannelId", ... NULL }
* Caller should count how many strings in order to allocate a buffer
* for the return from adxl_decode().
*/
const char * const *adxl_get_component_names(void)
{
return (const char * const *)adxl_component_names;
}
EXPORT_SYMBOL_GPL(adxl_get_component_names);
/**
* adxl_decode - ask BIOS to decode a system address to memory address
* @addr: the address to decode
* @component_values: pointer to array of values for each component
* Returns 0 on success, negative error code otherwise
*
* The index of each value returned in the array matches the index of
* each component name returned by adxl_get_component_names().
* Components that are not defined for this address translation (e.g.
* mirror channel number for a non-mirrored address) are set to ~0ull.
*/
int adxl_decode(u64 addr, u64 component_values[])
{
union acpi_object argv4[2], *results, *r;
int i, cnt;
if (!adxl_component_names)
return -EOPNOTSUPP;
argv4[0].type = ACPI_TYPE_PACKAGE;
argv4[0].package.count = 1;
argv4[0].package.elements = &argv4[1];
argv4[1].integer.type = ACPI_TYPE_INTEGER;
argv4[1].integer.value = addr;
results = adxl_dsm(ADXL_IDX_FORWARD_TRANSLATE, argv4);
if (!results)
return -EINVAL;
r = results->package.elements + 1;
cnt = r->package.count;
if (cnt != adxl_count) {
ACPI_FREE(results);
return -EINVAL;
}
r = r->package.elements;
for (i = 0; i < cnt; i++)
component_values[i] = r[i].integer.value;
ACPI_FREE(results);
return 0;
}
EXPORT_SYMBOL_GPL(adxl_decode);
static int __init adxl_init(void)
{
char *path = ACPI_ADXL_PATH;
union acpi_object *p;
acpi_status status;
int i;
status = acpi_get_handle(NULL, path, &handle);
if (ACPI_FAILURE(status)) {
pr_debug("No ACPI handle for path %s\n", path);
return -ENODEV;
}
if (!acpi_has_method(handle, "_DSM")) {
pr_info("No DSM method\n");
return -ENODEV;
}
if (!acpi_check_dsm(handle, &adxl_guid, ADXL_REVISION,
ADXL_IDX_GET_ADDR_PARAMS |
ADXL_IDX_FORWARD_TRANSLATE)) {
pr_info("DSM method does not support forward translate\n");
return -ENODEV;
}
params = adxl_dsm(ADXL_IDX_GET_ADDR_PARAMS, NULL);
if (!params) {
pr_info("Failed to get component names\n");
return -ENODEV;
}
p = params->package.elements + 1;
adxl_count = p->package.count;
if (adxl_count > ADXL_MAX_COMPONENTS) {
pr_info("Insane number of address component names %d\n", adxl_count);
ACPI_FREE(params);
return -ENODEV;
}
p = p->package.elements;
/*
* Allocate one extra for NULL termination.
*/
adxl_component_names = kcalloc(adxl_count + 1, sizeof(char *), GFP_KERNEL);
if (!adxl_component_names) {
ACPI_FREE(params);
return -ENOMEM;
}
for (i = 0; i < adxl_count; i++)
adxl_component_names[i] = p[i].string.pointer;
return 0;
}
subsys_initcall(adxl_init);
此差异已折叠。
......@@ -156,34 +156,6 @@
#define A10_INTMASK_CLR_OFST 0x10
#define A10_DDR0_IRQ_MASK BIT(17)
/************* Stratix10 Defines **************/
/* SDRAM Controller EccCtrl Register */
#define S10_ECCCTRL1_OFST 0xF8011100
/* SDRAM Controller DRAM IRQ Register */
#define S10_ERRINTEN_OFST 0xF8011110
/* SDRAM Interrupt Mode Register */
#define S10_INTMODE_OFST 0xF801111C
/* SDRAM Controller Error Status Register */
#define S10_INTSTAT_OFST 0xF8011120
/* SDRAM Controller ECC Error Address Register */
#define S10_DERRADDR_OFST 0xF801112C
#define S10_SERRADDR_OFST 0xF8011130
/* SDRAM Controller ECC Diagnostic Register */
#define S10_DIAGINTTEST_OFST 0xF8011124
/* SDRAM Single Bit Error Count Compare Set Register */
#define S10_SERRCNTREG_OFST 0xF801113C
/* Sticky registers for Uncorrected Errors */
#define S10_SYSMGR_UE_VAL_OFST 0xFFD12220
#define S10_SYSMGR_UE_ADDR_OFST 0xFFD12224
struct altr_sdram_prv_data {
int ecc_ctrl_offset;
int ecc_ctl_en_mask;
......@@ -319,15 +291,40 @@ struct altr_sdram_mc_data {
/************* Stratix10 Defines **************/
/* Stratix10 ECC Manager Defines */
#define S10_SYSMGR_ECC_INTMASK_VAL_OFST 0xFFD12090
#define S10_SYSMGR_ECC_INTMASK_SET_OFST 0xFFD12094
#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0xFFD12098
#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98
#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0
#define S10_SYSMGR_ECC_INTSTAT_SERR_OFST 0xFFD1209C
#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xFFD120A0
/* Sticky registers for Uncorrected Errors */
#define S10_SYSMGR_UE_VAL_OFST 0x120
#define S10_SYSMGR_UE_ADDR_OFST 0x124
#define S10_DDR0_IRQ_MASK BIT(16)
/* Define ECC Block Offsets for peripherals */
#define ECC_BLK_ADDRESS_OFST 0x40
#define ECC_BLK_RDATA0_OFST 0x44
#define ECC_BLK_RDATA1_OFST 0x48
#define ECC_BLK_RDATA2_OFST 0x4C
#define ECC_BLK_RDATA3_OFST 0x50
#define ECC_BLK_WDATA0_OFST 0x54
#define ECC_BLK_WDATA1_OFST 0x58
#define ECC_BLK_WDATA2_OFST 0x5C
#define ECC_BLK_WDATA3_OFST 0x60
#define ECC_BLK_RECC0_OFST 0x64
#define ECC_BLK_RECC1_OFST 0x68
#define ECC_BLK_WECC0_OFST 0x6C
#define ECC_BLK_WECC1_OFST 0x70
#define ECC_BLK_DBYTECTRL_OFST 0x74
#define ECC_BLK_ACCCTRL_OFST 0x78
#define ECC_BLK_STARTACC_OFST 0x7C
#define ECC_XACT_KICK 0x10000
#define ECC_WORD_WRITE 0xF
#define ECC_WRITE_DOVR 0x101
#define ECC_WRITE_EDOVR 0x103
#define ECC_READ_EOVR 0x2
#define ECC_READ_EDOVR 0x3
struct altr_edac_device_dev;
struct edac_device_prv_data {
......@@ -370,6 +367,7 @@ struct altr_arria10_edac {
struct irq_domain *domain;
struct irq_chip irq_chip;
struct list_head a10_ecc_devices;
struct notifier_block panic_notifier;
};
/*
......@@ -437,13 +435,4 @@ struct altr_arria10_edac {
#define INTEL_SIP_SMC_REG_WRITE \
INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE)
struct altr_stratix10_edac {
struct device *dev;
int sb_irq;
struct irq_domain *domain;
struct irq_chip irq_chip;
struct list_head s10_ecc_devices;
struct notifier_block panic_notifier;
};
#endif /* #ifndef _ALTERA_EDAC_H */
......@@ -211,7 +211,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
scrubval = scrubrates[i].scrubval;
if (pvt->fam == 0x17) {
if (pvt->fam == 0x17 || pvt->fam == 0x18) {
__f17h_set_scrubval(pvt, scrubval);
} else if (pvt->fam == 0x15 && pvt->model == 0x60) {
f15h_select_dct(pvt, 0);
......@@ -264,6 +264,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
break;
case 0x17:
case 0x18:
amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
if (scrubval & BIT(0)) {
amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
......@@ -1044,6 +1045,7 @@ static void determine_memory_type(struct amd64_pvt *pvt)
goto ddr3;
case 0x17:
case 0x18:
if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
pvt->dram_type = MEM_LRDDR4;
else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
......@@ -2200,6 +2202,15 @@ static struct amd64_family_type family_types[] = {
.dbam_to_cs = f17_base_addr_to_cs_size,
}
},
[F17_M10H_CPUS] = {
.ctl_name = "F17h_M10h",
.f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
.ops = {
.early_channel_count = f17_early_channel_count,
.dbam_to_cs = f17_base_addr_to_cs_size,
}
},
};
/*
......@@ -3188,8 +3199,18 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
break;
case 0x17:
if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
fam_type = &family_types[F17_M10H_CPUS];
pvt->ops = &family_types[F17_M10H_CPUS].ops;
break;
}
/* fall through */
case 0x18:
fam_type = &family_types[F17_CPUS];
pvt->ops = &family_types[F17_CPUS].ops;
if (pvt->fam == 0x18)
family_types[F17_CPUS].ctl_name = "F18h";
break;
default:
......@@ -3428,6 +3449,7 @@ static const struct x86_cpu_id amd64_cpuids[] = {
{ X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ }
};
MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
......
......@@ -115,6 +115,8 @@
#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582
#define PCI_DEVICE_ID_AMD_17H_DF_F0 0x1460
#define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee
/*
* Function 1 - Address Map
......@@ -281,6 +283,7 @@ enum amd_families {
F16_CPUS,
F16_M30H_CPUS,
F17_CPUS,
F17_M10H_CPUS,
NUM_FAMILIES,
};
......
......@@ -81,6 +81,18 @@ static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
(*num_dimm)++;
}
static int get_dimm_smbios_index(u16 handle)
{
struct mem_ctl_info *mci = ghes_pvt->mci;
int i;
for (i = 0; i < mci->tot_dimms; i++) {
if (mci->dimms[i]->smbios_handle == handle)
return i;
}
return -1;
}
static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
{
struct ghes_edac_dimm_fill *dimm_fill = arg;
......@@ -177,6 +189,8 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
entry->total_width, entry->data_width);
}
dimm->smbios_handle = entry->handle;
dimm_fill->count++;
}
}
......@@ -327,12 +341,21 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
const char *bank = NULL, *device = NULL;
int index = -1;
dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
if (bank != NULL && device != NULL)
p += sprintf(p, "DIMM location:%s %s ", bank, device);
else
p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
mem_err->mem_dev_handle);
index = get_dimm_smbios_index(mem_err->mem_dev_handle);
if (index >= 0) {
e->top_layer = index;
e->enable_per_layer_report = true;
}
}
if (p > e->location)
*(p - 1) = '\0';
......
......@@ -399,7 +399,7 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
if (nr_pages == 0)
continue;
edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j,
edac_dbg(0, "csrow %d, channel %d%s, size = %ld MiB\n", i, j,
stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages));
dimm->nr_pages = nr_pages;
......
......@@ -597,7 +597,7 @@ static int get_dimm_config(struct mem_ctl_info *mci)
/* DDR3 has 8 I/O banks */
size = (rows * cols * banks * ranks) >> (20 - 3);
edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
edac_dbg(0, "\tdimm %d %d MiB offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
j, size,
RANKOFFSET(dimm_dod[j]),
banks, ranks, rows, cols);
......@@ -1711,6 +1711,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
u32 errnum = find_first_bit(&error, 32);
if (uncorrected_error) {
core_err_cnt = 1;
if (ripv)
tp_event = HW_EVENT_ERR_FATAL;
else
......@@ -1815,14 +1816,12 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
struct mce *mce = (struct mce *)data;
struct i7core_dev *i7_dev;
struct mem_ctl_info *mci;
struct i7core_pvt *pvt;
i7_dev = get_i7core_dev(mce->socketid);
if (!i7_dev)
return NOTIFY_DONE;
mci = i7_dev->mci;
pvt = mci->pvt_info;
/*
* Just let mcelog handle it if the error is
......
......@@ -1059,7 +1059,8 @@ static int __init mce_amd_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86_vendor != X86_VENDOR_AMD)
if (c->x86_vendor != X86_VENDOR_AMD &&
c->x86_vendor != X86_VENDOR_HYGON)
return -ENODEV;
fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
......@@ -1113,6 +1114,7 @@ static int __init mce_amd_init(void)
break;
case 0x17:
case 0x18:
xec_mask = 0x3f;
if (!boot_cpu_has(X86_FEATURE_SMCA)) {
printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
......
......@@ -326,6 +326,7 @@ struct sbridge_info {
const struct interleave_pkg *interleave_pkg;
u8 max_sad;
u8 (*get_node_id)(struct sbridge_pvt *pvt);
u8 (*get_ha)(u8 bank);
enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt);
enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr);
struct pci_dev *pci_vtd;
......@@ -1002,6 +1003,39 @@ static u8 knl_get_node_id(struct sbridge_pvt *pvt)
return GET_BITFIELD(reg, 0, 2);
}
/*
* Use the reporting bank number to determine which memory
* controller (also known as "ha" for "home agent"). Sandy
* Bridge only has one memory controller per socket, so the
* answer is always zero.
*/
static u8 sbridge_get_ha(u8 bank)
{
return 0;
}
/*
* On Ivy Bridge, Haswell and Broadwell the error may be in a
* home agent bank (7, 8), or one of the per-channel memory
* controller banks (9 .. 16).
*/
static u8 ibridge_get_ha(u8 bank)
{
switch (bank) {
case 7 ... 8:
return bank - 7;
case 9 ... 16:
return (bank - 9) / 4;
default:
return 0xff;
}
}
/* Not used, but included for safety/symmetry */
static u8 knl_get_ha(u8 bank)
{
return 0xff;
}
static u64 haswell_get_tolm(struct sbridge_pvt *pvt)
{
......@@ -1622,7 +1656,7 @@ static int __populate_dimms(struct mem_ctl_info *mci,
size = ((u64)rows * cols * banks * ranks) >> (20 - 3);
npages = MiB_TO_PAGES(size);
edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
pvt->sbridge_dev->mc, pvt->sbridge_dev->dom, i, j,
size, npages,
banks, ranks, rows, cols);
......@@ -2207,6 +2241,60 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
return 0;
}
static int get_memory_error_data_from_mce(struct mem_ctl_info *mci,
const struct mce *m, u8 *socket,
u8 *ha, long *channel_mask,
char *msg)
{
u32 reg, channel = GET_BITFIELD(m->status, 0, 3);
struct mem_ctl_info *new_mci;
struct sbridge_pvt *pvt;
struct pci_dev *pci_ha;
bool tad0;
if (channel >= NUM_CHANNELS) {
sprintf(msg, "Invalid channel 0x%x", channel);
return -EINVAL;
}
pvt = mci->pvt_info;
if (!pvt->info.get_ha) {
sprintf(msg, "No get_ha()");
return -EINVAL;
}
*ha = pvt->info.get_ha(m->bank);
if (*ha != 0 && *ha != 1) {
sprintf(msg, "Impossible bank %d", m->bank);
return -EINVAL;
}
*socket = m->socketid;
new_mci = get_mci_for_node_id(*socket, *ha);
if (!new_mci) {
strcpy(msg, "mci socket got corrupted!");
return -EINVAL;
}
pvt = new_mci->pvt_info;
pci_ha = pvt->pci_ha;
pci_read_config_dword(pci_ha, tad_dram_rule[0], &reg);
tad0 = m->addr <= TAD_LIMIT(reg);
*channel_mask = 1 << channel;
if (pvt->mirror_mode == FULL_MIRRORING ||
(pvt->mirror_mode == ADDR_RANGE_MIRRORING && tad0)) {
*channel_mask |= 1 << ((channel + 2) % 4);
pvt->is_cur_addr_mirrored = true;
} else {
pvt->is_cur_addr_mirrored = false;
}
if (pvt->is_lockstep)
*channel_mask |= 1 << ((channel + 1) % 4);
return 0;
}
/****************************************************************************
Device initialization routines: put/get, init/exit
****************************************************************************/
......@@ -2877,10 +2965,16 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
u32 errcode = GET_BITFIELD(m->status, 0, 15);
u32 channel = GET_BITFIELD(m->status, 0, 3);
u32 optypenum = GET_BITFIELD(m->status, 4, 6);
/*
* Bits 5-0 of MCi_MISC give the least significant bit that is valid.
* A value 6 is for cache line aligned address, a value 12 is for page
* aligned address reported by patrol scrubber.
*/
u32 lsb = GET_BITFIELD(m->misc, 0, 5);
long channel_mask, first_channel;
u8 rank, socket, ha;
u8 rank = 0xff, socket, ha;
int rc, dimm;
char *area_type = NULL;
char *area_type = "DRAM";
if (pvt->info.type != SANDY_BRIDGE)
recoverable = true;
......@@ -2888,6 +2982,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
recoverable = GET_BITFIELD(m->status, 56, 56);
if (uncorrected_error) {
core_err_cnt = 1;
if (ripv) {
type = "FATAL";
tp_event = HW_EVENT_ERR_FATAL;
......@@ -2911,35 +3006,27 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
* cccc = channel
* If the mask doesn't match, report an error to the parsing logic
*/
if (! ((errcode & 0xef80) == 0x80)) {
optype = "Can't parse: it is not a mem";
} else {
switch (optypenum) {
case 0:
optype = "generic undef request error";
break;
case 1:
optype = "memory read error";
break;
case 2:
optype = "memory write error";
break;
case 3:
optype = "addr/cmd error";
break;
case 4:
optype = "memory scrubbing error";
break;
default:
optype = "reserved";
break;
}
switch (optypenum) {
case 0:
optype = "generic undef request error";
break;
case 1:
optype = "memory read error";
break;
case 2:
optype = "memory write error";
break;
case 3:
optype = "addr/cmd error";
break;
case 4:
optype = "memory scrubbing error";
break;
default:
optype = "reserved";
break;
}
/* Only decode errors with an valid address (ADDRV) */
if (!GET_BITFIELD(m->status, 58, 58))
return;
if (pvt->info.type == KNIGHTS_LANDING) {
if (channel == 14) {
edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n",
......@@ -2972,9 +3059,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
optype, msg);
}
return;
} else {
} else if (lsb < 12) {
rc = get_memory_error_data(mci, m->addr, &socket, &ha,
&channel_mask, &rank, &area_type, msg);
&channel_mask, &rank,
&area_type, msg);
} else {
rc = get_memory_error_data_from_mce(mci, m, &socket, &ha,
&channel_mask, msg);
}
if (rc < 0)
......@@ -2989,14 +3080,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
if (rank < 4)
if (rank == 0xff)
dimm = -1;
else if (rank < 4)
dimm = 0;
else if (rank < 8)
dimm = 1;
else
dimm = 2;
/*
* FIXME: On some memory configurations (mirror, lockstep), the
* Memory Controller can't point the error to a single DIMM. The
......@@ -3045,17 +3137,11 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
{
struct mce *mce = (struct mce *)data;
struct mem_ctl_info *mci;
struct sbridge_pvt *pvt;
char *type;
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
mci = get_mci_for_node_id(mce->socketid, IMC0);
if (!mci)
return NOTIFY_DONE;
pvt = mci->pvt_info;
/*
* Just let mcelog handle it if the error is
* outside the memory controller. A memory error
......@@ -3065,6 +3151,22 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
if ((mce->status & 0xefff) >> 7 != 1)
return NOTIFY_DONE;
/* Check ADDRV bit in STATUS */
if (!GET_BITFIELD(mce->status, 58, 58))
return NOTIFY_DONE;
/* Check MISCV bit in STATUS */
if (!GET_BITFIELD(mce->status, 59, 59))
return NOTIFY_DONE;
/* Check address type in MISC (physical address only) */
if (GET_BITFIELD(mce->misc, 6, 8) != 2)
return NOTIFY_DONE;
mci = get_mci_for_node_id(mce->socketid, IMC0);
if (!mci)
return NOTIFY_DONE;
if (mce->mcgstatus & MCG_STATUS_MCIP)
type = "Exception";
else
......@@ -3173,6 +3275,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = ibridge_dram_rule;
pvt->info.get_memory_type = get_memory_type;
pvt->info.get_node_id = get_node_id;
pvt->info.get_ha = ibridge_get_ha;
pvt->info.rir_limit = rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
......@@ -3197,6 +3300,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = sbridge_dram_rule;
pvt->info.get_memory_type = get_memory_type;
pvt->info.get_node_id = get_node_id;
pvt->info.get_ha = sbridge_get_ha;
pvt->info.rir_limit = rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
......@@ -3221,6 +3325,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = ibridge_dram_rule;
pvt->info.get_memory_type = haswell_get_memory_type;
pvt->info.get_node_id = haswell_get_node_id;
pvt->info.get_ha = ibridge_get_ha;
pvt->info.rir_limit = haswell_rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
......@@ -3245,6 +3350,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = ibridge_dram_rule;
pvt->info.get_memory_type = haswell_get_memory_type;
pvt->info.get_node_id = haswell_get_node_id;
pvt->info.get_ha = ibridge_get_ha;
pvt->info.rir_limit = haswell_rir_limit;
pvt->info.sad_limit = sad_limit;
pvt->info.interleave_mode = interleave_mode;
......@@ -3269,6 +3375,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
pvt->info.dram_rule = knl_dram_rule;
pvt->info.get_memory_type = knl_get_memory_type;
pvt->info.get_node_id = knl_get_node_id;
pvt->info.get_ha = knl_get_ha;
pvt->info.rir_limit = NULL;
pvt->info.sad_limit = knl_sad_limit;
pvt->info.interleave_mode = knl_interleave_mode;
......@@ -3320,17 +3427,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
return rc;
}
#define ICPU(model, table) \
{ X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table }
static const struct x86_cpu_id sbridge_cpuids[] = {
ICPU(INTEL_FAM6_SANDYBRIDGE_X, pci_dev_descr_sbridge_table),
ICPU(INTEL_FAM6_IVYBRIDGE_X, pci_dev_descr_ibridge_table),
ICPU(INTEL_FAM6_HASWELL_X, pci_dev_descr_haswell_table),
ICPU(INTEL_FAM6_BROADWELL_X, pci_dev_descr_broadwell_table),
ICPU(INTEL_FAM6_BROADWELL_XEON_D, pci_dev_descr_broadwell_table),
ICPU(INTEL_FAM6_XEON_PHI_KNL, pci_dev_descr_knl_table),
ICPU(INTEL_FAM6_XEON_PHI_KNM, pci_dev_descr_knl_table),
INTEL_CPU_FAM6(SANDYBRIDGE_X, pci_dev_descr_sbridge_table),
INTEL_CPU_FAM6(IVYBRIDGE_X, pci_dev_descr_ibridge_table),
INTEL_CPU_FAM6(HASWELL_X, pci_dev_descr_haswell_table),
INTEL_CPU_FAM6(BROADWELL_X, pci_dev_descr_broadwell_table),
INTEL_CPU_FAM6(BROADWELL_XEON_D, pci_dev_descr_broadwell_table),
INTEL_CPU_FAM6(XEON_PHI_KNL, pci_dev_descr_knl_table),
INTEL_CPU_FAM6(XEON_PHI_KNM, pci_dev_descr_knl_table),
{ }
};
MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids);
......
......@@ -364,7 +364,7 @@ static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
npages = MiB_TO_PAGES(size);
edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
imc->mc, chan, dimmno, size, npages,
banks, 1 << ranks, rows, cols);
......@@ -424,7 +424,7 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
dimm->mtype = MEM_NVDIMM;
dimm->edac_mode = EDAC_SECDED; /* likely better than this */
edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu Mb (%u pages)\n",
edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n",
imc->mc, chan, dimmno, size >> 20, dimm->nr_pages);
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
......@@ -668,7 +668,7 @@ static bool skx_sad_decode(struct decoded_addr *res)
break;
case 2:
lchan = (addr >> shift) % 2;
lchan = (lchan << 1) | ~lchan;
lchan = (lchan << 1) | !lchan;
break;
case 3:
lchan = ((addr >> shift) % 2) << 1;
......@@ -959,6 +959,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
recoverable = GET_BITFIELD(m->status, 56, 56);
if (uncorrected_error) {
core_err_cnt = 1;
if (ripv) {
type = "FATAL";
tp_event = HW_EVENT_ERR_FATAL;
......
......@@ -1884,7 +1884,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
default:
dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
l2c->pdev->device);
return IRQ_NONE;
goto err_free;
}
while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
......@@ -1906,7 +1906,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
l2c->ring_tail++;
}
return IRQ_HANDLED;
ret = IRQ_HANDLED;
err_free:
kfree(other);
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Address translation interface via ACPI DSM.
* Copyright (C) 2018 Intel Corporation
*/
#ifndef _LINUX_ADXL_H
#define _LINUX_ADXL_H
const char * const *adxl_get_component_names(void);
int adxl_decode(u64 addr, u64 component_values[]);
#endif /* _LINUX_ADXL_H */
......@@ -17,6 +17,7 @@
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/numa.h>
#define EDAC_DEVICE_NAME_LEN 31
......@@ -451,6 +452,8 @@ struct dimm_info {
u32 nr_pages; /* number of pages on this dimm */
unsigned csrow, cschannel; /* Points to the old API data */
u16 smbios_handle; /* Handle for SMBIOS type 17 */
};
/**
......@@ -670,6 +673,6 @@ struct mem_ctl_info {
/*
* Maximum number of memory controllers in the coherent fabric.
*/
#define EDAC_MAX_MCS 16
#define EDAC_MAX_MCS 2 * MAX_NUMNODES
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册