Commit 7e740ae6 authored by Linus Torvalds

Merge tag 'ras_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:
 "A relatively big amount of movements in RAS-land this time around:

   - First part of a series to move the AMD address translation code
     from arch/x86/ to amd64_edac as that is its only user anyway

   - Some MCE error injection improvements to the AMD side

   - Reorganization of the #MC handler code and the facilities it calls
     to make it noinstr-safe

   - Add support for new AMD MCA bank types and non-uniform banks layout

   - The usual set of cleanups and fixes"

* tag 'ras_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  x86/mce: Reduce number of machine checks taken during recovery
  x86/mce/inject: Avoid out-of-bounds write when setting flags
  x86/MCE/AMD, EDAC/mce_amd: Support non-uniform MCA bank type enumeration
  x86/MCE/AMD, EDAC/mce_amd: Add new SMCA bank types
  x86/mce: Check regs before accessing it
  x86/mce: Mark mce_start() noinstr
  x86/mce: Mark mce_timed_out() noinstr
  x86/mce: Move the tainting outside of the noinstr region
  x86/mce: Mark mce_read_aux() noinstr
  x86/mce: Mark mce_end() noinstr
  x86/mce: Mark mce_panic() noinstr
  x86/mce: Prevent severity computation from being instrumented
  x86/mce: Allow instrumentation during task work queueing
  x86/mce: Remove noinstr annotation from mce_setup()
  x86/mce: Use mce_rdmsrl() in severity checking code
  x86/mce: Remove function-local cpus variables
  x86/mce: Do not use memset to clear the banks bitmaps
  x86/mce/inject: Set the valid bit in MCA_STATUS before error injection
  x86/mce/inject: Check if a bank is populated before injecting
  x86/mce: Get rid of cpu_missing
  ...
......@@ -24,7 +24,6 @@ extern int amd_set_subcaches(int, unsigned long);
extern int amd_smn_read(u16 node, u32 address, u32 *value);
extern int amd_smn_write(u16 node, u32 address, u32 value);
extern int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo);
struct amd_l3_cache {
unsigned indices;
......
......@@ -313,31 +313,22 @@ enum smca_bank_types {
SMCA_SMU, /* System Management Unit */
SMCA_SMU_V2,
SMCA_MP5, /* Microprocessor 5 Unit */
SMCA_MPDMA, /* MPDMA Unit */
SMCA_NBIO, /* Northbridge IO Unit */
SMCA_PCIE, /* PCI Express Unit */
SMCA_PCIE_V2,
SMCA_XGMI_PCS, /* xGMI PCS Unit */
SMCA_NBIF, /* NBIF Unit */
SMCA_SHUB, /* System HUB Unit */
SMCA_SATA, /* SATA Unit */
SMCA_USB, /* USB Unit */
SMCA_GMI_PCS, /* GMI PCS Unit */
SMCA_XGMI_PHY, /* xGMI PHY Unit */
SMCA_WAFL_PHY, /* WAFL PHY Unit */
SMCA_GMI_PHY, /* GMI PHY Unit */
N_SMCA_BANK_TYPES
};
#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
struct smca_hwid {
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
u8 count; /* Number of instances. */
};
struct smca_bank {
struct smca_hwid *hwid;
u32 id; /* Value of MCA_IPID[InstanceId]. */
u8 sysfs_id; /* Value used for sysfs name. */
};
extern struct smca_bank smca_banks[MAX_NR_BANKS];
extern const char *smca_get_long_name(enum smca_bank_types t);
extern bool amd_mce_is_memory_error(struct mce *m);
......@@ -345,16 +336,13 @@ extern int mce_threshold_create_device(unsigned int cpu);
extern int mce_threshold_remove_device(unsigned int cpu);
void mce_amd_feature_init(struct cpuinfo_x86 *c);
int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
enum smca_bank_types smca_get_bank_type(unsigned int bank);
enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank);
#else
static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
static inline int
umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
#endif
static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
......
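Note: with the prototype change above, smca_get_bank_type() now takes the CPU whose per-CPU bank layout should be consulted. A minimal, hypothetical caller sketch (not part of this diff) showing the updated calling convention:

	/* Hypothetical helper; assumes <asm/mce.h> and a decoded struct mce. */
	static bool mce_bank_is_umc(struct mce *m)
	{
		/* Look the bank type up on the CPU that reported the error. */
		return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC;
	}
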
......@@ -29,7 +29,7 @@
#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
/* Protect the PCI config register pairs used for SMN and DF indirect access. */
/* Protect the PCI config register pairs used for SMN. */
static DEFINE_MUTEX(smn_mutex);
static u32 *flush_words;
......@@ -182,53 +182,6 @@ int amd_smn_write(u16 node, u32 address, u32 value)
}
EXPORT_SYMBOL_GPL(amd_smn_write);
/*
* Data Fabric Indirect Access uses FICAA/FICAD.
*
* Fabric Indirect Configuration Access Address (FICAA): Constructed based
* on the device's Instance Id and the PCI function and register offset of
* the desired register.
*
* Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO
* and FICAD HI registers but so far we only need the LO register.
*/
int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
{
struct pci_dev *F4;
u32 ficaa;
int err = -ENODEV;
if (node >= amd_northbridges.num)
goto out;
F4 = node_to_amd_nb(node)->link;
if (!F4)
goto out;
ficaa = 1;
ficaa |= reg & 0x3FC;
ficaa |= (func & 0x7) << 11;
ficaa |= instance_id << 16;
mutex_lock(&smn_mutex);
err = pci_write_config_dword(F4, 0x5C, ficaa);
if (err) {
pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa);
goto out_unlock;
}
err = pci_read_config_dword(F4, 0x98, lo);
if (err)
pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa);
out_unlock:
mutex_unlock(&smn_mutex);
out:
return err;
}
EXPORT_SYMBOL_GPL(amd_df_indirect_read);
int amd_cache_northbridges(void)
{
......
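For reference, the FICAA value written at PCI config offset 0x5C is assembled from the target register offset, PCI function and instance id exactly as the removed amd_df_indirect_read() above does (the same construction reappears in amd64_edac.c further down). A worked encoding with illustrative numbers only:

	/* Illustrative only: reg 0x1B4 (DramOffset), function 0, instance id 5. */
	u32 ficaa = 1;                  /* instance (non-broadcast) access */
	ficaa |= 0x1B4 & 0x3FC;         /* register offset */
	ficaa |= (0 & 0x7) << 11;       /* PCI function */
	ficaa |= 5 << 16;               /* instance id, e.g. the UMC/channel number */
	/* ficaa == 0x000501b5 */
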
......@@ -71,6 +71,22 @@ static const char * const smca_umc_block_names[] = {
"misc_umc"
};
#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
struct smca_hwid {
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
};
struct smca_bank {
const struct smca_hwid *hwid;
u32 id; /* Value of MCA_IPID[InstanceId]. */
u8 sysfs_id; /* Value used for sysfs name. */
};
static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks);
static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts);
struct smca_bank_name {
const char *name; /* Short name for sysfs */
const char *long_name; /* Long name for pretty-printing */
......@@ -95,11 +111,18 @@ static struct smca_bank_name smca_names[] = {
[SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
[SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" },
[SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
[SMCA_MPDMA] = { "mpdma", "MPDMA Unit" },
[SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
[SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" },
[SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" },
[SMCA_NBIF] = { "nbif", "NBIF Unit" },
[SMCA_SHUB] = { "shub", "System Hub Unit" },
[SMCA_SATA] = { "sata", "SATA Unit" },
[SMCA_USB] = { "usb", "USB Unit" },
[SMCA_GMI_PCS] = { "gmi_pcs", "Global Memory Interconnect PCS Unit" },
[SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" },
[SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" },
[SMCA_GMI_PHY] = { "gmi_phy", "Global Memory Interconnect PHY Unit" },
};
static const char *smca_get_name(enum smca_bank_types t)
......@@ -119,14 +142,14 @@ const char *smca_get_long_name(enum smca_bank_types t)
}
EXPORT_SYMBOL_GPL(smca_get_long_name);
enum smca_bank_types smca_get_bank_type(unsigned int bank)
enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
{
struct smca_bank *b;
if (bank >= MAX_NR_BANKS)
return N_SMCA_BANK_TYPES;
b = &smca_banks[bank];
b = &per_cpu(smca_banks, cpu)[bank];
if (!b->hwid)
return N_SMCA_BANK_TYPES;
......@@ -134,7 +157,7 @@ enum smca_bank_types smca_get_bank_type(unsigned int bank)
}
EXPORT_SYMBOL_GPL(smca_get_bank_type);
static struct smca_hwid smca_hwid_mcatypes[] = {
static const struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype } */
/* Reserved type */
......@@ -174,6 +197,9 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
/* Microprocessor 5 Unit MCA type */
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2) },
/* MPDMA MCA type */
{ SMCA_MPDMA, HWID_MCATYPE(0x01, 0x3) },
/* Northbridge IO Unit MCA type */
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) },
......@@ -181,19 +207,17 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
{ SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) },
/* xGMI PCS MCA type */
{ SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) },
/* xGMI PHY MCA type */
{ SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) },
{ SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) },
{ SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) },
{ SMCA_USB, HWID_MCATYPE(0xAA, 0x0) },
{ SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) },
{ SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
/* WAFL PHY MCA type */
{ SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) },
{ SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) },
};
struct smca_bank smca_banks[MAX_NR_BANKS];
EXPORT_SYMBOL_GPL(smca_banks);
/*
* In SMCA enabled processors, we can have multiple banks for a given IP type.
* So to define a unique name for each bank, we use a temp c-string to append
......@@ -249,8 +273,9 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
static void smca_configure(unsigned int bank, unsigned int cpu)
{
u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
const struct smca_hwid *s_hwid;
unsigned int i, hwid_mcatype;
struct smca_hwid *s_hwid;
u32 high, low;
u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank);
......@@ -286,10 +311,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
smca_set_misc_banks_map(bank, cpu);
/* Return early if this bank was already initialized. */
if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0)
return;
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
return;
......@@ -300,10 +321,11 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
s_hwid = &smca_hwid_mcatypes[i];
if (hwid_mcatype == s_hwid->hwid_mcatype) {
smca_banks[bank].hwid = s_hwid;
smca_banks[bank].id = low;
smca_banks[bank].sysfs_id = s_hwid->count++;
this_cpu_ptr(smca_banks)[bank].hwid = s_hwid;
this_cpu_ptr(smca_banks)[bank].id = low;
this_cpu_ptr(smca_banks)[bank].sysfs_id = bank_counts[s_hwid->bank_type]++;
break;
}
}
......@@ -589,7 +611,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
bool amd_filter_mce(struct mce *m)
{
enum smca_bank_types bank_type = smca_get_bank_type(m->bank);
enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
struct cpuinfo_x86 *c = &boot_cpu_data;
/* See Family 17h Models 10h-2Fh Erratum #1114. */
......@@ -627,7 +649,7 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
} else if (c->x86 == 0x17 &&
(c->x86_model >= 0x10 && c->x86_model <= 0x2F)) {
if (smca_get_bank_type(bank) != SMCA_IF)
if (smca_get_bank_type(smp_processor_id(), bank) != SMCA_IF)
return;
msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank);
......@@ -689,213 +711,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}
int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
{
u64 dram_base_addr, dram_limit_addr, dram_hole_base;
/* We start from the normalized address */
u64 ret_addr = norm_addr;
u32 tmp;
u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask;
u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets;
u8 intlv_addr_sel, intlv_addr_bit;
u8 num_intlv_bits, hashed_bit;
u8 lgcy_mmio_hole_en, base = 0;
u8 cs_mask, cs_id = 0;
bool hash_enabled = false;
/* Read D18F0x1B4 (DramOffset), check if base 1 is used. */
if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp))
goto out_err;
/* Remove HiAddrOffset from normalized address, if enabled: */
if (tmp & BIT(0)) {
u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8;
if (norm_addr >= hi_addr_offset) {
ret_addr -= hi_addr_offset;
base = 1;
}
}
/* Read D18F0x110 (DramBaseAddress). */
if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp))
goto out_err;
/* Check if address range is valid. */
if (!(tmp & BIT(0))) {
pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n",
__func__, tmp);
goto out_err;
}
lgcy_mmio_hole_en = tmp & BIT(1);
intlv_num_chan = (tmp >> 4) & 0xF;
intlv_addr_sel = (tmp >> 8) & 0x7;
dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16;
/* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
if (intlv_addr_sel > 3) {
pr_err("%s: Invalid interleave address select %d.\n",
__func__, intlv_addr_sel);
goto out_err;
}
/* Read D18F0x114 (DramLimitAddress). */
if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp))
goto out_err;
intlv_num_sockets = (tmp >> 8) & 0x1;
intlv_num_dies = (tmp >> 10) & 0x3;
dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
intlv_addr_bit = intlv_addr_sel + 8;
/* Re-use intlv_num_chan by setting it equal to log2(#channels) */
switch (intlv_num_chan) {
case 0: intlv_num_chan = 0; break;
case 1: intlv_num_chan = 1; break;
case 3: intlv_num_chan = 2; break;
case 5: intlv_num_chan = 3; break;
case 7: intlv_num_chan = 4; break;
case 8: intlv_num_chan = 1;
hash_enabled = true;
break;
default:
pr_err("%s: Invalid number of interleaved channels %d.\n",
__func__, intlv_num_chan);
goto out_err;
}
num_intlv_bits = intlv_num_chan;
if (intlv_num_dies > 2) {
pr_err("%s: Invalid number of interleaved nodes/dies %d.\n",
__func__, intlv_num_dies);
goto out_err;
}
num_intlv_bits += intlv_num_dies;
/* Add a bit if sockets are interleaved. */
num_intlv_bits += intlv_num_sockets;
/* Assert num_intlv_bits <= 4 */
if (num_intlv_bits > 4) {
pr_err("%s: Invalid interleave bits %d.\n",
__func__, num_intlv_bits);
goto out_err;
}
if (num_intlv_bits > 0) {
u64 temp_addr_x, temp_addr_i, temp_addr_y;
u8 die_id_bit, sock_id_bit, cs_fabric_id;
/*
* Read FabricBlockInstanceInformation3_CS[BlockFabricID].
* This is the fabric id for this coherent slave. Use
* umc/channel# as instance id of the coherent slave
* for FICAA.
*/
if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp))
goto out_err;
cs_fabric_id = (tmp >> 8) & 0xFF;
die_id_bit = 0;
/* If interleaved over more than 1 channel: */
if (intlv_num_chan) {
die_id_bit = intlv_num_chan;
cs_mask = (1 << die_id_bit) - 1;
cs_id = cs_fabric_id & cs_mask;
}
sock_id_bit = die_id_bit;
/* Read D18F1x208 (SystemFabricIdMask). */
if (intlv_num_dies || intlv_num_sockets)
if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp))
goto out_err;
/* If interleaved over more than 1 die. */
if (intlv_num_dies) {
sock_id_bit = die_id_bit + intlv_num_dies;
die_id_shift = (tmp >> 24) & 0xF;
die_id_mask = (tmp >> 8) & 0xFF;
cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit;
}
/* If interleaved over more than 1 socket. */
if (intlv_num_sockets) {
socket_id_shift = (tmp >> 28) & 0xF;
socket_id_mask = (tmp >> 16) & 0xFF;
cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit;
}
/*
* The pre-interleaved address consists of XXXXXXIIIYYYYY
* where III is the ID for this CS, and XXXXXXYYYYY are the
* address bits from the post-interleaved address.
* "num_intlv_bits" has been calculated to tell us how many "I"
* bits there are. "intlv_addr_bit" tells us how many "Y" bits
* there are (where "I" starts).
*/
temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0);
temp_addr_i = (cs_id << intlv_addr_bit);
temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;
ret_addr = temp_addr_x | temp_addr_i | temp_addr_y;
}
/* Add dram base address */
ret_addr += dram_base_addr;
/* If legacy MMIO hole enabled */
if (lgcy_mmio_hole_en) {
if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp))
goto out_err;
dram_hole_base = tmp & GENMASK(31, 24);
if (ret_addr >= dram_hole_base)
ret_addr += (BIT_ULL(32) - dram_hole_base);
}
if (hash_enabled) {
/* Save some parentheses and grab ls-bit at the end. */
hashed_bit = (ret_addr >> 12) ^
(ret_addr >> 18) ^
(ret_addr >> 21) ^
(ret_addr >> 30) ^
cs_id;
hashed_bit &= BIT(0);
if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0)))
ret_addr ^= BIT(intlv_addr_bit);
}
/* Is calculated system address is above DRAM limit address? */
if (ret_addr > dram_limit_addr)
goto out_err;
*sys_addr = ret_addr;
return 0;
out_err:
return -EINVAL;
}
EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
bool amd_mce_is_memory_error(struct mce *m)
{
/* ErrCodeExt[20:16] */
u8 xec = (m->status >> 16) & 0x1f;
if (mce_flags.smca)
return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0;
return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0;
return m->bank == 4 && xec == 0x8;
}
......@@ -1211,7 +1033,7 @@ static struct kobj_type threshold_ktype = {
.release = threshold_block_release,
};
static const char *get_name(unsigned int bank, struct threshold_block *b)
static const char *get_name(unsigned int cpu, unsigned int bank, struct threshold_block *b)
{
enum smca_bank_types bank_type;
......@@ -1222,7 +1044,7 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return th_names[bank];
}
bank_type = smca_get_bank_type(bank);
bank_type = smca_get_bank_type(cpu, bank);
if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;
......@@ -1232,12 +1054,12 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return NULL;
}
if (smca_banks[bank].hwid->count == 1)
if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
return smca_get_name(bank_type);
snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
"%s_%x", smca_get_name(bank_type),
smca_banks[bank].sysfs_id);
"%s_%u", smca_get_name(bank_type),
per_cpu(smca_banks, cpu)[bank].sysfs_id);
return buf_mcatype;
}
......@@ -1293,7 +1115,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
else
tb->blocks = b;
err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b));
err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b));
if (err)
goto out_free;
recurse:
......@@ -1348,7 +1170,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
struct device *dev = this_cpu_read(mce_device);
struct amd_northbridge *nb = NULL;
struct threshold_bank *b = NULL;
const char *name = get_name(bank, NULL);
const char *name = get_name(cpu, bank, NULL);
int err = 0;
if (!dev)
......
......@@ -99,7 +99,6 @@ struct mca_config mca_cfg __read_mostly = {
static DEFINE_PER_CPU(struct mce, mces_seen);
static unsigned long mce_need_notify;
static int cpu_missing;
/*
* MCA banks polled by the period polling timer for corrected events.
......@@ -128,7 +127,7 @@ static struct irq_work mce_irq_work;
BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
/* Do initial initialization of a struct mce */
noinstr void mce_setup(struct mce *m)
void mce_setup(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
m->cpu = m->extcpu = smp_processor_id();
......@@ -267,11 +266,17 @@ static void wait_for_panic(void)
panic("Panicing machine check CPU died");
}
static void mce_panic(const char *msg, struct mce *final, char *exp)
static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
{
int apei_err = 0;
struct llist_node *pending;
struct mce_evt_llist *l;
int apei_err = 0;
/*
* Allow instrumentation around external facilities usage. Not that it
* matters a whole lot since the machine is going to panic anyway.
*/
instrumentation_begin();
if (!fake_panic) {
/*
......@@ -286,7 +291,7 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
} else {
/* Don't log too much for fake panic */
if (atomic_inc_return(&mce_fake_panicked) > 1)
return;
goto out;
}
pending = mce_gen_pool_prepare_records();
/* First print corrected ones that are still unlogged */
......@@ -314,8 +319,6 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
if (!apei_err)
apei_err = apei_write_mce(final);
}
if (cpu_missing)
pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n");
if (exp)
pr_emerg(HW_ERR "Machine check: %s\n", exp);
if (!fake_panic) {
......@@ -324,6 +327,9 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
panic(msg);
} else
pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
out:
instrumentation_end();
}
/* Support code for software error injection */
......@@ -365,7 +371,7 @@ void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr)
}
/* MSR access wrappers used for error injection */
static noinstr u64 mce_rdmsrl(u32 msr)
noinstr u64 mce_rdmsrl(u32 msr)
{
DECLARE_ARGS(val, low, high);
......@@ -433,9 +439,15 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
* check into our "mce" struct so that we can use it later to assess
* the severity of the problem as we read per-bank specific details.
*/
static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
static noinstr void mce_gather_info(struct mce *m, struct pt_regs *regs)
{
/*
* Enable instrumentation around mce_setup() which calls external
* facilities.
*/
instrumentation_begin();
mce_setup(m);
instrumentation_end();
m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
if (regs) {
......@@ -636,7 +648,7 @@ static struct notifier_block mce_default_nb = {
/*
* Read ADDR and MISC registers.
*/
static void mce_read_aux(struct mce *m, int i)
static noinstr void mce_read_aux(struct mce *m, int i)
{
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));
......@@ -871,8 +883,13 @@ static cpumask_t mce_missing_cpus = CPU_MASK_ALL;
/*
* Check if a timeout waiting for other CPUs happened.
*/
static int mce_timed_out(u64 *t, const char *msg)
static noinstr int mce_timed_out(u64 *t, const char *msg)
{
int ret = 0;
/* Enable instrumentation around calls to external facilities */
instrumentation_begin();
/*
* The others already did panic for some reason.
* Bail out like in a timeout.
......@@ -891,13 +908,17 @@ static int mce_timed_out(u64 *t, const char *msg)
cpumask_pr_args(&mce_missing_cpus));
mce_panic(msg, NULL, NULL);
}
cpu_missing = 1;
return 1;
ret = 1;
goto out;
}
*t -= SPINUNIT;
out:
touch_nmi_watchdog();
return 0;
instrumentation_end();
return ret;
}
/*
......@@ -986,14 +1007,13 @@ static atomic_t global_nwo;
* in the entry order.
* TBD double check parallel CPU hotunplug
*/
static int mce_start(int *no_way_out)
static noinstr int mce_start(int *no_way_out)
{
int order;
int cpus = num_online_cpus();
u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
int order, ret = -1;
if (!timeout)
return -1;
return ret;
atomic_add(*no_way_out, &global_nwo);
/*
......@@ -1003,14 +1023,17 @@ static int mce_start(int *no_way_out)
order = atomic_inc_return(&mce_callin);
cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);
/* Enable instrumentation around calls to external facilities */
instrumentation_begin();
/*
* Wait for everyone.
*/
while (atomic_read(&mce_callin) != cpus) {
while (atomic_read(&mce_callin) != num_online_cpus()) {
if (mce_timed_out(&timeout,
"Timeout: Not all CPUs entered broadcast exception handler")) {
atomic_set(&global_nwo, 0);
return -1;
goto out;
}
ndelay(SPINUNIT);
}
......@@ -1036,7 +1059,7 @@ static int mce_start(int *no_way_out)
if (mce_timed_out(&timeout,
"Timeout: Subject CPUs unable to finish machine check processing")) {
atomic_set(&global_nwo, 0);
return -1;
goto out;
}
ndelay(SPINUNIT);
}
......@@ -1047,17 +1070,25 @@ static int mce_start(int *no_way_out)
*/
*no_way_out = atomic_read(&global_nwo);
return order;
ret = order;
out:
instrumentation_end();
return ret;
}
/*
* Synchronize between CPUs after main scanning loop.
* This invokes the bulk of the Monarch processing.
*/
static int mce_end(int order)
static noinstr int mce_end(int order)
{
int ret = -1;
u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
int ret = -1;
/* Allow instrumentation around external facilities. */
instrumentation_begin();
if (!timeout)
goto reset;
......@@ -1070,14 +1101,11 @@ static int mce_end(int order)
atomic_inc(&mce_executing);
if (order == 1) {
/* CHECKME: Can this race with a parallel hotplug? */
int cpus = num_online_cpus();
/*
* Monarch: Wait for everyone to go through their scanning
* loops.
*/
while (atomic_read(&mce_executing) <= cpus) {
while (atomic_read(&mce_executing) <= num_online_cpus()) {
if (mce_timed_out(&timeout,
"Timeout: Monarch CPU unable to finish machine check processing"))
goto reset;
......@@ -1101,7 +1129,8 @@ static int mce_end(int order)
/*
* Don't reset anything. That's done by the Monarch.
*/
return 0;
ret = 0;
goto out;
}
/*
......@@ -1117,6 +1146,10 @@ static int mce_end(int order)
* Let others run again.
*/
atomic_set(&mce_executing, 0);
out:
instrumentation_end();
return ret;
}
......@@ -1165,13 +1198,14 @@ static noinstr bool mce_check_crashing_cpu(void)
return false;
}
static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
static __always_inline int
__mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks, int no_way_out,
int *worst)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
int severity, i;
int severity, i, taint = 0;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
__clear_bit(i, toclear);
......@@ -1198,7 +1232,7 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin
continue;
/* Set taint even when machine check was not enabled. */
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
taint++;
severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
......@@ -1221,7 +1255,13 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin
/* assuming valid severity level != 0 */
m->severity = severity;
/*
* Enable instrumentation around the mce_log() call which is
* done in #MC context, where instrumentation is disabled.
*/
instrumentation_begin();
mce_log(m);
instrumentation_end();
if (severity > *worst) {
*final = *m;
......@@ -1231,6 +1271,8 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin
/* mce_clear_state will clear *final, save locally for use later */
*m = *final;
return taint;
}
static void kill_me_now(struct callback_head *ch)
......@@ -1320,11 +1362,11 @@ static noinstr void unexpected_machine_check(struct pt_regs *regs)
}
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
* The actual machine check handler. This only handles real exceptions when
* something got corrupted coming in through int 18.
*
* This is executed in NMI context not subject to normal locking rules. This
* implies that most kernel services cannot be safely used. Don't even
* This is executed in #MC context not subject to normal locking rules.
* This implies that most kernel services cannot be safely used. Don't even
* think about putting a printk in there!
*
* On Intel systems this is entered on all CPUs in parallel through
......@@ -1336,12 +1378,20 @@ static noinstr void unexpected_machine_check(struct pt_regs *regs)
* issues: if the machine check was due to a failure of the memory
* backing the user stack, tracing that reads the user stack will cause
* potentially infinite recursion.
*
* Currently, the #MC handler calls out to a number of external facilities
* and, therefore, allows instrumentation around them. The optimal thing to
* have would be to do the absolutely minimal work required in #MC context
* and have instrumentation disabled only around that. Further processing can
* then happen in process context where instrumentation is allowed. Achieving
* that requires careful auditing and modifications. Until then, the code
* allows instrumentation temporarily, where required. *
*/
noinstr void do_machine_check(struct pt_regs *regs)
{
int worst = 0, order, no_way_out, kill_current_task, lmce;
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
int worst = 0, order, no_way_out, kill_current_task, lmce, taint = 0;
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS) = { 0 };
DECLARE_BITMAP(toclear, MAX_NR_BANKS) = { 0 };
struct mca_config *cfg = &mca_cfg;
struct mce m, *final;
char *msg = NULL;
......@@ -1385,7 +1435,6 @@ noinstr void do_machine_check(struct pt_regs *regs)
final = this_cpu_ptr(&mces_seen);
*final = m;
memset(valid_banks, 0, sizeof(valid_banks));
no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
barrier();
......@@ -1419,7 +1468,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
order = mce_start(&no_way_out);
}
__mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
taint = __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
......@@ -1451,6 +1500,16 @@ noinstr void do_machine_check(struct pt_regs *regs)
}
}
/*
* Enable instrumentation around the external facilities like task_work_add()
* (via queue_task_work()), fixup_exception() etc. For now, that is. Fixing this
* properly would need a lot more involved reorganization.
*/
instrumentation_begin();
if (taint)
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
if (worst != MCE_AR_SEVERITY && !kill_current_task)
goto out;
......@@ -1482,7 +1541,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (m.kflags & MCE_IN_KERNEL_COPYIN)
queue_task_work(&m, msg, kill_me_never);
}
out:
instrumentation_end();
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
}
EXPORT_SYMBOL_GPL(do_machine_check);
......@@ -2702,7 +2764,6 @@ struct dentry *mce_get_debugfs_dir(void)
static void mce_reset(void)
{
cpu_missing = 0;
atomic_set(&mce_fake_panicked, 0);
atomic_set(&mce_executing, 0);
atomic_set(&mce_callin, 0);
......
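The recurring change in the mce/core.c hunks above is one pattern: functions that run in #MC context are marked noinstr, and any unavoidable calls into instrumentable facilities are bracketed with instrumentation_begin()/instrumentation_end(). A minimal illustrative sketch of that pattern (not taken from the diff):

	static noinstr int example_mc_helper(struct mce *m)
	{
		int ret = 0;

		/* noinstr region: no tracing, no instrumentation allowed here. */

		/* Explicitly allow instrumentation around the external facility. */
		instrumentation_begin();
		mce_log(m);		/* external, instrumentable call */
		instrumentation_end();

		return ret;
	}
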
......@@ -74,7 +74,6 @@ MCE_INJECT_SET(status);
MCE_INJECT_SET(misc);
MCE_INJECT_SET(addr);
MCE_INJECT_SET(synd);
MCE_INJECT_SET(ipid);
#define MCE_INJECT_GET(reg) \
static int inj_##reg##_get(void *data, u64 *val) \
......@@ -95,6 +94,20 @@ DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
/* Use the user provided IPID value on a sw injection. */
static int inj_ipid_set(void *data, u64 val)
{
struct mce *m = (struct mce *)data;
if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
if (inj_type == SW_INJ)
m->ipid = val;
}
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(ipid_fops, inj_ipid_get, inj_ipid_set, "%llx\n");
static void setup_inj_struct(struct mce *m)
......@@ -350,7 +363,7 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf,
char buf[MAX_FLAG_OPT_SIZE], *__buf;
int err;
if (cnt > MAX_FLAG_OPT_SIZE)
if (!cnt || cnt > MAX_FLAG_OPT_SIZE)
return -EINVAL;
if (copy_from_user(&buf, ubuf, cnt))
......@@ -490,6 +503,8 @@ static void do_inject(void)
i_mce.tsc = rdtsc_ordered();
i_mce.status |= MCI_STATUS_VAL;
if (i_mce.misc)
i_mce.status |= MCI_STATUS_MISCV;
......@@ -577,6 +592,33 @@ static int inj_bank_set(void *data, u64 val)
}
m->bank = val;
/*
* sw-only injection allows to write arbitrary values into the MCA
* registers because it tests only the decoding paths.
*/
if (inj_type == SW_INJ)
goto inject;
/*
* Read IPID value to determine if a bank is populated on the target
* CPU.
*/
if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
u64 ipid;
if (rdmsrl_on_cpu(m->extcpu, MSR_AMD64_SMCA_MCx_IPID(val), &ipid)) {
pr_err("Error reading IPID on CPU%d\n", m->extcpu);
return -EINVAL;
}
if (!ipid) {
pr_err("Cannot inject into unpopulated bank %llu\n", val);
return -ENODEV;
}
}
inject:
do_inject();
/* Reset injection struct */
......
......@@ -207,4 +207,6 @@ static inline void pentium_machine_check(struct pt_regs *regs) {}
static inline void winchip_machine_check(struct pt_regs *regs) {}
#endif
noinstr u64 mce_rdmsrl(u32 msr);
#endif /* __X86_MCE_INTERNAL_H__ */
......@@ -222,6 +222,9 @@ static bool is_copy_from_user(struct pt_regs *regs)
struct insn insn;
int ret;
if (!regs)
return false;
if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
return false;
......@@ -263,24 +266,36 @@ static bool is_copy_from_user(struct pt_regs *regs)
* distinguish an exception taken in user from from one
* taken in the kernel.
*/
static int error_context(struct mce *m, struct pt_regs *regs)
static noinstr int error_context(struct mce *m, struct pt_regs *regs)
{
int fixup_type;
bool copy_user;
if ((m->cs & 3) == 3)
return IN_USER;
if (!mc_recoverable(m->mcgstatus))
return IN_KERNEL;
switch (ex_get_fixup_type(m->ip)) {
/* Allow instrumentation around external facilities usage. */
instrumentation_begin();
fixup_type = ex_get_fixup_type(m->ip);
copy_user = is_copy_from_user(regs);
instrumentation_end();
switch (fixup_type) {
case EX_TYPE_UACCESS:
case EX_TYPE_COPY:
if (!regs || !is_copy_from_user(regs))
if (!copy_user)
return IN_KERNEL;
m->kflags |= MCE_IN_KERNEL_COPYIN;
fallthrough;
case EX_TYPE_FAULT_MCE_SAFE:
case EX_TYPE_DEFAULT_MCE_SAFE:
m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
default:
return IN_KERNEL;
}
......@@ -288,8 +303,7 @@ static int error_context(struct mce *m, struct pt_regs *regs)
static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
{
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
u32 low, high;
u64 mcx_cfg;
/*
* We need to look at the following bits:
......@@ -300,11 +314,10 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
if (!mce_flags.succor)
return MCE_PANIC_SEVERITY;
if (rdmsr_safe(addr, &low, &high))
return MCE_PANIC_SEVERITY;
mcx_cfg = mce_rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(m->bank));
/* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
if ((low & MCI_CONFIG_MCAX) &&
if ((mcx_cfg & MCI_CONFIG_MCAX) &&
(m->status & MCI_STATUS_TCC) &&
(err_ctx == IN_KERNEL))
return MCE_PANIC_SEVERITY;
......@@ -317,8 +330,8 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
*/
static int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
char **msg, bool is_excp)
static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
char **msg, bool is_excp)
{
enum context ctx = error_context(m, regs);
......@@ -370,8 +383,8 @@ static int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
return MCE_KEEP_SEVERITY;
}
static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
int tolerant, char **msg, bool is_excp)
static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs,
int tolerant, char **msg, bool is_excp)
{
enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
enum context ctx = error_context(m, regs);
......@@ -407,8 +420,8 @@ static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
}
}
int mce_severity(struct mce *m, struct pt_regs *regs, int tolerant, char **msg,
bool is_excp)
int noinstr mce_severity(struct mce *m, struct pt_regs *regs, int tolerant, char **msg,
bool is_excp)
{
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
......
......@@ -225,6 +225,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* Don't try to copy the tail if machine check happened
*
* Input:
* eax trap number written by ex_handler_copy()
* rdi destination
* rsi source
* rdx count
......@@ -233,12 +234,20 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* eax uncopied bytes or 0 if successful.
*/
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
cmp $X86_TRAP_MC,%eax
je 3f
movl %edx,%ecx
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
ret
3:
movl %edx,%eax
ASM_CLAC
RET
_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
......
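The copy_user_64.S hunk above makes the tail fixup bail out without touching the source again when the trap that got us there was a machine check. Roughly the equivalent C logic, as an illustrative sketch only (copy_user_handle_tail_c and retry_bytewise are hypothetical names):

	static unsigned long copy_user_handle_tail_c(int trapnr, void *dst,
						     const void *src,
						     unsigned long remaining)
	{
		/*
		 * On #MC, do not re-read the possibly poisoned source;
		 * report every remaining byte as uncopied.
		 */
		if (trapnr == X86_TRAP_MC)
			return remaining;

		/* Otherwise retry byte by byte to salvage what we can. */
		return retry_bytewise(dst, src, remaining);	/* hypothetical helper */
	}
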
......@@ -988,6 +988,281 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
return csrow;
}
/* Protect the PCI config register pairs used for DF indirect access. */
static DEFINE_MUTEX(df_indirect_mutex);
/*
* Data Fabric Indirect Access uses FICAA/FICAD.
*
* Fabric Indirect Configuration Access Address (FICAA): Constructed based
* on the device's Instance Id and the PCI function and register offset of
* the desired register.
*
* Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO
* and FICAD HI registers but so far we only need the LO register.
*
* Use Instance Id 0xFF to indicate a broadcast read.
*/
#define DF_BROADCAST 0xFF
static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
{
struct pci_dev *F4;
u32 ficaa;
int err = -ENODEV;
if (node >= amd_nb_num())
goto out;
F4 = node_to_amd_nb(node)->link;
if (!F4)
goto out;
ficaa = (instance_id == DF_BROADCAST) ? 0 : 1;
ficaa |= reg & 0x3FC;
ficaa |= (func & 0x7) << 11;
ficaa |= instance_id << 16;
mutex_lock(&df_indirect_mutex);
err = pci_write_config_dword(F4, 0x5C, ficaa);
if (err) {
pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa);
goto out_unlock;
}
err = pci_read_config_dword(F4, 0x98, lo);
if (err)
pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa);
out_unlock:
mutex_unlock(&df_indirect_mutex);
out:
return err;
}
static int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
{
return __df_indirect_read(node, func, reg, instance_id, lo);
}
static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo)
{
return __df_indirect_read(node, func, reg, DF_BROADCAST, lo);
}
struct addr_ctx {
u64 ret_addr;
u32 tmp;
u16 nid;
u8 inst_id;
};
static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
{
u64 dram_base_addr, dram_limit_addr, dram_hole_base;
u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask;
u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets;
u8 intlv_addr_sel, intlv_addr_bit;
u8 num_intlv_bits, hashed_bit;
u8 lgcy_mmio_hole_en, base = 0;
u8 cs_mask, cs_id = 0;
bool hash_enabled = false;
struct addr_ctx ctx;
memset(&ctx, 0, sizeof(ctx));
/* Start from the normalized address */
ctx.ret_addr = norm_addr;
ctx.nid = nid;
ctx.inst_id = umc;
/* Read D18F0x1B4 (DramOffset), check if base 1 is used. */
if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp))
goto out_err;
/* Remove HiAddrOffset from normalized address, if enabled: */
if (ctx.tmp & BIT(0)) {
u64 hi_addr_offset = (ctx.tmp & GENMASK_ULL(31, 20)) << 8;
if (norm_addr >= hi_addr_offset) {
ctx.ret_addr -= hi_addr_offset;
base = 1;
}
}
/* Read D18F0x110 (DramBaseAddress). */
if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &ctx.tmp))
goto out_err;
/* Check if address range is valid. */
if (!(ctx.tmp & BIT(0))) {
pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n",
__func__, ctx.tmp);
goto out_err;
}
lgcy_mmio_hole_en = ctx.tmp & BIT(1);
intlv_num_chan = (ctx.tmp >> 4) & 0xF;
intlv_addr_sel = (ctx.tmp >> 8) & 0x7;
dram_base_addr = (ctx.tmp & GENMASK_ULL(31, 12)) << 16;
/* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
if (intlv_addr_sel > 3) {
pr_err("%s: Invalid interleave address select %d.\n",
__func__, intlv_addr_sel);
goto out_err;
}
/* Read D18F0x114 (DramLimitAddress). */
if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp))
goto out_err;
intlv_num_sockets = (ctx.tmp >> 8) & 0x1;
intlv_num_dies = (ctx.tmp >> 10) & 0x3;
dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
intlv_addr_bit = intlv_addr_sel + 8;
/* Re-use intlv_num_chan by setting it equal to log2(#channels) */
switch (intlv_num_chan) {
case 0: intlv_num_chan = 0; break;
case 1: intlv_num_chan = 1; break;
case 3: intlv_num_chan = 2; break;
case 5: intlv_num_chan = 3; break;
case 7: intlv_num_chan = 4; break;
case 8: intlv_num_chan = 1;
hash_enabled = true;
break;
default:
pr_err("%s: Invalid number of interleaved channels %d.\n",
__func__, intlv_num_chan);
goto out_err;
}
num_intlv_bits = intlv_num_chan;
if (intlv_num_dies > 2) {
pr_err("%s: Invalid number of interleaved nodes/dies %d.\n",
__func__, intlv_num_dies);
goto out_err;
}
num_intlv_bits += intlv_num_dies;
/* Add a bit if sockets are interleaved. */
num_intlv_bits += intlv_num_sockets;
/* Assert num_intlv_bits <= 4 */
if (num_intlv_bits > 4) {
pr_err("%s: Invalid interleave bits %d.\n",
__func__, num_intlv_bits);
goto out_err;
}
if (num_intlv_bits > 0) {
u64 temp_addr_x, temp_addr_i, temp_addr_y;
u8 die_id_bit, sock_id_bit, cs_fabric_id;
/*
* Read FabricBlockInstanceInformation3_CS[BlockFabricID].
* This is the fabric id for this coherent slave. Use
* umc/channel# as instance id of the coherent slave
* for FICAA.
*/
if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp))
goto out_err;
cs_fabric_id = (ctx.tmp >> 8) & 0xFF;
die_id_bit = 0;
/* If interleaved over more than 1 channel: */
if (intlv_num_chan) {
die_id_bit = intlv_num_chan;
cs_mask = (1 << die_id_bit) - 1;
cs_id = cs_fabric_id & cs_mask;
}
sock_id_bit = die_id_bit;
/* Read D18F1x208 (SystemFabricIdMask). */
if (intlv_num_dies || intlv_num_sockets)
if (df_indirect_read_broadcast(nid, 1, 0x208, &ctx.tmp))
goto out_err;
/* If interleaved over more than 1 die. */
if (intlv_num_dies) {
sock_id_bit = die_id_bit + intlv_num_dies;
die_id_shift = (ctx.tmp >> 24) & 0xF;
die_id_mask = (ctx.tmp >> 8) & 0xFF;
cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit;
}
/* If interleaved over more than 1 socket. */
if (intlv_num_sockets) {
socket_id_shift = (ctx.tmp >> 28) & 0xF;
socket_id_mask = (ctx.tmp >> 16) & 0xFF;
cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit;
}
/*
* The pre-interleaved address consists of XXXXXXIIIYYYYY
* where III is the ID for this CS, and XXXXXXYYYYY are the
* address bits from the post-interleaved address.
* "num_intlv_bits" has been calculated to tell us how many "I"
* bits there are. "intlv_addr_bit" tells us how many "Y" bits
* there are (where "I" starts).
*/
temp_addr_y = ctx.ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0);
temp_addr_i = (cs_id << intlv_addr_bit);
temp_addr_x = (ctx.ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;
ctx.ret_addr = temp_addr_x | temp_addr_i | temp_addr_y;
}
/* Add dram base address */
ctx.ret_addr += dram_base_addr;
/* If legacy MMIO hole enabled */
if (lgcy_mmio_hole_en) {
if (df_indirect_read_broadcast(nid, 0, 0x104, &ctx.tmp))
goto out_err;
dram_hole_base = ctx.tmp & GENMASK(31, 24);
if (ctx.ret_addr >= dram_hole_base)
ctx.ret_addr += (BIT_ULL(32) - dram_hole_base);
}
if (hash_enabled) {
/* Save some parentheses and grab ls-bit at the end. */
hashed_bit = (ctx.ret_addr >> 12) ^
(ctx.ret_addr >> 18) ^
(ctx.ret_addr >> 21) ^
(ctx.ret_addr >> 30) ^
cs_id;
hashed_bit &= BIT(0);
if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0)))
ctx.ret_addr ^= BIT(intlv_addr_bit);
}
/* Is calculated system address is above DRAM limit address? */
if (ctx.ret_addr > dram_limit_addr)
goto out_err;
*sys_addr = ctx.ret_addr;
return 0;
out_err:
return -EINVAL;
}
static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
/*
......
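As a worked example of the interleave reconstruction in umc_normaddr_to_sysaddr() above (illustrative numbers only): assume a 4-way channel interleave with no die or socket interleaving, intlv_addr_sel == 0 (so intlv_addr_bit == 8, num_intlv_bits == 2), cs_id == 2, and a normalized address of 0x1234 after the HiAddrOffset handling:

	u64 temp_addr_y = 0x1234 & GENMASK_ULL(7, 0);			/* 0x34   */
	u64 temp_addr_i = (u64)2 << 8;					/* 0x200  */
	u64 temp_addr_x = (0x1234 & GENMASK_ULL(63, 8)) << 2;		/* 0x4800 */
	u64 ret_addr    = temp_addr_x | temp_addr_i | temp_addr_y;	/* 0x4a34 */

dram_base_addr, the legacy MMIO hole and the optional hash bit are then applied on top of this value, as in the code above.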
......@@ -399,6 +399,63 @@ static const char * const smca_mp5_mce_desc[] = {
"Instruction Tag Cache Bank B ECC or parity error",
};
static const char * const smca_mpdma_mce_desc[] = {
"Main SRAM [31:0] bank ECC or parity error",
"Main SRAM [63:32] bank ECC or parity error",
"Main SRAM [95:64] bank ECC or parity error",
"Main SRAM [127:96] bank ECC or parity error",
"Data Cache Bank A ECC or parity error",
"Data Cache Bank B ECC or parity error",
"Data Tag Cache Bank A ECC or parity error",
"Data Tag Cache Bank B ECC or parity error",
"Instruction Cache Bank A ECC or parity error",
"Instruction Cache Bank B ECC or parity error",
"Instruction Tag Cache Bank A ECC or parity error",
"Instruction Tag Cache Bank B ECC or parity error",
"Data Cache Bank A ECC or parity error",
"Data Cache Bank B ECC or parity error",
"Data Tag Cache Bank A ECC or parity error",
"Data Tag Cache Bank B ECC or parity error",
"Instruction Cache Bank A ECC or parity error",
"Instruction Cache Bank B ECC or parity error",
"Instruction Tag Cache Bank A ECC or parity error",
"Instruction Tag Cache Bank B ECC or parity error",
"Data Cache Bank A ECC or parity error",
"Data Cache Bank B ECC or parity error",
"Data Tag Cache Bank A ECC or parity error",
"Data Tag Cache Bank B ECC or parity error",
"Instruction Cache Bank A ECC or parity error",
"Instruction Cache Bank B ECC or parity error",
"Instruction Tag Cache Bank A ECC or parity error",
"Instruction Tag Cache Bank B ECC or parity error",
"System Hub Read Buffer ECC or parity error",
"MPDMA TVF DVSEC Memory ECC or parity error",
"MPDMA TVF MMIO Mailbox0 ECC or parity error",
"MPDMA TVF MMIO Mailbox1 ECC or parity error",
"MPDMA TVF Doorbell Memory ECC or parity error",
"MPDMA TVF SDP Slave Memory 0 ECC or parity error",
"MPDMA TVF SDP Slave Memory 1 ECC or parity error",
"MPDMA TVF SDP Slave Memory 2 ECC or parity error",
"MPDMA TVF SDP Master Memory 0 ECC or parity error",
"MPDMA TVF SDP Master Memory 1 ECC or parity error",
"MPDMA TVF SDP Master Memory 2 ECC or parity error",
"MPDMA TVF SDP Master Memory 3 ECC or parity error",
"MPDMA TVF SDP Master Memory 4 ECC or parity error",
"MPDMA TVF SDP Master Memory 5 ECC or parity error",
"MPDMA TVF SDP Master Memory 6 ECC or parity error",
"MPDMA PTE Command FIFO ECC or parity error",
"MPDMA PTE Hub Data FIFO ECC or parity error",
"MPDMA PTE Internal Data FIFO ECC or parity error",
"MPDMA PTE Command Memory DMA ECC or parity error",
"MPDMA PTE Command Memory Internal ECC or parity error",
"MPDMA PTE DMA Completion FIFO ECC or parity error",
"MPDMA PTE Tablewalk Completion FIFO ECC or parity error",
"MPDMA PTE Descriptor Completion FIFO ECC or parity error",
"MPDMA PTE ReadOnly Completion FIFO ECC or parity error",
"MPDMA PTE DirectWrite Completion FIFO ECC or parity error",
"SDP Watchdog Timer expired",
};
static const char * const smca_nbio_mce_desc[] = {
"ECC or Parity error",
"PCIE error",
......@@ -448,7 +505,7 @@ static const char * const smca_xgmipcs_mce_desc[] = {
"Rx Replay Timeout Error",
"LinkSub Tx Timeout Error",
"LinkSub Rx Timeout Error",
"Rx CMD Pocket Error",
"Rx CMD Packet Error",
};
static const char * const smca_xgmiphy_mce_desc[] = {
......@@ -458,11 +515,66 @@ static const char * const smca_xgmiphy_mce_desc[] = {
"PHY APB error",
};
static const char * const smca_waflphy_mce_desc[] = {
"RAM ECC Error",
"ARC instruction buffer parity error",
"ARC data buffer parity error",
"PHY APB error",
static const char * const smca_nbif_mce_desc[] = {
"Timeout error from GMI",
"SRAM ECC error",
"NTB Error Event",
"SDP Parity error",
};
static const char * const smca_sata_mce_desc[] = {
"Parity error for port 0",
"Parity error for port 1",
"Parity error for port 2",
"Parity error for port 3",
"Parity error for port 4",
"Parity error for port 5",
"Parity error for port 6",
"Parity error for port 7",
};
static const char * const smca_usb_mce_desc[] = {
"Parity error or ECC error for S0 RAM0",
"Parity error or ECC error for S0 RAM1",
"Parity error or ECC error for S0 RAM2",
"Parity error for PHY RAM0",
"Parity error for PHY RAM1",
"AXI Slave Response error",
};
static const char * const smca_gmipcs_mce_desc[] = {
"Data Loss Error",
"Training Error",
"Replay Parity Error",
"Rx Fifo Underflow Error",
"Rx Fifo Overflow Error",
"CRC Error",
"BER Exceeded Error",
"Tx Fifo Underflow Error",
"Replay Buffer Parity Error",
"Tx Overflow Error",
"Replay Fifo Overflow Error",
"Replay Fifo Underflow Error",
"Elastic Fifo Overflow Error",
"Deskew Error",
"Offline Error",
"Data Startup Limit Error",
"FC Init Timeout Error",
"Recovery Timeout Error",
"Ready Serial Timeout Error",
"Ready Serial Attempt Error",
"Recovery Attempt Error",
"Recovery Relock Attempt Error",
"Deskew Abort Error",
"Rx Buffer Error",
"Rx LFDS Fifo Overflow Error",
"Rx LFDS Fifo Underflow Error",
"LinkSub Tx Timeout Error",
"LinkSub Rx Timeout Error",
"Rx CMD Packet Error",
"LFDS Training Timeout Error",
"LFDS FC Init Timeout Error",
"Data Loss Error",
};
struct smca_mce_desc {
......@@ -490,12 +602,21 @@ static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
[SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) },
[SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
[SMCA_MPDMA] = { smca_mpdma_mce_desc, ARRAY_SIZE(smca_mpdma_mce_desc) },
[SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
[SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
[SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) },
[SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) },
/* NBIF and SHUB have the same error descriptions, for now. */
[SMCA_NBIF] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) },
[SMCA_SHUB] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) },
[SMCA_SATA] = { smca_sata_mce_desc, ARRAY_SIZE(smca_sata_mce_desc) },
[SMCA_USB] = { smca_usb_mce_desc, ARRAY_SIZE(smca_usb_mce_desc) },
[SMCA_GMI_PCS] = { smca_gmipcs_mce_desc, ARRAY_SIZE(smca_gmipcs_mce_desc) },
/* All the PHY bank types have the same error descriptions, for now. */
[SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
[SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) },
[SMCA_WAFL_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
[SMCA_GMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
};
static bool f12h_mc0_mce(u16 ec, u8 xec)
......@@ -1045,20 +1166,13 @@ static void decode_mc6_mce(struct mce *m)
/* Decode errors according to Scalable MCA specification */
static void decode_smca_error(struct mce *m)
{
struct smca_hwid *hwid;
enum smca_bank_types bank_type;
enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
const char *ip_name;
u8 xec = XEC(m->status, xec_mask);
if (m->bank >= ARRAY_SIZE(smca_banks))
if (bank_type >= N_SMCA_BANK_TYPES)
return;
hwid = smca_banks[m->bank].hwid;
if (!hwid)
return;
bank_type = hwid->bank_type;
if (bank_type == SMCA_RESERVED) {
pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
return;
......
......@@ -2647,7 +2647,7 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
* and error occurred in DramECC (Extended error code = 0) then only
* process the error, else bail out.
*/
if (!m || !((smca_get_bank_type(m->bank) == SMCA_UMC_V2) &&
if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
(XEC(m->status, 0x3f) == 0x0)))
return NOTIFY_DONE;
......