Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Ingo Molnar: "The main changes in this cycle were: - Assign notifier chain priorities for all RAS related handlers to make the ordering explicit (Borislav Petkov) - Improve the AMD MCA banks sysfs output (Yazen Ghannam) - Various cleanups and restructuring of the x86 RAS code (Borislav Petkov)" * 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/ras, EDAC, acpi: Assign MCE notifier handlers a priority x86/ras: Get rid of mce_process_work() EDAC/mce/amd: Dump TSC value EDAC/mce/amd: Unexport amd_decode_mce() x86/ras/amd/inj: Change dependency x86/ras: Flip the TSC-adding logic x86/ras/amd: Make sysfs names of banks more user-friendly x86/ras/therm_throt: Do not log a fake MCE for thermal events x86/ras/inject: Make it depend on X86_LOCAL_APIC=y

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar: "The main changes in this cycle were: - Assign notifier chain priorities for all RAS related handlers to make the ordering explicit (Borislav Petkov) - Improve the AMD MCA banks sysfs output (Yazen Ghannam) - Various cleanups and restructuring of the x86 RAS code (Borislav Petkov)" * 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/ras, EDAC, acpi: Assign MCE notifier handlers a priority x86/ras: Get rid of mce_process_work() EDAC/mce/amd: Dump TSC value EDAC/mce/amd: Unexport amd_decode_mce() x86/ras/amd/inj: Change dependency x86/ras: Flip the TSC-adding logic x86/ras/amd: Make sysfs names of banks more user-friendly x86/ras/therm_throt: Do not log a fake MCE for thermal events x86/ras/inject: Make it depend on X86_LOCAL_APIC=y
60c906ba · Linus Torvalds · 7f4eb0a6 · 9026cc82 · 60c906ba · 60c906ba
17 changed file
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1070,7 +1070,7 @@ config X86_MCE_THRESHOLD
 	def_bool y

 config X86_MCE_INJECT
-	depends on X86_MCE
+	depends on X86_MCE && X86_LOCAL_APIC
 	tristate "Machine check injector support"
 	---help---
 	  Provide support for injecting machine checks for testing purposes.

--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -97,10 +97,6 @@

 #define MCE_OVERFLOW 0		/* bit 0 in flags means overflow */

-/* Software defined banks */
-#define MCE_EXTENDED_BANK	128
-#define MCE_THERMAL_BANK	(MCE_EXTENDED_BANK + 0)
-
 #define MCE_LOG_LEN 32
 #define MCE_LOG_SIGNATURE	"MACHINECHECK"

@@ -193,6 +189,15 @@ extern struct mce_vendor_flags mce_flags;

 extern struct mca_config mca_cfg;
 extern struct mca_msr_regs msr_ops;
+
+enum mce_notifier_prios {
+	MCE_PRIO_SRAO		= INT_MAX,
+	MCE_PRIO_EXTLOG		= INT_MAX - 1,
+	MCE_PRIO_NFIT		= INT_MAX - 2,
+	MCE_PRIO_EDAC		= INT_MAX - 3,
+	MCE_PRIO_LOWEST		= 0,
+};
+
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);

@@ -306,8 +311,6 @@ extern void (*deferred_error_int_vector)(void);

 void intel_init_thermal(struct cpuinfo_x86 *c);

-void mce_log_therm_throt_event(__u64 status);
-
 /* Interrupt Handler for core thermal thresholds */
 extern int (*platform_thermal_notify)(__u64 msr_val);

@@ -362,12 +365,13 @@ struct smca_hwid {
 	unsigned int bank_type;	/* Use with smca_bank_types for easy indexing. */
 	u32 hwid_mcatype;	/* (hwid,mcatype) tuple */
 	u32 xec_bitmap;		/* Bitmap of valid ExtErrorCodes; current max is 21. */
+	u8 count;		/* Number of instances. */
 };

 struct smca_bank {
 	struct smca_hwid *hwid;
-	/* Instance ID */
-	u32 id;
+	u32 id;			/* Value of MCA_IPID[InstanceId]. */
+	u8 sysfs_id;		/* Value used for sysfs name. */
 };

 extern struct smca_bank smca_banks[MAX_NR_BANKS];

--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -52,8 +52,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)

 	if (severity >= GHES_SEV_RECOVERABLE)
 		m.status |= MCI_STATUS_UC;
-	if (severity >= GHES_SEV_PANIC)
+
+	if (severity >= GHES_SEV_PANIC) {
 		m.status |= MCI_STATUS_PCC;
+		m.tsc = rdtsc();
+	}

 	m.addr = mem_err->physical_addr;
 	mce_log(&m);

--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -72,7 +72,7 @@ struct llist_node *mce_gen_pool_prepare_records(void)
 	return new_head.first;
 }

-void mce_gen_pool_process(void)
+void mce_gen_pool_process(struct work_struct *__unused)
 {
 	struct llist_node *head;
 	struct mce_evt_llist *node, *tmp;

--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -152,7 +152,6 @@ static void raise_mce(struct mce *m)
 	if (context == MCJ_CTX_RANDOM)
 		return;

-#ifdef CONFIG_X86_LOCAL_APIC
 	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
 		unsigned long start;
 		int cpu;
@@ -192,9 +191,7 @@ static void raise_mce(struct mce *m)
 		raise_local();
 		put_cpu();
 		put_online_cpus();
-	} else
-#endif
-	{
+	} else {
 		preempt_disable();
 		raise_local();
 		preempt_enable();

--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -31,7 +31,7 @@ struct mce_evt_llist {
 	struct mce mce;
 };

-void mce_gen_pool_process(void);
+void mce_gen_pool_process(struct work_struct *__unused);
 bool mce_gen_pool_empty(void);
 int mce_gen_pool_add(struct mce *mce);
 int mce_gen_pool_init(void);

--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -128,7 +128,6 @@ void mce_setup(struct mce *m)
 {
 	memset(m, 0, sizeof(struct mce));
 	m->cpu = m->extcpu = smp_processor_id();
-	m->tsc = rdtsc();
 	/* We hope get_seconds stays lockless */
 	m->time = get_seconds();
 	m->cpuvendor = boot_cpu_data.x86_vendor;
@@ -217,9 +216,7 @@ void mce_register_decode_chain(struct notifier_block *nb)
 {
 	atomic_inc(&num_notifiers);

-	/* Ensure SRAO notifier has the highest priority in the decode chain. */
-	if (nb != &mce_srao_nb && nb->priority == INT_MAX)
-		nb->priority -= 1;
+	WARN_ON(nb->priority > MCE_PRIO_LOWEST && nb->priority < MCE_PRIO_EDAC);

 	atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
 }
@@ -583,7 +580,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
 }
 static struct notifier_block mce_srao_nb = {
 	.notifier_call	= srao_decode_notifier,
-	.priority = INT_MAX,
+	.priority	= MCE_PRIO_SRAO,
 };

 static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
@@ -609,7 +606,7 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
 static struct notifier_block mce_default_nb = {
 	.notifier_call	= mce_default_notifier,
 	/* lowest prio, we want it to run last. */
-	.priority	= 0,
+	.priority	= MCE_PRIO_LOWEST,
 };

 /*
@@ -710,14 +707,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)

 	mce_gather_info(&m, NULL);

-	/*
-	 * m.tsc was set in mce_setup(). Clear it if not requested.
-	 *
-	 * FIXME: Propagate @flags to mce_gather_info/mce_setup() to avoid
-	 *	  that dance.
-	 */
-	if (!(flags & MCP_TIMESTAMP))
-		m.tsc = 0;
+	if (flags & MCP_TIMESTAMP)
+		m.tsc = rdtsc();

 	for (i = 0; i < mca_cfg.banks; i++) {
 		if (!mce_banks[i].ctl || !test_bit(i, *b))
@@ -1156,6 +1147,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		goto out;

 	mce_gather_info(&m, regs);
+	m.tsc = rdtsc();

 	final = this_cpu_ptr(&mces_seen);
 	*final = m;
@@ -1321,41 +1313,6 @@ int memory_failure(unsigned long pfn, int vector, int flags)
 }
 #endif

-/*
- * Action optional processing happens here (picking up
- * from the list of faulting pages that do_machine_check()
- * placed into the genpool).
- */
-static void mce_process_work(struct work_struct *dummy)
-{
-	mce_gen_pool_process();
-}
-
-#ifdef CONFIG_X86_MCE_INTEL
-/***
- * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
- * @cpu: The CPU on which the event occurred.
- * @status: Event status information
- *
- * This function should be called by the thermal interrupt after the
- * event has been processed and the decision was made to log the event
- * further.
- *
- * The status parameter will be saved to the 'status' field of 'struct mce'
- * and historically has been the register value of the
- * MSR_IA32_THERMAL_STATUS (Intel) msr.
- */
-void mce_log_therm_throt_event(__u64 status)
-{
-	struct mce m;
-
-	mce_setup(&m);
-	m.bank = MCE_THERMAL_BANK;
-	m.status = status;
-	mce_log(&m);
-}
-#endif /* CONFIG_X86_MCE_INTEL */
-
 /*
 * Periodic polling timer for "silent" machine check errors.  If the
 * poller finds an MCE, poll 2x faster.  When the poller finds no more
@@ -2189,7 +2146,7 @@ int __init mcheck_init(void)
 	mce_register_decode_chain(&mce_default_nb);
 	mcheck_vendor_init_severity();

-	INIT_WORK(&mce_work, mce_process_work);
+	INIT_WORK(&mce_work, mce_gen_pool_process);
 	init_irq_work(&mce_irq_work, mce_irq_work_cb);

 	return 0;

--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -192,6 +192,7 @@ static void get_smca_bank_info(unsigned int bank)

 			smca_banks[bank].hwid = s_hwid;
 			smca_banks[bank].id = instance_id;
+			smca_banks[bank].sysfs_id = s_hwid->count++;
 			break;
 		}
 	}
@@ -777,7 +778,8 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
 	mce_setup(&m);

 	m.status = status;
-	m.bank = bank;
+	m.bank   = bank;
+	m.tsc	 = rdtsc();

 	if (threshold_err)
 		m.misc = misc;
@@ -1064,9 +1066,12 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
 		return NULL;
 	}

+	if (smca_banks[bank].hwid->count == 1)
+		return smca_get_name(bank_type);
+
 	snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
 		 "%s_%x", smca_get_name(bank_type),
-			  smca_banks[bank].id);
+			  smca_banks[bank].sysfs_id);
 	return buf_mcatype;
 }


--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -6,7 +6,7 @@
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
- * (since the logging to syslog and mcelog is rate limited).
+ * (since the logging to syslog is rate limited).
 *
 * Author: Dmitriy Zavin (dmitriyz@google.com)
 *
@@ -141,13 +141,8 @@ static struct attribute_group thermal_attr_group = {
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
- *
- * Returns: 0 : Event should NOT be further logged, i.e. still in
- *              "timeout" from previous log message.
- *          1 : Event should be logged further, and a message has been
- *              printed to the syslog.
 */
-static int therm_throt_process(bool new_event, int event, int level)
+static void therm_throt_process(bool new_event, int event, int level)
 {
 	struct _thermal_state *state;
 	unsigned int this_cpu = smp_processor_id();
@@ -162,16 +157,16 @@ static int therm_throt_process(bool new_event, int event, int level)
 		else if (event == POWER_LIMIT_EVENT)
 			state = &pstate->core_power_limit;
 		else
-			 return 0;
+			return;
 	} else if (level == PACKAGE_LEVEL) {
 		if (event == THERMAL_THROTTLING_EVENT)
 			state = &pstate->package_throttle;
 		else if (event == POWER_LIMIT_EVENT)
 			state = &pstate->package_power_limit;
 		else
-			return 0;
+			return;
 	} else
-		return 0;
+		return;

 	old_event = state->new_event;
 	state->new_event = new_event;
@@ -181,7 +176,7 @@ static int therm_throt_process(bool new_event, int event, int level)

 	if (time_before64(now, state->next_check) &&
 			state->count != state->last_count)
-		return 0;
+		return;

 	state->next_check = now + CHECK_INTERVAL;
 	state->last_count = state->count;
@@ -193,16 +188,14 @@ static int therm_throt_process(bool new_event, int event, int level)
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package",
 				state->count);
-		return 1;
+		return;
 	}
 	if (old_event) {
 		if (event == THERMAL_THROTTLING_EVENT)
 			pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package");
-		return 1;
+		return;
 	}
-
-	return 0;
 }

 static int thresh_event_valid(int level, int event)
@@ -365,10 +358,9 @@ static void intel_thermal_interrupt(void)
 	/* Check for violation of core thermal thresholds*/
 	notify_thresholds(msr_val);

-	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
-				THERMAL_THROTTLING_EVENT,
-				CORE_LEVEL) != 0)
-		mce_log_therm_throt_event(msr_val);
+	therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
+			    THERMAL_THROTTLING_EVENT,
+			    CORE_LEVEL);

 	if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
 		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,

--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
 config MCE_AMD_INJ
 	tristate "Simple MCE injection interface for AMD processors"
-	depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB
+	depends on RAS && X86_MCE && DEBUG_FS && AMD_NB
 	default n
 	help
 	  This is a simple debugfs interface to inject MCEs and test different

--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -212,6 +212,7 @@ static bool __init extlog_get_l1addr(void)
 }
 static struct notifier_block extlog_mce_dec = {
 	.notifier_call	= extlog_print,
+	.priority	= MCE_PRIO_EXTLOG,
 };

 static int __init extlog_init(void)

--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -90,6 +90,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,

 static struct notifier_block nfit_mce_dec = {
 	.notifier_call	= nfit_handle_mce,
+	.priority	= MCE_PRIO_NFIT,
 };

 void nfit_mce_register(void)

--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1835,6 +1835,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,

 static struct notifier_block i7_mce_dec = {
 	.notifier_call	= i7core_mce_check_error,
+	.priority	= MCE_PRIO_EDAC,
 };

 struct memdev_dmi_entry {

--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -942,7 +942,8 @@ static const char *decode_error_status(struct mce *m)
 	return "Corrected error, no action required.";
 }

-int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
+static int
+amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 {
 	struct mce *m = (struct mce *)data;
 	struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
@@ -1005,6 +1006,9 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 		goto err_code;
 	}

+	if (m->tsc)
+		pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
+
 	if (!fam_ops)
 		goto err_code;

@@ -1046,10 +1050,10 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)

 	return NOTIFY_STOP;
 }
-EXPORT_SYMBOL_GPL(amd_decode_mce);

 static struct notifier_block amd_mce_dec_nb = {
 	.notifier_call	= amd_decode_mce,
+	.priority	= MCE_PRIO_EDAC,
 };

 static int __init mce_amd_init(void)

--- a/drivers/edac/mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -79,6 +79,5 @@ struct amd_decoder_ops {
 void amd_report_gart_errors(bool);
 void amd_register_ecc_decoder(void (*f)(int, struct mce *));
 void amd_unregister_ecc_decoder(void (*f)(int, struct mce *));
-int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);

 #endif /* _EDAC_MCE_AMD_H */
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -3117,7 +3117,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 }

 static struct notifier_block sbridge_mce_dec = {
-	.notifier_call      = sbridge_mce_check_error,
+	.notifier_call	= sbridge_mce_check_error,
+	.priority	= MCE_PRIO_EDAC,
 };

 /****************************************************************************

--- a/drivers/edac/skx_edac.c
+++ b/drivers/edac/skx_edac.c
@@ -1007,7 +1007,8 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 }

 static struct notifier_block skx_mce_dec = {
-	.notifier_call = skx_mce_check_error,
+	.notifier_call	= skx_mce_check_error,
+	.priority	= MCE_PRIO_EDAC,
 };

 static void skx_remove(void)