提交 453a258b 编写于 作者: J James Morse 提交者: Baolin Wang

ACPI / APEI: Don't store CPER records physical address in struct ghes

fix #28612342

commit eeb2555779471abdbcc6289a52dc54ce513feaf2 upstream

When CPER records are found the address of the records is stashed
in the struct ghes. Once the records have been processed, this
address is overwritten with zero so that it won't be processed
again without being re-populated by firmware.

This goes wrong if a struct ghes can be processed concurrently,
as can happen at probe time when an NMI occurs. If the NMI arrives
on another CPU, the probing CPU may call ghes_clear_estatus() on the
records before the handler has finished with them.
Even on the same CPU, once the interrupted handler is resumed, it
will call ghes_clear_estatus() on the NMI's records; this memory may
have already been re-used by firmware.

Avoid this stashing by letting the caller hold the address. A
later patch will do away with the use of ghes->flags in the
read/clear code too.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Alex Shi <alex.shi@linux.alibaba.com>
Acked-by: Caspar Zhang <caspar@linux.alibaba.com>
Reviewed-by: luanshi <zhangliguang@linux.alibaba.com>
上级 4d0a055c
...@@ -305,29 +305,30 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, ...@@ -305,29 +305,30 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
} }
} }
static int ghes_read_estatus(struct ghes *ghes) static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr)
{ {
struct acpi_hest_generic *g = ghes->generic; struct acpi_hest_generic *g = ghes->generic;
u64 buf_paddr;
u32 len; u32 len;
int rc; int rc;
rc = apei_read(&buf_paddr, &g->error_status_address); rc = apei_read(buf_paddr, &g->error_status_address);
if (rc) { if (rc) {
*buf_paddr = 0;
pr_warn_ratelimited(FW_WARN GHES_PFX pr_warn_ratelimited(FW_WARN GHES_PFX
"Failed to read error status block address for hardware error source: %d.\n", "Failed to read error status block address for hardware error source: %d.\n",
g->header.source_id); g->header.source_id);
return -EIO; return -EIO;
} }
if (!buf_paddr) if (!*buf_paddr)
return -ENOENT; return -ENOENT;
ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, ghes_copy_tofrom_phys(ghes->estatus, *buf_paddr,
sizeof(*ghes->estatus), 1); sizeof(*ghes->estatus), 1);
if (!ghes->estatus->block_status) if (!ghes->estatus->block_status) {
*buf_paddr = 0;
return -ENOENT; return -ENOENT;
}
ghes->buffer_paddr = buf_paddr;
ghes->flags |= GHES_TO_CLEAR; ghes->flags |= GHES_TO_CLEAR;
rc = -EIO; rc = -EIO;
...@@ -339,7 +340,7 @@ static int ghes_read_estatus(struct ghes *ghes) ...@@ -339,7 +340,7 @@ static int ghes_read_estatus(struct ghes *ghes)
if (cper_estatus_check_header(ghes->estatus)) if (cper_estatus_check_header(ghes->estatus))
goto err_read_block; goto err_read_block;
ghes_copy_tofrom_phys(ghes->estatus + 1, ghes_copy_tofrom_phys(ghes->estatus + 1,
buf_paddr + sizeof(*ghes->estatus), *buf_paddr + sizeof(*ghes->estatus),
len - sizeof(*ghes->estatus), 1); len - sizeof(*ghes->estatus), 1);
if (cper_estatus_check(ghes->estatus)) if (cper_estatus_check(ghes->estatus))
goto err_read_block; goto err_read_block;
...@@ -349,15 +350,20 @@ static int ghes_read_estatus(struct ghes *ghes) ...@@ -349,15 +350,20 @@ static int ghes_read_estatus(struct ghes *ghes)
if (rc) if (rc)
pr_warn_ratelimited(FW_WARN GHES_PFX pr_warn_ratelimited(FW_WARN GHES_PFX
"Failed to read error status block!\n"); "Failed to read error status block!\n");
return rc; return rc;
} }
static void ghes_clear_estatus(struct ghes *ghes) static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr)
{ {
ghes->estatus->block_status = 0; ghes->estatus->block_status = 0;
if (!(ghes->flags & GHES_TO_CLEAR)) if (!(ghes->flags & GHES_TO_CLEAR))
return; return;
ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
if (!buf_paddr)
return;
ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
sizeof(ghes->estatus->block_status), 0); sizeof(ghes->estatus->block_status), 0);
ghes->flags &= ~GHES_TO_CLEAR; ghes->flags &= ~GHES_TO_CLEAR;
} }
...@@ -666,11 +672,11 @@ static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2) ...@@ -666,11 +672,11 @@ static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
return apei_write(val, &gv2->read_ack_register); return apei_write(val, &gv2->read_ack_register);
} }
static void __ghes_panic(struct ghes *ghes) static void __ghes_panic(struct ghes *ghes, u64 buf_paddr)
{ {
__ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);
ghes_clear_estatus(ghes); ghes_clear_estatus(ghes, buf_paddr);
/* reboot to log the error! */ /* reboot to log the error! */
if (!panic_timeout) if (!panic_timeout)
...@@ -680,14 +686,15 @@ static void __ghes_panic(struct ghes *ghes) ...@@ -680,14 +686,15 @@ static void __ghes_panic(struct ghes *ghes)
static int ghes_proc(struct ghes *ghes) static int ghes_proc(struct ghes *ghes)
{ {
u64 buf_paddr;
int rc; int rc;
rc = ghes_read_estatus(ghes); rc = ghes_read_estatus(ghes, &buf_paddr);
if (rc) if (rc)
goto out; goto out;
if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) { if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
__ghes_panic(ghes); __ghes_panic(ghes, buf_paddr);
} }
if (!ghes_estatus_cached(ghes->estatus)) { if (!ghes_estatus_cached(ghes->estatus)) {
...@@ -697,7 +704,7 @@ static int ghes_proc(struct ghes *ghes) ...@@ -697,7 +704,7 @@ static int ghes_proc(struct ghes *ghes)
ghes_do_proc(ghes, ghes->estatus); ghes_do_proc(ghes, ghes->estatus);
out: out:
ghes_clear_estatus(ghes); ghes_clear_estatus(ghes, buf_paddr);
if (rc == -ENOENT) if (rc == -ENOENT)
return rc; return rc;
...@@ -912,6 +919,7 @@ static void __process_error(struct ghes *ghes) ...@@ -912,6 +919,7 @@ static void __process_error(struct ghes *ghes)
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{ {
u64 buf_paddr;
struct ghes *ghes; struct ghes *ghes;
int sev, ret = NMI_DONE; int sev, ret = NMI_DONE;
...@@ -919,8 +927,8 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) ...@@ -919,8 +927,8 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
return ret; return ret;
list_for_each_entry_rcu(ghes, &ghes_nmi, list) { list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
if (ghes_read_estatus(ghes)) { if (ghes_read_estatus(ghes, &buf_paddr)) {
ghes_clear_estatus(ghes); ghes_clear_estatus(ghes, buf_paddr);
continue; continue;
} else { } else {
ret = NMI_HANDLED; ret = NMI_HANDLED;
...@@ -929,14 +937,14 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) ...@@ -929,14 +937,14 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
sev = ghes_severity(ghes->estatus->error_severity); sev = ghes_severity(ghes->estatus->error_severity);
if (sev >= GHES_SEV_PANIC) { if (sev >= GHES_SEV_PANIC) {
ghes_print_queued_estatus(); ghes_print_queued_estatus();
__ghes_panic(ghes); __ghes_panic(ghes, buf_paddr);
} }
if (!(ghes->flags & GHES_TO_CLEAR)) if (!(ghes->flags & GHES_TO_CLEAR))
continue; continue;
__process_error(ghes); __process_error(ghes);
ghes_clear_estatus(ghes); ghes_clear_estatus(ghes, buf_paddr);
} }
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
......
...@@ -22,7 +22,6 @@ struct ghes { ...@@ -22,7 +22,6 @@ struct ghes {
struct acpi_hest_generic_v2 *generic_v2; struct acpi_hest_generic_v2 *generic_v2;
}; };
struct acpi_hest_generic_status *estatus; struct acpi_hest_generic_status *estatus;
u64 buffer_paddr;
unsigned long flags; unsigned long flags;
union { union {
struct list_head list; struct list_head list;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册