提交 7329bbeb 编写于 作者: Tony Luck

HWPOISON: Add code to handle "action required" errors.

Add new flag bit "MF_ACTION_REQUIRED" to be used by machine check
code to force a signal with si_code = BUS_MCEERR_AR in the case
where the error occurs in processor execution context. Pass the
flags argument along call chain:
	memory_failure()
	  hwpoison_user_mappings()
	    kill_procs()
	      kill_proc()

Drop the "_ao" suffix from kill_procs_ao() and kill_proc_ao() since
they can now handle "action required" as well as "action optional" errors.
Acked-by: Borislav Petkov <bp@amd64.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
上级 cd42f4a3
...@@ -1606,6 +1606,7 @@ void vmemmap_populate_print_last(void); ...@@ -1606,6 +1606,7 @@ void vmemmap_populate_print_last(void);
enum mf_flags { enum mf_flags {
MF_COUNT_INCREASED = 1 << 0, MF_COUNT_INCREASED = 1 << 0,
MF_ACTION_REQUIRED = 1 << 1,
}; };
extern int memory_failure(unsigned long pfn, int trapno, int flags); extern int memory_failure(unsigned long pfn, int trapno, int flags);
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
......
...@@ -187,33 +187,40 @@ int hwpoison_filter(struct page *p) ...@@ -187,33 +187,40 @@ int hwpoison_filter(struct page *p)
EXPORT_SYMBOL_GPL(hwpoison_filter); EXPORT_SYMBOL_GPL(hwpoison_filter);
/* /*
* Send all the processes who have the page mapped an ``action optional'' * Send all the processes who have the page mapped a signal.
* signal. * ``action optional'' if they are not immediately affected by the error
* ``action required'' if error happened in current execution context
*/ */
static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
unsigned long pfn, struct page *page) unsigned long pfn, struct page *page, int flags)
{ {
struct siginfo si; struct siginfo si;
int ret; int ret;
printk(KERN_ERR printk(KERN_ERR
"MCE %#lx: Killing %s:%d early due to hardware memory corruption\n", "MCE %#lx: Killing %s:%d due to hardware memory corruption\n",
pfn, t->comm, t->pid); pfn, t->comm, t->pid);
si.si_signo = SIGBUS; si.si_signo = SIGBUS;
si.si_errno = 0; si.si_errno = 0;
si.si_code = BUS_MCEERR_AO;
si.si_addr = (void *)addr; si.si_addr = (void *)addr;
#ifdef __ARCH_SI_TRAPNO #ifdef __ARCH_SI_TRAPNO
si.si_trapno = trapno; si.si_trapno = trapno;
#endif #endif
si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT;
/*
* Don't use force here, it's convenient if the signal if ((flags & MF_ACTION_REQUIRED) && t == current) {
* can be temporarily blocked. si.si_code = BUS_MCEERR_AR;
* This could cause a loop when the user sets SIGBUS ret = force_sig_info(SIGBUS, &si, t);
* to SIG_IGN, but hopefully no one will do that? } else {
*/ /*
ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ * Don't use force here, it's convenient if the signal
* can be temporarily blocked.
* This could cause a loop when the user sets SIGBUS
* to SIG_IGN, but hopefully no one will do that?
*/
si.si_code = BUS_MCEERR_AO;
ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
}
if (ret < 0) if (ret < 0)
printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n", printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
t->comm, t->pid, ret); t->comm, t->pid, ret);
...@@ -338,8 +345,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, ...@@ -338,8 +345,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
* Also when FAIL is set do a force kill because something went * Also when FAIL is set do a force kill because something went
* wrong earlier. * wrong earlier.
*/ */
static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, static void kill_procs(struct list_head *to_kill, int doit, int trapno,
int fail, struct page *page, unsigned long pfn) int fail, struct page *page, unsigned long pfn,
int flags)
{ {
struct to_kill *tk, *next; struct to_kill *tk, *next;
...@@ -363,8 +371,8 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, ...@@ -363,8 +371,8 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
* check for that, but we need to tell the * check for that, but we need to tell the
* process anyways. * process anyways.
*/ */
else if (kill_proc_ao(tk->tsk, tk->addr, trapno, else if (kill_proc(tk->tsk, tk->addr, trapno,
pfn, page) < 0) pfn, page, flags) < 0)
printk(KERN_ERR printk(KERN_ERR
"MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
pfn, tk->tsk->comm, tk->tsk->pid); pfn, tk->tsk->comm, tk->tsk->pid);
...@@ -844,7 +852,7 @@ static int page_action(struct page_state *ps, struct page *p, ...@@ -844,7 +852,7 @@ static int page_action(struct page_state *ps, struct page *p,
* the pages and send SIGBUS to the processes if the data was dirty. * the pages and send SIGBUS to the processes if the data was dirty.
*/ */
static int hwpoison_user_mappings(struct page *p, unsigned long pfn, static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
int trapno) int trapno, int flags)
{ {
enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
struct address_space *mapping; struct address_space *mapping;
...@@ -962,8 +970,8 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, ...@@ -962,8 +970,8 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
* use a more force-full uncatchable kill to prevent * use a more force-full uncatchable kill to prevent
* any accesses to the poisoned memory. * any accesses to the poisoned memory.
*/ */
kill_procs_ao(&tokill, !!PageDirty(ppage), trapno, kill_procs(&tokill, !!PageDirty(ppage), trapno,
ret != SWAP_SUCCESS, p, pfn); ret != SWAP_SUCCESS, p, pfn, flags);
return ret; return ret;
} }
...@@ -1148,7 +1156,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) ...@@ -1148,7 +1156,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
* Now take care of user space mappings. * Now take care of user space mappings.
* Abort on fail: __delete_from_page_cache() assumes unmapped page. * Abort on fail: __delete_from_page_cache() assumes unmapped page.
*/ */
if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) { if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) {
printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn); printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
res = -EBUSY; res = -EBUSY;
goto out; goto out;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册