提交 86f9a7e3 编写于 作者: M Michal Hocko 提交者: Caspar Zhang

mm, memory_hotplug: print reason for the offlining failure

task #29077503
commit 7960509329c24a2bf0bc4929636614a1b7bb4443 upstream

The memory offlining failure reporting is inconsistent and insufficient.
Some error paths simply do not report the failure to the log at all.  When
we do report there are no details about the reason for the failure and
there are several of them which makes memory offlining failures hard to
debug.

Make sure that the
	memory offlining [mem %#010llx-%#010llx] failed
message is printed for all failures and also provide a short textual
reason for the failure e.g.

[ 1984.506184] rac1 kernel: memory offlining [mem 0x82600000000-0x8267fffffff] failed due to signal backoff

this tells us that the offlining has failed because of a signal pending
aka user intervention.

[akpm@linux-foundation.org: tweak messages a bit]
Link: http://lkml.kernel.org/r/20181107101830.17405-5-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Oscar Salvador <OSalvador@suse.com>
Cc: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

(cherry picked from commit 7960509329c24a2bf0bc4929636614a1b7bb4443)
Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Reviewed-by: Yang Shi <yang.shi@linux.alibaba.com>
上级 80aa4777
...@@ -1614,6 +1614,7 @@ static int __ref __offline_pages(unsigned long start_pfn, ...@@ -1614,6 +1614,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
unsigned long valid_start, valid_end; unsigned long valid_start, valid_end;
struct zone *zone; struct zone *zone;
struct memory_notify arg; struct memory_notify arg;
char *reason;
/* at least, alignment against pageblock is necessary */ /* at least, alignment against pageblock is necessary */
if (!IS_ALIGNED(start_pfn, pageblock_nr_pages)) if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
...@@ -1628,7 +1629,9 @@ static int __ref __offline_pages(unsigned long start_pfn, ...@@ -1628,7 +1629,9 @@ static int __ref __offline_pages(unsigned long start_pfn,
if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
&valid_end)) { &valid_end)) {
mem_hotplug_done(); mem_hotplug_done();
return -EINVAL; ret = -EINVAL;
reason = "multizone range";
goto failed_removal;
} }
zone = page_zone(pfn_to_page(valid_start)); zone = page_zone(pfn_to_page(valid_start));
...@@ -1641,7 +1644,8 @@ static int __ref __offline_pages(unsigned long start_pfn, ...@@ -1641,7 +1644,8 @@ static int __ref __offline_pages(unsigned long start_pfn,
MEMORY_OFFLINE | REPORT_FAILURE); MEMORY_OFFLINE | REPORT_FAILURE);
if (ret) { if (ret) {
mem_hotplug_done(); mem_hotplug_done();
return ret; reason = "failure to isolate range";
goto failed_removal;
} }
arg.start_pfn = start_pfn; arg.start_pfn = start_pfn;
...@@ -1650,15 +1654,19 @@ static int __ref __offline_pages(unsigned long start_pfn, ...@@ -1650,15 +1654,19 @@ static int __ref __offline_pages(unsigned long start_pfn,
ret = memory_notify(MEM_GOING_OFFLINE, &arg); ret = memory_notify(MEM_GOING_OFFLINE, &arg);
ret = notifier_to_errno(ret); ret = notifier_to_errno(ret);
if (ret) if (ret) {
goto failed_removal; reason = "notifier failure";
goto failed_removal_isolated;
}
pfn = start_pfn; pfn = start_pfn;
repeat: repeat:
/* start memory hot removal */ /* start memory hot removal */
ret = -EINTR; ret = -EINTR;
if (signal_pending(current)) if (signal_pending(current)) {
goto failed_removal; reason = "signal backoff";
goto failed_removal_isolated;
}
cond_resched(); cond_resched();
lru_add_drain_all(); lru_add_drain_all();
...@@ -1675,8 +1683,10 @@ static int __ref __offline_pages(unsigned long start_pfn, ...@@ -1675,8 +1683,10 @@ static int __ref __offline_pages(unsigned long start_pfn,
* actually in order to make hugetlbfs's object counting consistent. * actually in order to make hugetlbfs's object counting consistent.
*/ */
ret = dissolve_free_huge_pages(start_pfn, end_pfn); ret = dissolve_free_huge_pages(start_pfn, end_pfn);
if (ret) if (ret) {
goto failed_removal; reason = "failure to dissolve huge pages";
goto failed_removal_isolated;
}
/* check again */ /* check again */
offlined_pages = check_pages_isolated(start_pfn, end_pfn); offlined_pages = check_pages_isolated(start_pfn, end_pfn);
if (offlined_pages < 0) if (offlined_pages < 0)
...@@ -1716,13 +1726,15 @@ static int __ref __offline_pages(unsigned long start_pfn, ...@@ -1716,13 +1726,15 @@ static int __ref __offline_pages(unsigned long start_pfn,
mem_hotplug_done(); mem_hotplug_done();
return 0; return 0;
failed_removal_isolated:
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
failed_removal: failed_removal:
pr_debug("memory offlining [mem %#010llx-%#010llx] failed\n", pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
(unsigned long long) start_pfn << PAGE_SHIFT, (unsigned long long) start_pfn << PAGE_SHIFT,
((unsigned long long) end_pfn << PAGE_SHIFT) - 1); ((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
reason);
memory_notify(MEM_CANCEL_OFFLINE, &arg); memory_notify(MEM_CANCEL_OFFLINE, &arg);
/* pushback to free area */ /* pushback to free area */
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
mem_hotplug_done(); mem_hotplug_done();
return ret; return ret;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册