Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
kernel_linux
提交
46e387bb
K
kernel_linux
项目概览
OpenHarmony
/
kernel_linux
上一次同步 3 年多
通知
13
Star
8
Fork
2
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
K
kernel_linux
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
46e387bb
编写于
10月 22, 2010
作者:
A
Andi Kleen
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'hwpoison-hugepages' into hwpoison
Conflicts: mm/memory-failure.c
上级
e9d08567
3ef8fd7f
变更
10
显示空白变更内容
内联
并排
Showing
10 changed file
with
551 addition
and
125 deletion
+551
-125
arch/x86/mm/fault.c
arch/x86/mm/fault.c
+13
-6
fs/hugetlbfs/inode.c
fs/hugetlbfs/inode.c
+15
-0
include/linux/hugetlb.h
include/linux/hugetlb.h
+15
-2
include/linux/migrate.h
include/linux/migrate.h
+16
-0
include/linux/mm.h
include/linux/mm.h
+10
-2
mm/hugetlb.c
mm/hugetlb.c
+163
-70
mm/memory-failure.c
mm/memory-failure.c
+93
-9
mm/memory.c
mm/memory.c
+2
-1
mm/migrate.c
mm/migrate.c
+216
-18
mm/rmap.c
mm/rmap.c
+8
-17
未找到文件。
arch/x86/mm/fault.c
浏览文件 @
46e387bb
...
...
@@ -11,6 +11,7 @@
#include <linux/kprobes.h>
/* __kprobes, ... */
#include <linux/mmiotrace.h>
/* kmmio_handler, ... */
#include <linux/perf_event.h>
/* perf_sw_event */
#include <linux/hugetlb.h>
/* hstate_index_to_shift */
#include <asm/traps.h>
/* dotraplinkage, ... */
#include <asm/pgalloc.h>
/* pgd_*(), ... */
...
...
@@ -160,15 +161,20 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
static
void
force_sig_info_fault
(
int
si_signo
,
int
si_code
,
unsigned
long
address
,
struct
task_struct
*
tsk
)
struct
task_struct
*
tsk
,
int
fault
)
{
unsigned
lsb
=
0
;
siginfo_t
info
;
info
.
si_signo
=
si_signo
;
info
.
si_errno
=
0
;
info
.
si_code
=
si_code
;
info
.
si_addr
=
(
void
__user
*
)
address
;
info
.
si_addr_lsb
=
si_code
==
BUS_MCEERR_AR
?
PAGE_SHIFT
:
0
;
if
(
fault
&
VM_FAULT_HWPOISON_LARGE
)
lsb
=
hstate_index_to_shift
(
VM_FAULT_GET_HINDEX
(
fault
));
if
(
fault
&
VM_FAULT_HWPOISON
)
lsb
=
PAGE_SHIFT
;
info
.
si_addr_lsb
=
lsb
;
force_sig_info
(
si_signo
,
&
info
,
tsk
);
}
...
...
@@ -722,7 +728,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
tsk
->
thread
.
error_code
=
error_code
|
(
address
>=
TASK_SIZE
);
tsk
->
thread
.
trap_no
=
14
;
force_sig_info_fault
(
SIGSEGV
,
si_code
,
address
,
tsk
);
force_sig_info_fault
(
SIGSEGV
,
si_code
,
address
,
tsk
,
0
);
return
;
}
...
...
@@ -807,14 +813,14 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
tsk
->
thread
.
trap_no
=
14
;
#ifdef CONFIG_MEMORY_FAILURE
if
(
fault
&
VM_FAULT_HWPOISON
)
{
if
(
fault
&
(
VM_FAULT_HWPOISON
|
VM_FAULT_HWPOISON_LARGE
)
)
{
printk
(
KERN_ERR
"MCE: Killing %s:%d due to hardware memory corruption fault at %lx
\n
"
,
tsk
->
comm
,
tsk
->
pid
,
address
);
code
=
BUS_MCEERR_AR
;
}
#endif
force_sig_info_fault
(
SIGBUS
,
code
,
address
,
tsk
);
force_sig_info_fault
(
SIGBUS
,
code
,
address
,
tsk
,
fault
);
}
static
noinline
void
...
...
@@ -824,7 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
if
(
fault
&
VM_FAULT_OOM
)
{
out_of_memory
(
regs
,
error_code
,
address
);
}
else
{
if
(
fault
&
(
VM_FAULT_SIGBUS
|
VM_FAULT_HWPOISON
))
if
(
fault
&
(
VM_FAULT_SIGBUS
|
VM_FAULT_HWPOISON
|
VM_FAULT_HWPOISON_LARGE
))
do_sigbus
(
regs
,
error_code
,
address
,
fault
);
else
BUG
();
...
...
fs/hugetlbfs/inode.c
浏览文件 @
46e387bb
...
...
@@ -31,6 +31,7 @@
#include <linux/statfs.h>
#include <linux/security.h>
#include <linux/magic.h>
#include <linux/migrate.h>
#include <asm/uaccess.h>
...
...
@@ -573,6 +574,19 @@ static int hugetlbfs_set_page_dirty(struct page *page)
return
0
;
}
/*
 * Migration callback for hugetlbfs pages: first hand the mapping over to
 * @newpage, and copy the page only once that step has succeeded.
 *
 * Returns 0 on success, or the non-zero result of
 * migrate_huge_page_move_mapping() when the mapping could not be moved.
 */
static int hugetlbfs_migrate_page(struct address_space *mapping,
				struct page *newpage, struct page *page)
{
	int err = migrate_huge_page_move_mapping(mapping, newpage, page);

	if (!err)
		migrate_page_copy(newpage, page);

	return err;
}
static
int
hugetlbfs_statfs
(
struct
dentry
*
dentry
,
struct
kstatfs
*
buf
)
{
struct
hugetlbfs_sb_info
*
sbinfo
=
HUGETLBFS_SB
(
dentry
->
d_sb
);
...
...
@@ -659,6 +673,7 @@ static const struct address_space_operations hugetlbfs_aops = {
.
write_begin
=
hugetlbfs_write_begin
,
.
write_end
=
hugetlbfs_write_end
,
.
set_page_dirty
=
hugetlbfs_set_page_dirty
,
.
migratepage
=
hugetlbfs_migrate_page
,
};
...
...
include/linux/hugetlb.h
浏览文件 @
46e387bb
...
...
@@ -43,7 +43,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct
vm_area_struct
*
vma
,
int
acctflags
);
void
hugetlb_unreserve_pages
(
struct
inode
*
inode
,
long
offset
,
long
freed
);
void
__isolate_hwpoisoned_huge_page
(
struct
page
*
page
);
int
dequeue_hwpoisoned_huge_page
(
struct
page
*
page
);
void
copy_huge_page
(
struct
page
*
dst
,
struct
page
*
src
);
extern
unsigned
long
hugepages_treat_as_movable
;
extern
const
unsigned
long
hugetlb_zero
,
hugetlb_infinity
;
...
...
@@ -101,7 +102,10 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
#define huge_pte_offset(mm, address) 0
#define __isolate_hwpoisoned_huge_page(page) 0
#define dequeue_hwpoisoned_huge_page(page) 0
static
inline
void
copy_huge_page
(
struct
page
*
dst
,
struct
page
*
src
)
{
}
#define hugetlb_change_protection(vma, address, end, newprot)
...
...
@@ -228,6 +232,8 @@ struct huge_bootmem_page {
struct
hstate
*
hstate
;
};
struct
page
*
alloc_huge_page_node
(
struct
hstate
*
h
,
int
nid
);
/* arch callback */
int
__init
alloc_bootmem_huge_page
(
struct
hstate
*
h
);
...
...
@@ -301,8 +307,14 @@ static inline struct hstate *page_hstate(struct page *page)
return
size_to_hstate
(
PAGE_SIZE
<<
compound_order
(
page
));
}
/*
 * Return the shift (hstate order plus PAGE_SHIFT) for the hstate stored
 * at position @index of hstates[]. @index is not range-checked here.
 */
static inline unsigned hstate_index_to_shift(unsigned index)
{
	return PAGE_SHIFT + hstates[index].order;
}
#else
struct
hstate
{};
#define alloc_huge_page_node(h, nid) NULL
#define alloc_bootmem_huge_page(h) NULL
#define hstate_file(f) NULL
#define hstate_vma(v) NULL
...
...
@@ -317,6 +329,7 @@ static inline unsigned int pages_per_huge_page(struct hstate *h)
{
return
1
;
}
#define hstate_index_to_shift(index) 0
#endif
#endif
/* _LINUX_HUGETLB_H */
include/linux/migrate.h
浏览文件 @
46e387bb
...
...
@@ -14,6 +14,8 @@ extern int migrate_page(struct address_space *,
struct
page
*
,
struct
page
*
);
extern
int
migrate_pages
(
struct
list_head
*
l
,
new_page_t
x
,
unsigned
long
private
,
int
offlining
);
extern
int
migrate_huge_pages
(
struct
list_head
*
l
,
new_page_t
x
,
unsigned
long
private
,
int
offlining
);
extern
int
fail_migrate_page
(
struct
address_space
*
,
struct
page
*
,
struct
page
*
);
...
...
@@ -23,12 +25,17 @@ extern int migrate_prep_local(void);
extern
int
migrate_vmas
(
struct
mm_struct
*
mm
,
const
nodemask_t
*
from
,
const
nodemask_t
*
to
,
unsigned
long
flags
);
extern
void
migrate_page_copy
(
struct
page
*
newpage
,
struct
page
*
page
);
extern
int
migrate_huge_page_move_mapping
(
struct
address_space
*
mapping
,
struct
page
*
newpage
,
struct
page
*
page
);
#else
#define PAGE_MIGRATION 0
static
inline
void
putback_lru_pages
(
struct
list_head
*
l
)
{}
/*
 * Stub for the branch in which PAGE_MIGRATION is defined as 0:
 * ignores all arguments and always returns -ENOSYS.
 */
static inline int migrate_pages(struct list_head *l, new_page_t x,
		unsigned long private, int offlining) { return -ENOSYS; }
/*
 * Stub for the branch in which PAGE_MIGRATION is defined as 0:
 * ignores all arguments and always returns -ENOSYS.
 */
static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
		unsigned long private, int offlining) { return -ENOSYS; }
/* Stub for the PAGE_MIGRATION == 0 branch: always returns -ENOSYS. */
static inline int migrate_prep(void) { return -ENOSYS; }
/* Stub for the PAGE_MIGRATION == 0 branch: always returns -ENOSYS. */
static inline int migrate_prep_local(void) { return -ENOSYS; }
...
...
@@ -40,6 +47,15 @@ static inline int migrate_vmas(struct mm_struct *mm,
return
-
ENOSYS
;
}
static
inline
void
migrate_page_copy
(
struct
page
*
newpage
,
struct
page
*
page
)
{}
/*
 * Stub that ignores its arguments and always returns -ENOSYS.
 * NOTE(review): appears to sit in the same no-migration branch as the
 * other -ENOSYS stubs in this header -- confirm against the full file.
 */
static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
				  struct page *newpage, struct page *page)
{
	return -ENOSYS;
}
/* Possible settings for the migrate_page() method in address_operations */
#define migrate_page NULL
#define fail_migrate_page NULL
...
...
include/linux/mm.h
浏览文件 @
46e387bb
...
...
@@ -718,12 +718,20 @@ static inline int page_mapped(struct page *page)
#define VM_FAULT_SIGBUS 0x0002
#define VM_FAULT_MAJOR 0x0004
#define VM_FAULT_WRITE 0x0008
/* Special case for get_user_pages */
#define VM_FAULT_HWPOISON 0x0010
/* Hit poisoned page */
#define VM_FAULT_HWPOISON 0x0010
/* Hit poisoned small page */
#define VM_FAULT_HWPOISON_LARGE 0x0020
/* Hit poisoned large page. Index encoded in upper bits */
#define VM_FAULT_NOPAGE 0x0100
/* ->fault installed the pte, not return page */
#define VM_FAULT_LOCKED 0x0200
/* ->fault locked the returned page */
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)
#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000
/* encodes hpage index for large hwpoison */
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \
VM_FAULT_HWPOISON_LARGE)
/* Encode hstate index for a hwpoisoned large page */
#define VM_FAULT_SET_HINDEX(x) ((x) << 12)
#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf)
/*
* Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
...
...
mm/hugetlb.c
浏览文件 @
46e387bb
...
...
@@ -423,14 +423,14 @@ static void clear_huge_page(struct page *page,
}
}
static
void
copy_gigantic_page
(
struct
page
*
dst
,
struct
page
*
src
,
static
void
copy_
user_
gigantic_page
(
struct
page
*
dst
,
struct
page
*
src
,
unsigned
long
addr
,
struct
vm_area_struct
*
vma
)
{
int
i
;
struct
hstate
*
h
=
hstate_vma
(
vma
);
struct
page
*
dst_base
=
dst
;
struct
page
*
src_base
=
src
;
might_sleep
();
for
(
i
=
0
;
i
<
pages_per_huge_page
(
h
);
)
{
cond_resched
();
copy_user_highpage
(
dst
,
src
,
addr
+
i
*
PAGE_SIZE
,
vma
);
...
...
@@ -440,14 +440,15 @@ static void copy_gigantic_page(struct page *dst, struct page *src,
src
=
mem_map_next
(
src
,
src_base
,
i
);
}
}
static
void
copy_huge_page
(
struct
page
*
dst
,
struct
page
*
src
,
static
void
copy_user_huge_page
(
struct
page
*
dst
,
struct
page
*
src
,
unsigned
long
addr
,
struct
vm_area_struct
*
vma
)
{
int
i
;
struct
hstate
*
h
=
hstate_vma
(
vma
);
if
(
unlikely
(
pages_per_huge_page
(
h
)
>
MAX_ORDER_NR_PAGES
))
{
copy_gigantic_page
(
dst
,
src
,
addr
,
vma
);
copy_
user_
gigantic_page
(
dst
,
src
,
addr
,
vma
);
return
;
}
...
...
@@ -458,6 +459,40 @@ static void copy_huge_page(struct page *dst, struct page *src,
}
}
/*
 * Copy every subpage of the huge page @src into @dst, one page at a time.
 * The number of subpages comes from the hstate of @src. Both cursors are
 * advanced through mem_map_next() relative to their respective first page
 * instead of by plain pointer increments.
 */
static void copy_gigantic_page(struct page *dst, struct page *src)
{
	struct page *dst_first = dst;
	struct page *src_first = src;
	struct hstate *hs = page_hstate(src);
	int idx = 0;

	while (idx < pages_per_huge_page(hs)) {
		cond_resched();
		copy_highpage(dst, src);

		idx++;
		dst = mem_map_next(dst, dst_first, idx);
		src = mem_map_next(src, src_first, idx);
	}
}
/*
 * Copy the contents of huge page @src into @dst.
 *
 * Huge pages with more than MAX_ORDER_NR_PAGES subpages are delegated to
 * copy_gigantic_page(); all others are copied subpage by subpage using
 * direct indexing from the two head pages.
 */
void copy_huge_page(struct page *dst, struct page *src)
{
	struct hstate *hs = page_hstate(src);
	int idx;

	if (unlikely(pages_per_huge_page(hs) > MAX_ORDER_NR_PAGES)) {
		copy_gigantic_page(dst, src);
		return;
	}

	might_sleep();
	idx = 0;
	while (idx < pages_per_huge_page(hs)) {
		cond_resched();
		copy_highpage(dst + idx, src + idx);
		idx++;
	}
}
static
void
enqueue_huge_page
(
struct
hstate
*
h
,
struct
page
*
page
)
{
int
nid
=
page_to_nid
(
page
);
...
...
@@ -466,11 +501,24 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
h
->
free_huge_pages_node
[
nid
]
++
;
}
static
struct
page
*
dequeue_huge_page_node
(
struct
hstate
*
h
,
int
nid
)
{
struct
page
*
page
;
if
(
list_empty
(
&
h
->
hugepage_freelists
[
nid
]))
return
NULL
;
page
=
list_entry
(
h
->
hugepage_freelists
[
nid
].
next
,
struct
page
,
lru
);
list_del
(
&
page
->
lru
);
set_page_refcounted
(
page
);
h
->
free_huge_pages
--
;
h
->
free_huge_pages_node
[
nid
]
--
;
return
page
;
}
static
struct
page
*
dequeue_huge_page_vma
(
struct
hstate
*
h
,
struct
vm_area_struct
*
vma
,
unsigned
long
address
,
int
avoid_reserve
)
{
int
nid
;
struct
page
*
page
=
NULL
;
struct
mempolicy
*
mpol
;
nodemask_t
*
nodemask
;
...
...
@@ -496,21 +544,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
for_each_zone_zonelist_nodemask
(
zone
,
z
,
zonelist
,
MAX_NR_ZONES
-
1
,
nodemask
)
{
nid
=
zone_to_nid
(
zone
);
if
(
cpuset_zone_allowed_softwall
(
zone
,
htlb_alloc_mask
)
&&
!
list_empty
(
&
h
->
hugepage_freelists
[
nid
]))
{
page
=
list_entry
(
h
->
hugepage_freelists
[
nid
].
next
,
struct
page
,
lru
);
list_del
(
&
page
->
lru
);
h
->
free_huge_pages
--
;
h
->
free_huge_pages_node
[
nid
]
--
;
if
(
cpuset_zone_allowed_softwall
(
zone
,
htlb_alloc_mask
))
{
page
=
dequeue_huge_page_node
(
h
,
zone_to_nid
(
zone
));
if
(
page
)
{
if
(
!
avoid_reserve
)
decrement_hugepage_resv_vma
(
h
,
vma
);
break
;
}
}
}
err:
mpol_cond_put
(
mpol
);
put_mems_allowed
();
...
...
@@ -770,11 +812,10 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
return
ret
;
}
static
struct
page
*
alloc_buddy_huge_page
(
struct
hstate
*
h
,
struct
vm_area_struct
*
vma
,
unsigned
long
address
)
static
struct
page
*
alloc_buddy_huge_page
(
struct
hstate
*
h
,
int
nid
)
{
struct
page
*
page
;
unsigned
int
nid
;
unsigned
int
r_
nid
;
if
(
h
->
order
>=
MAX_ORDER
)
return
NULL
;
...
...
@@ -812,9 +853,14 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
}
spin_unlock
(
&
hugetlb_lock
);
if
(
nid
==
NUMA_NO_NODE
)
page
=
alloc_pages
(
htlb_alloc_mask
|
__GFP_COMP
|
__GFP_REPEAT
|
__GFP_NOWARN
,
huge_page_order
(
h
));
else
page
=
alloc_pages_exact_node
(
nid
,
htlb_alloc_mask
|
__GFP_COMP
|
__GFP_THISNODE
|
__GFP_REPEAT
|
__GFP_NOWARN
,
huge_page_order
(
h
));
if
(
page
&&
arch_prepare_hugepage
(
page
))
{
__free_pages
(
page
,
huge_page_order
(
h
));
...
...
@@ -823,19 +869,13 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
spin_lock
(
&
hugetlb_lock
);
if
(
page
)
{
/*
* This page is now managed by the hugetlb allocator and has
* no users -- drop the buddy allocator's reference.
*/
put_page_testzero
(
page
);
VM_BUG_ON
(
page_count
(
page
));
nid
=
page_to_nid
(
page
);
r_nid
=
page_to_nid
(
page
);
set_compound_page_dtor
(
page
,
free_huge_page
);
/*
* We incremented the global counters already
*/
h
->
nr_huge_pages_node
[
nid
]
++
;
h
->
surplus_huge_pages_node
[
nid
]
++
;
h
->
nr_huge_pages_node
[
r_
nid
]
++
;
h
->
surplus_huge_pages_node
[
r_
nid
]
++
;
__count_vm_event
(
HTLB_BUDDY_PGALLOC
);
}
else
{
h
->
nr_huge_pages
--
;
...
...
@@ -847,6 +887,25 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
return
page
;
}
/*
 * Allocate a huge page for node @nid without needing a vma.
 * Soft-offlining, for example, uses this because it only cares about the
 * physical address of the error page.
 *
 * A page is first taken from the free list of @nid under hugetlb_lock;
 * if none is available there, alloc_buddy_huge_page() is tried with the
 * lock released. Returns whatever that fallback returns in that case.
 */
struct page *alloc_huge_page_node(struct hstate *h, int nid)
{
	struct page *page;

	spin_lock(&hugetlb_lock);
	page = dequeue_huge_page_node(h, nid);
	spin_unlock(&hugetlb_lock);

	return page ? page : alloc_buddy_huge_page(h, nid);
}
/*
* Increase the hugetlb pool such that it can accommodate a reservation
* of size 'delta'.
...
...
@@ -871,17 +930,14 @@ static int gather_surplus_pages(struct hstate *h, int delta)
retry:
spin_unlock
(
&
hugetlb_lock
);
for
(
i
=
0
;
i
<
needed
;
i
++
)
{
page
=
alloc_buddy_huge_page
(
h
,
NU
LL
,
0
);
if
(
!
page
)
{
page
=
alloc_buddy_huge_page
(
h
,
NU
MA_NO_NODE
);
if
(
!
page
)
/*
* We were not able to allocate enough pages to
* satisfy the entire reservation so we free what
* we've allocated so far.
*/
spin_lock
(
&
hugetlb_lock
);
needed
=
0
;
goto
free
;
}
list_add
(
&
page
->
lru
,
&
surplus_list
);
}
...
...
@@ -908,31 +964,31 @@ static int gather_surplus_pages(struct hstate *h, int delta)
needed
+=
allocated
;
h
->
resv_huge_pages
+=
delta
;
ret
=
0
;
free:
spin_unlock
(
&
hugetlb_lock
);
/* Free the needed pages to the hugetlb pool */
list_for_each_entry_safe
(
page
,
tmp
,
&
surplus_list
,
lru
)
{
if
((
--
needed
)
<
0
)
break
;
list_del
(
&
page
->
lru
);
/*
* This page is now managed by the hugetlb allocator and has
* no users -- drop the buddy allocator's reference.
*/
put_page_testzero
(
page
);
VM_BUG_ON
(
page_count
(
page
));
enqueue_huge_page
(
h
,
page
);
}
/* Free unnecessary surplus pages to the buddy allocator */
free:
if
(
!
list_empty
(
&
surplus_list
))
{
spin_unlock
(
&
hugetlb_lock
);
list_for_each_entry_safe
(
page
,
tmp
,
&
surplus_list
,
lru
)
{
list_del
(
&
page
->
lru
);
/*
* The page has a reference count of zero already, so
* call free_huge_page directly instead of using
* put_page. This must be done with hugetlb_lock
* unlocked which is safe because free_huge_page takes
* hugetlb_lock before deciding how to free the page.
*/
free_huge_page
(
page
);
put_page
(
page
);
}
spin_lock
(
&
hugetlb_lock
);
}
spin_lock
(
&
hugetlb_lock
);
return
ret
;
}
...
...
@@ -1052,14 +1108,13 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
spin_unlock
(
&
hugetlb_lock
);
if
(
!
page
)
{
page
=
alloc_buddy_huge_page
(
h
,
vma
,
addr
);
page
=
alloc_buddy_huge_page
(
h
,
NUMA_NO_NODE
);
if
(
!
page
)
{
hugetlb_put_quota
(
inode
->
i_mapping
,
chg
);
return
ERR_PTR
(
-
VM_FAULT_SIGBUS
);
}
}
set_page_refcounted
(
page
);
set_page_private
(
page
,
(
unsigned
long
)
mapping
);
vma_commit_reservation
(
h
,
vma
,
addr
);
...
...
@@ -2153,6 +2208,19 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
return
-
ENOMEM
;
}
/*
 * Return 1 if @pte holds a migration entry, 0 otherwise.
 * Empty and present entries are rejected before the pte is decoded as a
 * swap entry.
 */
static int is_hugetlb_entry_migration(pte_t pte)
{
	swp_entry_t entry;

	if (huge_pte_none(pte))
		return 0;
	if (pte_present(pte))
		return 0;

	entry = pte_to_swp_entry(pte);
	return (non_swap_entry(entry) && is_migration_entry(entry)) ? 1 : 0;
}
static
int
is_hugetlb_entry_hwpoisoned
(
pte_t
pte
)
{
swp_entry_t
swp
;
...
...
@@ -2383,7 +2451,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
if
(
unlikely
(
anon_vma_prepare
(
vma
)))
return
VM_FAULT_OOM
;
copy_huge_page
(
new_page
,
old_page
,
address
,
vma
);
copy_
user_
huge_page
(
new_page
,
old_page
,
address
,
vma
);
__SetPageUptodate
(
new_page
);
/*
...
...
@@ -2515,20 +2583,18 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
hugepage_add_new_anon_rmap
(
page
,
vma
,
address
);
}
}
else
{
page_dup_rmap
(
page
);
}
/*
* Since memory error handler replaces pte into hwpoison swap entry
* at the time of error handling, a process which reserved but not have
* the mapping to the error hugepage does not have hwpoison swap entry.
* So we need to block accesses from such a process by checking
* PG_hwpoison bit here.
* If memory error occurs between mmap() and fault, some process
* don't have hwpoisoned swap entry for errored virtual address.
* So we need to block hugepage fault by PG_hwpoison bit check.
*/
if
(
unlikely
(
PageHWPoison
(
page
)))
{
ret
=
VM_FAULT_HWPOISON
;
ret
=
VM_FAULT_HWPOISON
|
VM_FAULT_SET_HINDEX
(
h
-
hstates
);
goto
backout_unlocked
;
}
page_dup_rmap
(
page
);
}
/*
* If we are going to COW a private mapping later, we examine the
...
...
@@ -2587,8 +2653,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
ptep
=
huge_pte_offset
(
mm
,
address
);
if
(
ptep
)
{
entry
=
huge_ptep_get
(
ptep
);
if
(
unlikely
(
is_hugetlb_entry_hwpoisoned
(
entry
)))
return
VM_FAULT_HWPOISON
;
if
(
unlikely
(
is_hugetlb_entry_migration
(
entry
)))
{
migration_entry_wait
(
mm
,
(
pmd_t
*
)
ptep
,
address
);
return
0
;
}
else
if
(
unlikely
(
is_hugetlb_entry_hwpoisoned
(
entry
)))
return
VM_FAULT_HWPOISON_LARGE
|
VM_FAULT_SET_HINDEX
(
h
-
hstates
);
}
ptep
=
huge_pte_alloc
(
mm
,
address
,
huge_page_size
(
h
));
...
...
@@ -2878,18 +2948,41 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
hugetlb_acct_memory
(
h
,
-
(
chg
-
freed
));
}
#ifdef CONFIG_MEMORY_FAILURE
/* Should be called in hugetlb_lock */
static
int
is_hugepage_on_freelist
(
struct
page
*
hpage
)
{
struct
page
*
page
;
struct
page
*
tmp
;
struct
hstate
*
h
=
page_hstate
(
hpage
);
int
nid
=
page_to_nid
(
hpage
);
list_for_each_entry_safe
(
page
,
tmp
,
&
h
->
hugepage_freelists
[
nid
],
lru
)
if
(
page
==
hpage
)
return
1
;
return
0
;
}
/*
* This function is called from memory failure code.
* Assume the caller holds page lock of the head page.
*/
void
__isolat
e_hwpoisoned_huge_page
(
struct
page
*
hpage
)
int
dequeu
e_hwpoisoned_huge_page
(
struct
page
*
hpage
)
{
struct
hstate
*
h
=
page_hstate
(
hpage
);
int
nid
=
page_to_nid
(
hpage
);
int
ret
=
-
EBUSY
;
spin_lock
(
&
hugetlb_lock
);
if
(
is_hugepage_on_freelist
(
hpage
))
{
list_del
(
&
hpage
->
lru
);
set_page_refcounted
(
hpage
);
h
->
free_huge_pages
--
;
h
->
free_huge_pages_node
[
nid
]
--
;
ret
=
0
;
}
spin_unlock
(
&
hugetlb_lock
);
return
ret
;
}
#endif
mm/memory-failure.c
浏览文件 @
46e387bb
...
...
@@ -697,11 +697,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
* Issues:
* - Error on hugepage is contained in hugepage unit (not in raw page unit.)
* To narrow down kill region to one page, we need to break up pmd.
* - To support soft-offlining for hugepage, we need to support hugepage
* migration.
*/
static
int
me_huge_page
(
struct
page
*
p
,
unsigned
long
pfn
)
{
int
res
=
0
;
struct
page
*
hpage
=
compound_head
(
p
);
/*
* We can safely recover from error on free or reserved (i.e.
...
...
@@ -714,7 +713,8 @@ static int me_huge_page(struct page *p, unsigned long pfn)
* so there is no race between isolation and mapping/unmapping.
*/
if
(
!
(
page_mapping
(
hpage
)
||
PageAnon
(
hpage
)))
{
__isolate_hwpoisoned_huge_page
(
hpage
);
res
=
dequeue_hwpoisoned_huge_page
(
hpage
);
if
(
!
res
)
return
RECOVERED
;
}
return
DELAYED
;
...
...
@@ -972,7 +972,10 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
* We need/can do nothing about count=0 pages.
* 1) it's a free page, and therefore in safe hand:
* prep_new_page() will be the gate keeper.
* 2) it's part of a non-compound high order page.
* 2) it's a free hugepage, which is also safe:
* an affected hugepage will be dequeued from hugepage freelist,
* so there's no concern about reusing it ever after.
* 3) it's part of a non-compound high order page.
* Implies some kernel user: cannot stop them from
* R/W the page; let's pray that the page has been
* used and will be freed some time later.
...
...
@@ -984,6 +987,24 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
if
(
is_free_buddy_page
(
p
))
{
action_result
(
pfn
,
"free buddy"
,
DELAYED
);
return
0
;
}
else
if
(
PageHuge
(
hpage
))
{
/*
* Check "just unpoisoned", "filter hit", and
* "race with other subpage."
*/
lock_page_nosync
(
hpage
);
if
(
!
PageHWPoison
(
hpage
)
||
(
hwpoison_filter
(
p
)
&&
TestClearPageHWPoison
(
p
))
||
(
p
!=
hpage
&&
TestSetPageHWPoison
(
hpage
)))
{
atomic_long_sub
(
nr_pages
,
&
mce_bad_pages
);
return
0
;
}
set_page_hwpoison_huge_page
(
hpage
);
res
=
dequeue_hwpoisoned_huge_page
(
hpage
);
action_result
(
pfn
,
"free huge"
,
res
?
IGNORED
:
DELAYED
);
unlock_page
(
hpage
);
return
res
;
}
else
{
action_result
(
pfn
,
"high order kernel"
,
IGNORED
);
return
-
EBUSY
;
...
...
@@ -1145,6 +1166,16 @@ int unpoison_memory(unsigned long pfn)
nr_pages
=
1
<<
compound_order
(
page
);
if
(
!
get_page_unless_zero
(
page
))
{
/*
* Since HWPoisoned hugepage should have non-zero refcount,
* race between memory failure and unpoison seems to happen.
* In such case unpoison fails and memory failure runs
* to the end.
*/
if
(
PageHuge
(
page
))
{
pr_debug
(
"MCE: Memory failure is now running on free hugepage %#lx
\n
"
,
pfn
);
return
0
;
}
if
(
TestClearPageHWPoison
(
p
))
atomic_long_sub
(
nr_pages
,
&
mce_bad_pages
);
pr_info
(
"MCE: Software-unpoisoned free page %#lx
\n
"
,
pfn
);
...
...
@@ -1162,9 +1193,9 @@ int unpoison_memory(unsigned long pfn)
pr_info
(
"MCE: Software-unpoisoned page %#lx
\n
"
,
pfn
);
atomic_long_sub
(
nr_pages
,
&
mce_bad_pages
);
freeit
=
1
;
}
if
(
PageHuge
(
p
))
if
(
PageHuge
(
page
))
clear_page_hwpoison_huge_page
(
page
);
}
unlock_page
(
page
);
put_page
(
page
);
...
...
@@ -1178,6 +1209,10 @@ EXPORT_SYMBOL(unpoison_memory);
/*
 * Allocation callback handed to the migration code: get a replacement
 * page on the same node as @p. Huge pages are allocated through
 * alloc_huge_page_node() using the hstate of the head page; anything else
 * is a single GFP_HIGHUSER_MOVABLE page. @private and @x are not used.
 */
static struct page *new_page(struct page *p, unsigned long private, int **x)
{
	int nid = page_to_nid(p);

	if (!PageHuge(p))
		return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);

	return alloc_huge_page_node(page_hstate(compound_head(p)), nid);
}
...
...
@@ -1206,8 +1241,15 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
* was free.
*/
set_migratetype_isolate
(
p
);
/*
* When the target page is a free hugepage, just remove it
* from free hugepage list.
*/
if
(
!
get_page_unless_zero
(
compound_head
(
p
)))
{
if
(
is_free_buddy_page
(
p
))
{
if
(
PageHuge
(
p
))
{
pr_info
(
"get_any_page: %#lx free huge page
\n
"
,
pfn
);
ret
=
dequeue_hwpoisoned_huge_page
(
compound_head
(
p
));
}
else
if
(
is_free_buddy_page
(
p
))
{
pr_info
(
"get_any_page: %#lx free buddy page
\n
"
,
pfn
);
/* Set hwpoison bit while page is still isolated */
SetPageHWPoison
(
p
);
...
...
@@ -1226,6 +1268,45 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
return
ret
;
}
/*
 * Soft-offline the huge page that contains @page.
 *
 * A negative result from get_any_page() is returned as is. A result of 0
 * skips migration and goes straight to marking the huge page poisoned.
 * Otherwise the head page is put on a private list and migrated with
 * migrate_huge_pages(), using new_page() as the allocator.
 *
 * Returns -EBUSY if the head page is already marked hwpoison, the
 * (negative) migration error on failure -- a positive count from
 * migrate_huge_pages() is reported as -EIO -- and otherwise the value
 * held in ret when the "done" label is reached (0 on both paths).
 */
static int soft_offline_huge_page(struct page *page, int flags)
{
	int ret;
	unsigned long pfn = page_to_pfn(page);
	struct page *hpage = compound_head(page);
	LIST_HEAD(pagelist);

	ret = get_any_page(page, pfn, flags);
	if (ret < 0)
		return ret;
	if (ret == 0)
		goto done;

	if (PageHWPoison(hpage)) {
		/* Drop the reference again before bailing out. */
		put_page(hpage);
		pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn);
		return -EBUSY;
	}

	/* Keep page count to indicate a given hugepage is isolated. */

	list_add(&hpage->lru, &pagelist);
	ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
	if (ret) {
		pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
			 pfn, ret, page->flags);
		/* A positive value is a count of pages, not an errno. */
		if (ret > 0)
			ret = -EIO;
		return ret;
	}
done:
	/* Account the whole huge page only if it was not poisoned before. */
	if (!PageHWPoison(hpage))
		atomic_long_add(1 << compound_order(hpage), &mce_bad_pages);
	set_page_hwpoison_huge_page(hpage);
	dequeue_hwpoisoned_huge_page(hpage);
	/* keep elevated page count for bad page */
	return ret;
}
/**
* soft_offline_page - Soft offline a page.
* @page: page to offline
...
...
@@ -1253,6 +1334,9 @@ int soft_offline_page(struct page *page, int flags)
int
ret
;
unsigned
long
pfn
=
page_to_pfn
(
page
);
if
(
PageHuge
(
page
))
return
soft_offline_huge_page
(
page
,
flags
);
ret
=
get_any_page
(
page
,
pfn
,
flags
);
if
(
ret
<
0
)
return
ret
;
...
...
mm/memory.c
浏览文件 @
46e387bb
...
...
@@ -1450,7 +1450,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if
(
ret
&
VM_FAULT_OOM
)
return
i
?
i
:
-
ENOMEM
;
if
(
ret
&
(
VM_FAULT_HWPOISON
|
VM_FAULT_SIGBUS
))
(
VM_FAULT_HWPOISON
|
VM_FAULT_HWPOISON_LARGE
|
VM_FAULT_SIGBUS
))
return
i
?
i
:
-
EFAULT
;
BUG
();
}
...
...
mm/migrate.c
浏览文件 @
46e387bb
...
...
@@ -32,6 +32,7 @@
#include <linux/security.h>
#include <linux/memcontrol.h>
#include <linux/syscalls.h>
#include <linux/hugetlb.h>
#include <linux/gfp.h>
#include "internal.h"
...
...
@@ -95,6 +96,12 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
pte_t
*
ptep
,
pte
;
spinlock_t
*
ptl
;
if
(
unlikely
(
PageHuge
(
new
)))
{
ptep
=
huge_pte_offset
(
mm
,
addr
);
if
(
!
ptep
)
goto
out
;
ptl
=
&
mm
->
page_table_lock
;
}
else
{
pgd
=
pgd_offset
(
mm
,
addr
);
if
(
!
pgd_present
(
*
pgd
))
goto
out
;
...
...
@@ -115,6 +122,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
}
ptl
=
pte_lockptr
(
mm
,
pmd
);
}
spin_lock
(
ptl
);
pte
=
*
ptep
;
if
(
!
is_swap_pte
(
pte
))
...
...
@@ -130,10 +139,19 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
pte
=
pte_mkold
(
mk_pte
(
new
,
vma
->
vm_page_prot
));
if
(
is_write_migration_entry
(
entry
))
pte
=
pte_mkwrite
(
pte
);
#ifdef CONFIG_HUGETLB_PAGE
if
(
PageHuge
(
new
))
pte
=
pte_mkhuge
(
pte
);
#endif
flush_cache_page
(
vma
,
addr
,
pte_pfn
(
pte
));
set_pte_at
(
mm
,
addr
,
ptep
,
pte
);
if
(
PageHuge
(
new
))
{
if
(
PageAnon
(
new
))
hugepage_add_anon_rmap
(
new
,
vma
,
addr
);
else
page_dup_rmap
(
new
);
}
else
if
(
PageAnon
(
new
))
page_add_anon_rmap
(
new
,
vma
,
addr
);
else
page_add_file_rmap
(
new
);
...
...
@@ -275,11 +293,59 @@ static int migrate_page_move_mapping(struct address_space *mapping,
return
0
;
}
/*
 * Replace @page with @newpage in @mapping's page tree.
 *
 * The expected number of remaining references is the same as that
 * of migrate_page_move_mapping().
 *
 * Without a mapping the page must have exactly one reference, otherwise
 * -EAGAIN is returned. With a mapping, everything below happens under
 * mapping->tree_lock: the reference count and the tree slot are checked,
 * the references are frozen, and only then is the slot switched over.
 *
 * Returns 0 on success and -EAGAIN whenever one of the checks fails.
 */
int migrate_huge_page_move_mapping(struct address_space *mapping,
				   struct page *newpage, struct page *page)
{
	int expected_count;
	void **pslot;

	if (!mapping) {
		if (page_count(page) != 1)
			return -EAGAIN;
		return 0;
	}

	spin_lock_irq(&mapping->tree_lock);

	pslot = radix_tree_lookup_slot(&mapping->page_tree,
					page_index(page));

	expected_count = 2 + page_has_private(page);
	/* Bail out if there are extra references or the slot changed. */
	if (page_count(page) != expected_count ||
	    (struct page *)radix_tree_deref_slot(pslot) != page) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	if (!page_freeze_refs(page, expected_count)) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/* Take a reference on the new page before it enters the tree. */
	get_page(newpage);

	radix_tree_replace_slot(pslot, newpage);

	page_unfreeze_refs(page, expected_count);

	/* The old page is no longer in the tree: drop one reference. */
	__put_page(page);

	spin_unlock_irq(&mapping->tree_lock);
	return 0;
}
/*
* Copy the page to its new location
*/
static
void
migrate_page_copy
(
struct
page
*
newpage
,
struct
page
*
page
)
void
migrate_page_copy
(
struct
page
*
newpage
,
struct
page
*
page
)
{
if
(
PageHuge
(
page
))
copy_huge_page
(
newpage
,
page
);
else
copy_highpage
(
newpage
,
page
);
if
(
PageError
(
page
))
...
...
@@ -723,6 +789,92 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
return
rc
;
}
/*
 * Counterpart of unmap_and_move() for hugepage migration.
 *
 * This function doesn't wait for the completion of hugepage I/O
 * because there is no race between I/O and migration for hugepage.
 * Note that currently hugepage I/O occurs only in direct I/O
 * where no lock is held and PG_writeback is irrelevant,
 * and writeback status of all subpages are counted in the reference
 * count of the head page (i.e. if all subpages of a 2MB hugepage are
 * under direct I/O, the reference of the head page is 512 and a bit more.)
 * This means that when we try to migrate hugepage whose subpages are
 * doing direct I/O, some references remain after try_to_unmap() and
 * hugepage migration fails without data corruption.
 *
 * There is also no race when direct I/O is issued on the page under migration,
 * because then pte is replaced with migration swap entry and direct I/O code
 * will wait in the page fault for migration to complete.
 *
 * @get_new_page: allocator for the destination hugepage
 * @private:      opaque value passed through to @get_new_page
 * @hpage:        hugepage to migrate
 * @force:        if non-zero, sleep in lock_page() when the page lock is
 *                contended instead of giving up with -EAGAIN
 * @offlining:    not used by this function
 *
 * Returns -ENOMEM if no destination page could be allocated, -EAGAIN if
 * the page could not be locked or is still mapped after try_to_unmap(),
 * and otherwise the result of move_to_new_page().
 */
static int unmap_and_move_huge_page(new_page_t get_new_page,
				unsigned long private, struct page *hpage,
				int force, int offlining)
{
	int rc = 0;
	int *result = NULL;
	struct page *new_hpage = get_new_page(hpage, private, &result);
	int rcu_locked = 0;
	struct anon_vma *anon_vma = NULL;

	if (!new_hpage)
		return -ENOMEM;

	rc = -EAGAIN;

	if (!trylock_page(hpage)) {
		/* Page lock not taken: "out" must not unlock the page. */
		if (!force)
			goto out;
		lock_page(hpage);
	}

	if (PageAnon(hpage)) {
		rcu_read_lock();
		rcu_locked = 1;

		if (page_mapped(hpage)) {
			/* Pin the anon_vma for the duration of the move. */
			anon_vma = page_anon_vma(hpage);
			atomic_inc(&anon_vma->external_refcount);
		}
	}

	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

	if (!page_mapped(hpage))
		rc = move_to_new_page(new_hpage, hpage, 1);

	/* On failure, point the migration entries back at the old page. */
	if (rc)
		remove_migration_ptes(hpage, hpage);

	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
					    &anon_vma->lock)) {
		int empty = list_empty(&anon_vma->head);

		spin_unlock(&anon_vma->lock);
		if (empty)
			anon_vma_free(anon_vma);
	}

	if (rcu_locked)
		rcu_read_unlock();

	/*
	 * Unlock before the "out" label: the only jump to "out" is taken
	 * when trylock_page() failed, i.e. when we do not own the lock.
	 */
	unlock_page(hpage);
out:
	if (rc != -EAGAIN) {
		list_del(&hpage->lru);
		put_page(hpage);
	}

	put_page(new_hpage);

	if (result) {
		if (rc)
			*result = rc;
		else
			*result = page_to_nid(new_hpage);
	}
	return rc;
}
/*
* migrate_pages
*
...
...
@@ -788,6 +940,52 @@ int migrate_pages(struct list_head *from,
return
nr_failed
+
retry
;
}
/*
 * Try to migrate every hugepage on the list @from, using @get_new_page
 * (called with @private) to obtain destination pages.
 *
 * Up to 10 passes are made over the list as long as the previous pass
 * saw at least one -EAGAIN; from the fourth pass on the page lock is
 * taken with force. -ENOMEM aborts immediately. Whatever is still on
 * the list at the end gets a put_page().
 *
 * Returns -ENOMEM if an allocation failed, otherwise the number of pages
 * that failed permanently plus those still asking for a retry.
 */
int migrate_huge_pages(struct list_head *from,
		new_page_t get_new_page, unsigned long private, int offlining)
{
	struct page *hpage;
	struct page *next;
	int again = 1;
	int failed = 0;
	int round = 0;
	int err;

	while (round < 10 && again) {
		again = 0;

		list_for_each_entry_safe(hpage, next, from, lru) {
			cond_resched();

			err = unmap_and_move_huge_page(get_new_page, private,
						hpage, round > 2, offlining);

			if (err == -ENOMEM)
				goto release;

			if (err == -EAGAIN)
				again++;
			else if (err != 0)
				failed++;	/* Permanent failure */
		}
		round++;
	}
	err = 0;
release:
	list_for_each_entry_safe(hpage, next, from, lru)
		put_page(hpage);

	return err ? err : failed + again;
}
#ifdef CONFIG_NUMA
/*
* Move a list of individual pages
...
...
mm/rmap.c
浏览文件 @
46e387bb
...
...
@@ -780,10 +780,10 @@ void page_move_anon_rmap(struct page *page,
}
/**
* __page_set_anon_rmap - setup new anonymous rmap
* @page:
the page to add the mapping to
* @vma:
the vm area in which the mapping is added
* @address:
the user virtual address mapped
* __page_set_anon_rmap - set
up new anonymous rmap
* @page:
Page to add to rmap
* @vma:
VM area to add page to.
* @address:
User virtual address of the mapping
* @exclusive: the page is exclusively owned by the current process
*/
static
void
__page_set_anon_rmap
(
struct
page
*
page
,
...
...
@@ -793,25 +793,16 @@ static void __page_set_anon_rmap(struct page *page,
BUG_ON
(
!
anon_vma
);
if
(
PageAnon
(
page
))
return
;
/*
* If the page isn't exclusively mapped into this vma,
* we must use the _oldest_ possible anon_vma for the
* page mapping!
*/
if
(
!
exclusive
)
{
if
(
PageAnon
(
page
))
return
;
if
(
!
exclusive
)
anon_vma
=
anon_vma
->
root
;
}
else
{
/*
* In this case, swapped-out-but-not-discarded swap-cache
* is remapped. So, no need to update page->mapping here.
* We are convinced that the anon_vma pointed to by page->mapping is not
* obsolete, because vma->anon_vma must belong to the same family.
*/
if
(
PageAnon
(
page
))
return
;
}
anon_vma
=
(
void
*
)
anon_vma
+
PAGE_MAPPING_ANON
;
page
->
mapping
=
(
struct
address_space
*
)
anon_vma
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录