openeuler / Kernel

Commit b5684579
Authored on Jun 08, 2018 by Dan Williams

Merge branch 'for-4.18/dax' into libnvdimm-for-next

Parents: 808c340b, cc4a90ac
Showing 21 changed files with 545 additions and 295 deletions (+545, -295).
Changed files:

    drivers/dax/super.c          +11    -3
    drivers/nvdimm/pfn_devs.c     +0    -2
    drivers/nvdimm/pmem.c        +25    -0
    fs/Kconfig                    +1    -0
    fs/dax.c                     +99   -16
    fs/xfs/xfs_file.c            +66    -6
    fs/xfs/xfs_inode.h           +16    -0
    fs/xfs/xfs_ioctl.c            +2    -6
    fs/xfs/xfs_iops.c            +10    -6
    fs/xfs/xfs_pnfs.c             +8    -7
    fs/xfs/xfs_pnfs.h             +3    -2
    include/linux/dax.h           +7    -0
    include/linux/memremap.h     +10   -26
    include/linux/mm.h           +53   -18
    kernel/Makefile               +2    -1
    kernel/iomem.c              +167    -0
    kernel/memremap.c            +30  -180
    mm/Kconfig                    +5    -0
    mm/gup.c                     +26   -10
    mm/hmm.c                      +2   -11
    mm/swap.c                     +2    -1
drivers/dax/super.c

@@ -86,6 +86,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
 {
    struct block_device *bdev = sb->s_bdev;
    struct dax_device *dax_dev;
+   bool dax_enabled = false;
    pgoff_t pgoff;
    int err, id;
    void *kaddr;

@@ -134,14 +135,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
         * on being able to do (page_address(pfn_to_page())).
         */
        WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
+       dax_enabled = true;
    } else if (pfn_t_devmap(pfn)) {
-       /* pass */;
-   } else {
+       struct dev_pagemap *pgmap;
+
+       pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
+       if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
+           dax_enabled = true;
+       put_dev_pagemap(pgmap);
+   }
+
+   if (!dax_enabled) {
        pr_debug("VFS (%s): error: dax support not enabled\n", sb->s_id);
        return -EOPNOTSUPP;
    }
    return 0;
 }
 EXPORT_SYMBOL_GPL(__bdev_dax_supported);
drivers/nvdimm/pfn_devs.c

@@ -561,8 +561,6 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
    res->start += start_pad;
    res->end -= end_trunc;

-   pgmap->type = MEMORY_DEVICE_HOST;
-
    if (nd_pfn->mode == PFN_MODE_RAM) {
        if (offset < SZ_8K)
            return -EINVAL;
drivers/nvdimm/pmem.c

@@ -289,6 +289,27 @@ static void pmem_release_disk(void *__pmem)
    put_disk(pmem->disk);
 }

+static void pmem_release_pgmap_ops(void *__pgmap)
+{
+   dev_pagemap_put_ops();
+}
+
+static void fsdax_pagefree(struct page *page, void *data)
+{
+   wake_up_var(&page->_refcount);
+}
+
+static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
+{
+   dev_pagemap_get_ops();
+   if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap))
+       return -ENOMEM;
+   pgmap->type = MEMORY_DEVICE_FS_DAX;
+   pgmap->page_free = fsdax_pagefree;
+   return 0;
+}
+
 static int pmem_attach_disk(struct device *dev,
        struct nd_namespace_common *ndns)
 {

@@ -347,6 +368,8 @@ static int pmem_attach_disk(struct device *dev,
    pmem->pfn_flags = PFN_DEV;
    pmem->pgmap.ref = &q->q_usage_counter;
    if (is_nd_pfn(dev)) {
+       if (setup_pagemap_fsdax(dev, &pmem->pgmap))
+           return -ENOMEM;
        addr = devm_memremap_pages(dev, &pmem->pgmap);
        pfn_sb = nd_pfn->pfn_sb;
        pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);

@@ -358,6 +381,8 @@ static int pmem_attach_disk(struct device *dev,
    } else if (pmem_should_map_pages(dev)) {
        memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
        pmem->pgmap.altmap_valid = false;
+       if (setup_pagemap_fsdax(dev, &pmem->pgmap))
+           return -ENOMEM;
        addr = devm_memremap_pages(dev, &pmem->pgmap);
        pmem->pfn_flags |= PFN_MAP;
        memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
fs/Kconfig

@@ -38,6 +38,7 @@ config FS_DAX
    bool "Direct Access (DAX) support"
    depends on MMU
    depends on !(ARM || MIPS || SPARC)
+   select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
    select FS_IOMAP
    select DAX
    help
fs/dax.c

@@ -351,6 +351,19 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
    }
 }

+static struct page *dax_busy_page(void *entry)
+{
+   unsigned long pfn;
+
+   for_each_mapped_pfn(entry, pfn) {
+       struct page *page = pfn_to_page(pfn);
+
+       if (page_ref_count(page) > 1)
+           return page;
+   }
+   return NULL;
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't

@@ -492,6 +505,90 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
    return entry;
 }

+/**
+ * dax_layout_busy_page - find first pinned page in @mapping
+ * @mapping: address space to scan for a page with ref count > 1
+ *
+ * DAX requires ZONE_DEVICE mapped pages. These pages are never
+ * 'onlined' to the page allocator so they are considered idle when
+ * page->count == 1. A filesystem uses this interface to determine if
+ * any page in the mapping is busy, i.e. for DMA, or other
+ * get_user_pages() usages.
+ *
+ * It is expected that the filesystem is holding locks to block the
+ * establishment of new mappings in this address_space. I.e. it expects
+ * to be able to run unmap_mapping_range() and subsequently not race
+ * mapping_mapped() becoming true.
+ */
+struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+   pgoff_t indices[PAGEVEC_SIZE];
+   struct page *page = NULL;
+   struct pagevec pvec;
+   pgoff_t index, end;
+   unsigned i;
+
+   /*
+    * In the 'limited' case get_user_pages() for dax is disabled.
+    */
+   if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+       return NULL;
+
+   if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+       return NULL;
+
+   pagevec_init(&pvec);
+   index = 0;
+   end = -1;
+
+   /*
+    * If we race get_user_pages_fast() here either we'll see the
+    * elevated page count in the pagevec_lookup and wait, or
+    * get_user_pages_fast() will see that the page it took a reference
+    * against is no longer mapped in the page tables and bail to the
+    * get_user_pages() slow path. The slow path is protected by
+    * pte_lock() and pmd_lock(). New references are not taken without
+    * holding those locks, and unmap_mapping_range() will not zero the
+    * pte or pmd without holding the respective lock, so we are
+    * guaranteed to either see new references or prevent new
+    * references from being established.
+    */
+   unmap_mapping_range(mapping, 0, 0, 1);
+
+   while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
+               min(end - index, (pgoff_t)PAGEVEC_SIZE),
+               indices)) {
+       for (i = 0; i < pagevec_count(&pvec); i++) {
+           struct page *pvec_ent = pvec.pages[i];
+           void *entry;
+
+           index = indices[i];
+           if (index >= end)
+               break;
+
+           if (!radix_tree_exceptional_entry(pvec_ent))
+               continue;
+
+           xa_lock_irq(&mapping->i_pages);
+           entry = get_unlocked_mapping_entry(mapping, index, NULL);
+           if (entry)
+               page = dax_busy_page(entry);
+           put_unlocked_mapping_entry(mapping, index, entry);
+           xa_unlock_irq(&mapping->i_pages);
+           if (page)
+               break;
+       }
+       pagevec_remove_exceptionals(&pvec);
+       pagevec_release(&pvec);
+       index++;
+
+       if (page)
+           break;
+   }
+   return page;
+}
+EXPORT_SYMBOL_GPL(dax_layout_busy_page);
+
 static int __dax_invalidate_mapping_entry(struct address_space *mapping,
                      pgoff_t index, bool trunc)
 {

@@ -912,7 +1009,6 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
    unsigned long vaddr = vmf->address;
    int ret = VM_FAULT_NOPAGE;
    struct page *zero_page;
-   void *entry2;
    pfn_t pfn;

    zero_page = ZERO_PAGE(0);

@@ -922,13 +1018,8 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
    }

    pfn = page_to_pfn_t(zero_page);
-   entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-           RADIX_DAX_ZERO_PAGE, false);
-   if (IS_ERR(entry2)) {
-       ret = VM_FAULT_SIGBUS;
-       goto out;
-   }
+   dax_insert_mapping_entry(mapping, vmf, entry, pfn,
+           RADIX_DAX_ZERO_PAGE, false);
    vm_insert_mixed(vmf->vma, vaddr, pfn);
 out:
    trace_dax_load_hole(inode, vmf, ret);

@@ -1240,10 +1331,6 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
        entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                         0, write && !sync);
-       if (IS_ERR(entry)) {
-           error = PTR_ERR(entry);
-           goto error_finish_iomap;
-       }

        /*
         * If we are doing synchronous page fault and inode needs fsync,

@@ -1327,8 +1414,6 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
    pfn = page_to_pfn_t(zero_page);
    ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
            RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
-   if (IS_ERR(ret))
-       goto fallback;

    ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
    if (!pmd_none(*(vmf->pmd))) {

@@ -1450,8 +1535,6 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
        entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                        RADIX_DAX_PMD, write && !sync);
-       if (IS_ERR(entry))
-           goto finish_iomap;

        /*
         * If we are doing synchronous page fault and inode needs fsync,
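The new dax_layout_busy_page() export is consumed by XFS below. As a minimal, hedged sketch of the intended call pattern (the function name here is hypothetical; the real consumer, xfs_break_dax_layouts() in the fs/xfs/xfs_file.c hunk further down, additionally drops and retakes its mapping lock around the sleep):

static int example_wait_for_dax_idle(struct inode *inode)
{
    struct page *page;

    /* returns the first page in the mapping with a refcount > 1 */
    page = dax_layout_busy_page(inode->i_mapping);
    if (!page)
        return 0;   /* nothing pinned, layout change may proceed */

    /* sleep until the pinned page's refcount drops back to 1 (idle);
     * the MEMORY_DEVICE_FS_DAX ->page_free() callback wakes us up */
    return ___wait_var_event(&page->_refcount,
            atomic_read(&page->_refcount) == 1,
            TASK_INTERRUPTIBLE, 0, 0, schedule());
}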
fs/xfs/xfs_file.c

@@ -312,7 +312,7 @@ xfs_file_aio_write_checks(
    if (error <= 0)
        return error;

-   error = xfs_break_layouts(inode, iolock);
+   error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
    if (error)
        return error;

@@ -718,6 +718,69 @@ xfs_file_write_iter(
    return ret;
 }

+static void
+xfs_wait_dax_page(
+   struct inode        *inode,
+   bool            *did_unlock)
+{
+   struct xfs_inode    *ip = XFS_I(inode);
+
+   *did_unlock = true;
+   xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+   schedule();
+   xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+}
+
+static int
+xfs_break_dax_layouts(
+   struct inode        *inode,
+   uint            iolock,
+   bool            *did_unlock)
+{
+   struct page     *page;
+
+   ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
+
+   page = dax_layout_busy_page(inode->i_mapping);
+   if (!page)
+       return 0;
+
+   return ___wait_var_event(&page->_refcount,
+           atomic_read(&page->_refcount) == 1,
+           TASK_INTERRUPTIBLE, 0, 0,
+           xfs_wait_dax_page(inode, did_unlock));
+}
+
+int
+xfs_break_layouts(
+   struct inode        *inode,
+   uint            *iolock,
+   enum layout_break_reason reason)
+{
+   bool            retry;
+   int         error;
+
+   ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+
+   do {
+       retry = false;
+       switch (reason) {
+       case BREAK_UNMAP:
+           error = xfs_break_dax_layouts(inode, *iolock, &retry);
+           if (error || retry)
+               break;
+           /* fall through */
+       case BREAK_WRITE:
+           error = xfs_break_leased_layouts(inode, iolock, &retry);
+           break;
+       default:
+           WARN_ON_ONCE(1);
+           error = -EINVAL;
+       }
+   } while (error == 0 && retry);
+
+   return error;
+}
+
 #define XFS_FALLOC_FL_SUPPORTED \
        (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
         FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \

@@ -734,7 +797,7 @@ xfs_file_fallocate(
    struct xfs_inode    *ip = XFS_I(inode);
    long            error;
    enum xfs_prealloc_flags flags = 0;
-   uint            iolock = XFS_IOLOCK_EXCL;
+   uint            iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
    loff_t          new_size = 0;
    bool            do_file_insert = false;

@@ -744,13 +807,10 @@ xfs_file_fallocate(
        return -EOPNOTSUPP;

    xfs_ilock(ip, iolock);
-   error = xfs_break_layouts(inode, &iolock);
+   error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
    if (error)
        goto out_unlock;

-   xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
-   iolock |= XFS_MMAPLOCK_EXCL;
-
    if (mode & FALLOC_FL_PUNCH_HOLE) {
        error = xfs_free_file_space(ip, offset, len);
        if (error)
fs/xfs/xfs_inode.h

@@ -378,6 +378,20 @@ static inline void xfs_ifunlock(struct xfs_inode *ip)
 #define XFS_ILOCK_DEP(flags)   (((flags) & XFS_ILOCK_DEP_MASK) \
                    >> XFS_ILOCK_SHIFT)

+/*
+ * Layouts are broken in the BREAK_WRITE case to ensure that
+ * layout-holders do not collide with local writes. Additionally,
+ * layouts are broken in the BREAK_UNMAP case to make sure the
+ * layout-holder has a consistent view of the file's extent map. While
+ * BREAK_WRITE breaks can be satisfied by recalling FL_LAYOUT leases,
+ * BREAK_UNMAP breaks additionally require waiting for busy dax-pages to
+ * go idle.
+ */
+enum layout_break_reason {
+        BREAK_WRITE,
+        BREAK_UNMAP,
+};
+
 /*
  * For multiple groups support: if S_ISGID bit is set in the parent
  * directory, group of new file is set to that of the parent, and

@@ -443,6 +457,8 @@ enum xfs_prealloc_flags {
 int    xfs_update_prealloc_flags(struct xfs_inode *ip,
                  enum xfs_prealloc_flags flags);
+int    xfs_break_layouts(struct inode *inode, uint *iolock,
+       enum layout_break_reason reason);

 /* from xfs_iops.c */
 extern void xfs_setup_inode(struct xfs_inode *ip);
fs/xfs/xfs_ioctl.c

@@ -39,7 +39,6 @@
 #include "xfs_icache.h"
 #include "xfs_symlink.h"
 #include "xfs_trans.h"
-#include "xfs_pnfs.h"
 #include "xfs_acl.h"
 #include "xfs_btree.h"
 #include <linux/fsmap.h>

@@ -614,7 +613,7 @@ xfs_ioc_space(
    struct xfs_inode    *ip = XFS_I(inode);
    struct iattr        iattr;
    enum xfs_prealloc_flags flags = 0;
-   uint            iolock = XFS_IOLOCK_EXCL;
+   uint            iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
    int         error;

    /*

@@ -644,13 +643,10 @@ xfs_ioc_space(
        return error;

    xfs_ilock(ip, iolock);
-   error = xfs_break_layouts(inode, &iolock);
+   error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
    if (error)
        goto out_unlock;

-   xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
-   iolock |= XFS_MMAPLOCK_EXCL;
-
    switch (bf->l_whence) {
    case 0: /*SEEK_SET*/
        break;
fs/xfs/xfs_iops.c

@@ -37,7 +37,6 @@
 #include "xfs_da_btree.h"
 #include "xfs_dir2.h"
 #include "xfs_trans_space.h"
-#include "xfs_pnfs.h"
 #include "xfs_iomap.h"

 #include <linux/capability.h>

@@ -1030,14 +1029,19 @@ xfs_vn_setattr(
    int         error;

    if (iattr->ia_valid & ATTR_SIZE) {
-       struct xfs_inode    *ip = XFS_I(d_inode(dentry));
-       uint            iolock = XFS_IOLOCK_EXCL;
+       struct inode        *inode = d_inode(dentry);
+       struct xfs_inode    *ip = XFS_I(inode);
+       uint            iolock;

-       error = xfs_break_layouts(d_inode(dentry), &iolock);
-       if (error)
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+       iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+
+       error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
+       if (error) {
+           xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
            return error;
+       }

-       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
        error = xfs_vn_setattr_size(dentry, iattr);
        xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
    } else {
fs/xfs/xfs_pnfs.c

@@ -31,19 +31,20 @@
  * rules in the page fault path we don't bother.
  */
 int
-xfs_break_layouts(
+xfs_break_leased_layouts(
    struct inode        *inode,
-   uint            *iolock)
+   uint            *iolock,
+   bool            *did_unlock)
 {
    struct xfs_inode    *ip = XFS_I(inode);
    int         error;

-   ASSERT(xfs_isilocked(ip, XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
-
    while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
        xfs_iunlock(ip, *iolock);
+       *did_unlock = true;
        error = break_layout(inode, true);
-       *iolock = XFS_IOLOCK_EXCL;
+       *iolock &= ~XFS_IOLOCK_SHARED;
+       *iolock |= XFS_IOLOCK_EXCL;
        xfs_ilock(ip, *iolock);
    }

@@ -120,8 +121,8 @@ xfs_fs_map_blocks(
     * Lock out any other I/O before we flush and invalidate the pagecache,
     * and then hand out a layout to the remote system. This is very
     * similar to direct I/O, except that the synchronization is much more
-    * complicated. See the comment near xfs_break_layouts for a detailed
-    * explanation.
+    * complicated. See the comment near xfs_break_leased_layouts
+    * for a detailed explanation.
     */
    xfs_ilock(ip, XFS_IOLOCK_EXCL);
fs/xfs/xfs_pnfs.h

@@ -9,10 +9,11 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
 int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps,
        struct iattr *iattr);

-int xfs_break_layouts(struct inode *inode, uint *iolock);
+int xfs_break_leased_layouts(struct inode *inode, uint *iolock,
+       bool *did_unlock);
 #else
 static inline int
-xfs_break_layouts(struct inode *inode, uint *iolock)
+xfs_break_leased_layouts(struct inode *inode, uint *iolock, bool *did_unlock)
 {
    return 0;
 }
include/linux/dax.h

@@ -83,6 +83,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
 struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
 int dax_writeback_mapping_range(struct address_space *mapping,
        struct block_device *bdev, struct writeback_control *wbc);
+
+struct page *dax_layout_busy_page(struct address_space *mapping);
 #else
 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {

@@ -103,6 +105,11 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
    return NULL;
 }

+static inline struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+   return NULL;
+}
+
 static inline int dax_writeback_mapping_range(struct address_space *mapping,
        struct block_device *bdev, struct writeback_control *wbc)
 {
include/linux/memremap.h

 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_MEMREMAP_H_
 #define _LINUX_MEMREMAP_H_
-#include <linux/mm.h>
 #include <linux/ioport.h>
 #include <linux/percpu-refcount.h>

@@ -30,13 +29,6 @@ struct vmem_altmap {
  * Specialize ZONE_DEVICE memory into multiple types each having differents
  * usage.
  *
- * MEMORY_DEVICE_HOST:
- * Persistent device memory (pmem): struct page might be allocated in different
- * memory and architecture might want to perform special actions. It is similar
- * to regular memory, in that the CPU can access it transparently. However,
- * it is likely to have different bandwidth and latency than regular memory.
- * See Documentation/nvdimm/nvdimm.txt for more information.
- *
  * MEMORY_DEVICE_PRIVATE:
  * Device memory that is not directly addressable by the CPU: CPU can neither
  * read nor write private memory. In this case, we do still have struct pages

@@ -53,11 +45,19 @@ struct vmem_altmap {
  * driver can hotplug the device memory using ZONE_DEVICE and with that memory
  * type. Any page of a process can be migrated to such memory. However no one
  * should be allow to pin such memory so that it can always be evicted.
+ *
+ * MEMORY_DEVICE_FS_DAX:
+ * Host memory that has similar access semantics as System RAM i.e. DMA
+ * coherent and supports page pinning. In support of coordinating page
+ * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a
+ * wakeup event whenever a page is unpinned and becomes idle. This
+ * wakeup is used to coordinate physical address space management (ex:
+ * fs truncate/hole punch) vs pinned pages (ex: device dma).
  */
 enum memory_type {
-   MEMORY_DEVICE_HOST = 0,
-   MEMORY_DEVICE_PRIVATE,
+   MEMORY_DEVICE_PRIVATE = 1,
    MEMORY_DEVICE_PUBLIC,
+   MEMORY_DEVICE_FS_DAX,
 };

@@ -129,8 +129,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
 void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
-
-static inline bool is_zone_device_page(const struct page *page);
 #else
 static inline void *devm_memremap_pages(struct device *dev,
        struct dev_pagemap *pgmap)

@@ -161,20 +159,6 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
 }
 #endif /* CONFIG_ZONE_DEVICE */

-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-static inline bool is_device_private_page(const struct page *page)
-{
-   return is_zone_device_page(page) &&
-       page->pgmap->type == MEMORY_DEVICE_PRIVATE;
-}
-
-static inline bool is_device_public_page(const struct page *page)
-{
-   return is_zone_device_page(page) &&
-       page->pgmap->type == MEMORY_DEVICE_PUBLIC;
-}
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
 {
    if (pgmap)
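For reference, a ZONE_DEVICE driver opts a range into these MEMORY_DEVICE_FS_DAX semantics by taking the pagemap-ops reference and wiring a wakeup into ->page_free(). A condensed, hypothetical sketch of what the drivers/nvdimm/pmem.c hunk above (setup_pagemap_fsdax()) does; names prefixed with example_ are not in this commit:

static void example_fsdax_page_free(struct page *page, void *data)
{
    /* wake anyone sleeping in a dax_layout_busy_page()-style wait */
    wake_up_var(&page->_refcount);
}

static void example_setup_fsdax_pgmap(struct dev_pagemap *pgmap)
{
    dev_pagemap_get_ops();          /* enable the devmap_managed_key static key */
    pgmap->type = MEMORY_DEVICE_FS_DAX;
    pgmap->page_free = example_fsdax_page_free;
}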
include/linux/mm.h

@@ -821,27 +821,65 @@ static inline bool is_zone_device_page(const struct page *page)
 }
 #endif

-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page);
-DECLARE_STATIC_KEY_FALSE(device_private_key);
-#define IS_HMM_ENABLED static_branch_unlikely(&device_private_key)
-static inline bool is_device_private_page(const struct page *page);
-static inline bool is_device_public_page(const struct page *page);
-#else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-static inline void put_zone_device_private_or_public_page(struct page *page)
-{
-}
-#define IS_HMM_ENABLED 0
-static inline bool is_device_private_page(const struct page *page)
-{
-   return false;
-}
-static inline bool is_device_public_page(const struct page *page)
-{
-   return false;
-}
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+void dev_pagemap_get_ops(void);
+void dev_pagemap_put_ops(void);
+void __put_devmap_managed_page(struct page *page);
+DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
+static inline bool put_devmap_managed_page(struct page *page)
+{
+   if (!static_branch_unlikely(&devmap_managed_key))
+       return false;
+   if (!is_zone_device_page(page))
+       return false;
+   switch (page->pgmap->type) {
+   case MEMORY_DEVICE_PRIVATE:
+   case MEMORY_DEVICE_PUBLIC:
+   case MEMORY_DEVICE_FS_DAX:
+       __put_devmap_managed_page(page);
+       return true;
+   default:
+       break;
+   }
+   return false;
+}
+
+static inline bool is_device_private_page(const struct page *page)
+{
+   return is_zone_device_page(page) &&
+       page->pgmap->type == MEMORY_DEVICE_PRIVATE;
+}
+
+static inline bool is_device_public_page(const struct page *page)
+{
+   return is_zone_device_page(page) &&
+       page->pgmap->type == MEMORY_DEVICE_PUBLIC;
+}
+
+#else /* CONFIG_DEV_PAGEMAP_OPS */
+static inline void dev_pagemap_get_ops(void)
+{
+}
+
+static inline void dev_pagemap_put_ops(void)
+{
+}
+
+static inline bool put_devmap_managed_page(struct page *page)
+{
+   return false;
+}
+
+static inline bool is_device_private_page(const struct page *page)
+{
+   return false;
+}
+
+static inline bool is_device_public_page(const struct page *page)
+{
+   return false;
+}
+#endif /* CONFIG_DEV_PAGEMAP_OPS */

 static inline void get_page(struct page *page)
 {

@@ -859,16 +897,13 @@ static inline void put_page(struct page *page)
    page = compound_head(page);

    /*
-    * For private device pages we need to catch refcount transition from
-    * 2 to 1, when refcount reach one it means the private device page is
-    * free and we need to inform the device driver through callback. See
+    * For devmap managed pages we need to catch refcount transition from
+    * 2 to 1, when refcount reach one it means the page is free and we
+    * need to inform the device driver through callback. See
     * include/linux/memremap.h and HMM for details.
     */
-   if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) ||
-       unlikely(is_device_public_page(page)))) {
-       put_zone_device_private_or_public_page(page);
+   if (put_devmap_managed_page(page))
        return;
-   }

    if (put_page_testzero(page))
        __put_page(page);
kernel/Makefile

@@ -112,7 +112,8 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o

-obj-$(CONFIG_HAS_IOMEM) += memremap.o
+obj-$(CONFIG_HAS_IOMEM) += iomem.o
+obj-$(CONFIG_ZONE_DEVICE) += memremap.o

 $(obj)/configs.o: $(obj)/config_data.h
kernel/iomem.c (new file, mode 100644)

/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/device.h>
#include <linux/types.h>
#include <linux/io.h>
#include <linux/mm.h>

#ifndef ioremap_cache
/* temporary while we convert existing ioremap_cache users to memremap */
__weak void __iomem *ioremap_cache(resource_size_t offset, unsigned long size)
{
    return ioremap(offset, size);
}
#endif

#ifndef arch_memremap_wb
static void *arch_memremap_wb(resource_size_t offset, unsigned long size)
{
    return (__force void *)ioremap_cache(offset, size);
}
#endif

#ifndef arch_memremap_can_ram_remap
static bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
                    unsigned long flags)
{
    return true;
}
#endif

static void *try_ram_remap(resource_size_t offset, size_t size,
               unsigned long flags)
{
    unsigned long pfn = PHYS_PFN(offset);

    /* In the simple case just return the existing linear address */
    if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)) &&
        arch_memremap_can_ram_remap(offset, size, flags))
        return __va(offset);

    return NULL; /* fallback to arch_memremap_wb */
}

/**
 * memremap() - remap an iomem_resource as cacheable memory
 * @offset: iomem resource start address
 * @size: size of remap
 * @flags: any of MEMREMAP_WB, MEMREMAP_WT, MEMREMAP_WC,
 *         MEMREMAP_ENC, MEMREMAP_DEC
 *
 * memremap() is "ioremap" for cases where it is known that the resource
 * being mapped does not have i/o side effects and the __iomem
 * annotation is not applicable. In the case of multiple flags, the different
 * mapping types will be attempted in the order listed below until one of
 * them succeeds.
 *
 * MEMREMAP_WB - matches the default mapping for System RAM on
 * the architecture. This is usually a read-allocate write-back cache.
 * Morever, if MEMREMAP_WB is specified and the requested remap region is RAM
 * memremap() will bypass establishing a new mapping and instead return
 * a pointer into the direct map.
 *
 * MEMREMAP_WT - establish a mapping whereby writes either bypass the
 * cache or are written through to memory and never exist in a
 * cache-dirty state with respect to program visibility. Attempts to
 * map System RAM with this mapping type will fail.
 *
 * MEMREMAP_WC - establish a writecombine mapping, whereby writes may
 * be coalesced together (e.g. in the CPU's write buffers), but is otherwise
 * uncached. Attempts to map System RAM with this mapping type will fail.
 */
void *memremap(resource_size_t offset, size_t size, unsigned long flags)
{
    int is_ram = region_intersects(offset, size,
                       IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
    void *addr = NULL;

    if (!flags)
        return NULL;

    if (is_ram == REGION_MIXED) {
        WARN_ONCE(1, "memremap attempted on mixed range %pa size: %#lx\n",
                &offset, (unsigned long) size);
        return NULL;
    }

    /* Try all mapping types requested until one returns non-NULL */
    if (flags & MEMREMAP_WB) {
        /*
         * MEMREMAP_WB is special in that it can be satisifed
         * from the direct map. Some archs depend on the
         * capability of memremap() to autodetect cases where
         * the requested range is potentially in System RAM.
         */
        if (is_ram == REGION_INTERSECTS)
            addr = try_ram_remap(offset, size, flags);
        if (!addr)
            addr = arch_memremap_wb(offset, size);
    }

    /*
     * If we don't have a mapping yet and other request flags are
     * present then we will be attempting to establish a new virtual
     * address mapping. Enforce that this mapping is not aliasing
     * System RAM.
     */
    if (!addr && is_ram == REGION_INTERSECTS && flags != MEMREMAP_WB) {
        WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
                &offset, (unsigned long) size);
        return NULL;
    }

    if (!addr && (flags & MEMREMAP_WT))
        addr = ioremap_wt(offset, size);

    if (!addr && (flags & MEMREMAP_WC))
        addr = ioremap_wc(offset, size);

    return addr;
}
EXPORT_SYMBOL(memremap);

void memunmap(void *addr)
{
    if (is_vmalloc_addr(addr))
        iounmap((void __iomem *) addr);
}
EXPORT_SYMBOL(memunmap);

static void devm_memremap_release(struct device *dev, void *res)
{
    memunmap(*(void **)res);
}

static int devm_memremap_match(struct device *dev, void *res, void *match_data)
{
    return *(void **)res == match_data;
}

void *devm_memremap(struct device *dev, resource_size_t offset,
        size_t size, unsigned long flags)
{
    void **ptr, *addr;

    ptr = devres_alloc_node(devm_memremap_release, sizeof(*ptr), GFP_KERNEL,
            dev_to_node(dev));
    if (!ptr)
        return ERR_PTR(-ENOMEM);

    addr = memremap(offset, size, flags);
    if (addr) {
        *ptr = addr;
        devres_add(dev, ptr);
    } else {
        devres_free(ptr);
        return ERR_PTR(-ENXIO);
    }

    return addr;
}
EXPORT_SYMBOL(devm_memremap);

void devm_memunmap(struct device *dev, void *addr)
{
    WARN_ON(devres_release(dev, devm_memremap_release,
                devm_memremap_match, addr));
}
EXPORT_SYMBOL(devm_memunmap);
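The memremap() API itself is unchanged by the move into kernel/iomem.c. A hedged usage sketch (not part of this commit; the physical address and length are hypothetical placeholders, and the example_* names are not kernel symbols):

static void *example_map_region(resource_size_t phys, size_t len)
{
    /* request a cacheable (write-back) mapping; returns NULL on failure */
    void *va = memremap(phys, len, MEMREMAP_WB);

    if (!va)
        return NULL;
    /* ... access va like ordinary memory, no __iomem annotation needed ... */
    return va;
}

static void example_unmap_region(void *va)
{
    memunmap(va);
}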
kernel/memremap.c

-/*
- * Copyright(c) 2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2015 Intel Corporation. All rights reserved. */
 #include <linux/radix-tree.h>
 #include <linux/device.h>
 #include <linux/types.h>

@@ -19,170 +9,8 @@
 #include <linux/memory_hotplug.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/wait_bit.h>

-[ioremap_cache(), arch_memremap_wb(), arch_memremap_can_ram_remap(),
- try_ram_remap(), memremap() and its kerneldoc, memunmap(),
- devm_memremap_release(), devm_memremap_match(), devm_memremap() and
- devm_memunmap() are deleted from this file; the removed lines are
- identical to the new kernel/iomem.c shown above.]
-#ifdef CONFIG_ZONE_DEVICE
 static DEFINE_MUTEX(pgmap_lock);
 static RADIX_TREE(pgmap_radix, GFP_KERNEL);
 #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)

@@ -473,10 +301,32 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
    return pgmap;
 }
-#endif /* CONFIG_ZONE_DEVICE */
+EXPORT_SYMBOL_GPL(get_dev_pagemap);
+
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
+EXPORT_SYMBOL_GPL(devmap_managed_key);
+static atomic_t devmap_enable;
+
+/*
+ * Toggle the static key for ->page_free() callbacks when dev_pagemap
+ * pages go idle.
+ */
+void dev_pagemap_get_ops(void)
+{
+   if (atomic_inc_return(&devmap_enable) == 1)
+       static_branch_enable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_get_ops);
+
+void dev_pagemap_put_ops(void)
+{
+   if (atomic_dec_and_test(&devmap_enable))
+       static_branch_disable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_put_ops);

-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page)
+void __put_devmap_managed_page(struct page *page)
 {
    int count = page_ref_dec_return(page);

@@ -496,5 +346,5 @@ void put_zone_device_private_or_public_page(struct page *page)
    } else if (!count)
        __put_page(page);
 }
-EXPORT_SYMBOL(put_zone_device_private_or_public_page);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
+EXPORT_SYMBOL_GPL(__put_devmap_managed_page);
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
mm/Kconfig

@@ -693,6 +693,9 @@ config ARCH_HAS_HMM
 config MIGRATE_VMA_HELPER
    bool

+config DEV_PAGEMAP_OPS
+   bool
+
 config HMM
    bool
    select MIGRATE_VMA_HELPER

@@ -713,6 +716,7 @@ config DEVICE_PRIVATE
    bool "Unaddressable device memory (GPU memory, ...)"
    depends on ARCH_HAS_HMM
    select HMM
+   select DEV_PAGEMAP_OPS

    help
      Allows creation of struct pages to represent unaddressable device

@@ -723,6 +727,7 @@ config DEVICE_PUBLIC
    bool "Addressable device memory (like GPU memory)"
    depends on ARCH_HAS_HMM
    select HMM
+   select DEV_PAGEMAP_OPS

    help
      Allows creation of struct pages to represent addressable device
mm/gup.c

@@ -1459,32 +1459,48 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
    return 1;
 }

-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
        unsigned long end, struct page **pages, int *nr)
 {
    unsigned long fault_pfn;
+   int nr_start = *nr;
+
+   fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+   if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
+       return 0;

-   fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-   return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+   if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
+       undo_dev_pagemap(nr, nr_start, pages);
+       return 0;
+   }
+   return 1;
 }

-static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
        unsigned long end, struct page **pages, int *nr)
 {
    unsigned long fault_pfn;
+   int nr_start = *nr;
+
+   fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+   if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
+       return 0;

-   fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-   return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+   if (unlikely(pud_val(orig) != pud_val(*pudp))) {
+       undo_dev_pagemap(nr, nr_start, pages);
+       return 0;
+   }
+   return 1;
 }
 #else
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
        unsigned long end, struct page **pages, int *nr)
 {
    BUILD_BUG();
    return 0;
 }

-static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
        unsigned long end, struct page **pages, int *nr)
 {
    BUILD_BUG();

@@ -1502,7 +1518,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
        return 0;

    if (pmd_devmap(orig))
-       return __gup_device_huge_pmd(orig, addr, end, pages, nr);
+       return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);

    refs = 0;
    page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);

@@ -1540,7 +1556,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
        return 0;

    if (pud_devmap(orig))
-       return __gup_device_huge_pud(orig, addr, end, pages, nr);
+       return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr);

    refs = 0;
    page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
mm/hmm.c

@@ -35,15 +35,6 @@
 #define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)

-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-/*
- * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h
- */
-DEFINE_STATIC_KEY_FALSE(device_private_key);
-EXPORT_SYMBOL(device_private_key);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
 #if IS_ENABLED(CONFIG_HMM_MIRROR)
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops;

@@ -1167,7 +1158,7 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
    resource_size_t addr;
    int ret;

-   static_branch_enable(&device_private_key);
+   dev_pagemap_get_ops();

    devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
                   GFP_KERNEL, dev_to_node(device));

@@ -1261,7 +1252,7 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
    if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY)
        return ERR_PTR(-EINVAL);

-   static_branch_enable(&device_private_key);
+   dev_pagemap_get_ops();

    devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
                   GFP_KERNEL, dev_to_node(device));
mm/swap.c

@@ -29,6 +29,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/backing-dev.h>
+#include <linux/memremap.h>
 #include <linux/memcontrol.h>
 #include <linux/gfp.h>
 #include <linux/uio.h>

@@ -743,7 +744,7 @@ void release_pages(struct page **pages, int nr)
                               flags);
                locked_pgdat = NULL;
            }
-           put_zone_device_private_or_public_page(page);
+           put_devmap_managed_page(page);
            continue;
        }