Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
7a4ccf59
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7a4ccf59
编写于
1月 19, 2021
作者:
J
JZ-LIANG
提交者:
GitHub
1月 19, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Recompute Offload: fixed bug in memcpy (#30484) (#30517)
上级
1bd284cd
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
12 addition
and
10 deletion
+12
-10
paddle/fluid/operators/memcpy_op.h
paddle/fluid/operators/memcpy_op.h
+2
-2
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+7
-5
python/paddle/fluid/tests/unittests/test_memcpy_op.py
python/paddle/fluid/tests/unittests/test_memcpy_op.py
+3
-3
未找到文件。
paddle/fluid/operators/memcpy_op.h
浏览文件 @
7a4ccf59
...
...
@@ -38,10 +38,10 @@ class MemcpyFunctor {
void
operator
()(
const
framework
::
LoDTensor
&
lod_tensor
)
const
{
auto
&
out_tensor
=
*
out_
->
GetMutable
<
framework
::
LoDTensor
>
();
if
(
dst_place_type_
==
3
)
{
if
(
dst_place_type_
==
2
)
{
framework
::
TensorCopy
(
lod_tensor
,
platform
::
CUDAPinnedPlace
(),
dev_ctx_
,
&
out_tensor
);
}
else
if
(
dst_place_type_
==
2
)
{
}
else
if
(
dst_place_type_
==
1
)
{
framework
::
TensorCopy
(
lod_tensor
,
dev_ctx_
.
GetPlace
(),
dev_ctx_
,
&
out_tensor
);
}
else
{
...
...
python/paddle/fluid/optimizer.py
100644 → 100755
浏览文件 @
7a4ccf59
...
...
@@ -4773,7 +4773,7 @@ class RecomputeOptimizer(Optimizer):
return
def
_insert_async_memcpy_op
(
self
,
insert_idx
,
src_varname
,
dst_varname
,
op_role
,
kind
):
op_role
,
dst_place_type
):
OP_ROLE_KEY
=
core
.
op_proto_and_checker_maker
.
kOpRoleAttrName
()
self
.
block
.
_insert_op_without_sync
(
insert_idx
,
...
...
@@ -4782,8 +4782,10 @@ class RecomputeOptimizer(Optimizer):
outputs
=
{
'Out'
:
[
self
.
_main_program
.
global_block
().
var
(
dst_varname
)]
},
attrs
=
{
"dst_place_type"
:
int
(
kind
),
OP_ROLE_KEY
:
op_role
})
attrs
=
{
"dst_place_type"
:
int
(
dst_place_type
),
OP_ROLE_KEY
:
op_role
})
def
_insert_fetch_op
(
self
,
idx
,
varname
):
assert
varname
in
self
.
checkpoint_name2pinned_name
,
"Try to fetch {} from Pinned Memory, but it is NOT a checkpoint"
.
format
(
...
...
@@ -4791,13 +4793,13 @@ class RecomputeOptimizer(Optimizer):
pinned_varname
=
self
.
checkpoint_name2pinned_name
[
varname
]
fetch_varname
=
self
.
checkpoint_name2fetch_name
[
varname
]
self
.
_insert_async_memcpy_op
(
idx
,
pinned_varname
,
fetch_varname
,
1
,
2
)
self
.
_insert_async_memcpy_op
(
idx
,
pinned_varname
,
fetch_varname
,
1
,
1
)
def
_insert_offload_op
(
self
,
idx
,
varname
):
assert
varname
in
self
.
checkpoint_name2pinned_name
,
"Try to offload {} to Pinned Memory, but it is NOT a checkpoint"
.
format
(
varname
)
pinned_varname
=
self
.
checkpoint_name2pinned_name
[
varname
]
self
.
_insert_async_memcpy_op
(
idx
,
varname
,
pinned_varname
,
0
,
3
)
self
.
_insert_async_memcpy_op
(
idx
,
varname
,
pinned_varname
,
0
,
2
)
def
_insert_sync_op
(
self
,
op_idx
,
checkpoint_name
):
# single stream offload no need sync
...
...
python/paddle/fluid/tests/unittests/test_memcpy_op.py
浏览文件 @
7a4ccf59
...
...
@@ -70,7 +70,7 @@ class TestMemcpy_FillConstant(unittest.TestCase):
type
=
'memcpy'
,
inputs
=
{
'X'
:
gpu_var
},
outputs
=
{
'Out'
:
pinned_var
},
attrs
=
{
'dst_place_type'
:
3
})
attrs
=
{
'dst_place_type'
:
2
})
place
=
fluid
.
CUDAPlace
(
0
)
exe
=
fluid
.
Executor
(
place
)
gpu_
,
pinned_
=
exe
.
run
(
main_program
,
...
...
@@ -85,7 +85,7 @@ class TestMemcpy_FillConstant(unittest.TestCase):
type
=
'memcpy'
,
inputs
=
{
'X'
:
pinned_var
},
outputs
=
{
'Out'
:
gpu_var
},
attrs
=
{
'dst_place_type'
:
2
})
attrs
=
{
'dst_place_type'
:
1
})
place
=
fluid
.
CUDAPlace
(
0
)
exe
=
fluid
.
Executor
(
place
)
gpu_
,
pinned_
=
exe
.
run
(
main_program
,
...
...
@@ -135,7 +135,7 @@ class TestMemcpyOPError(unittest.TestCase):
type
=
'memcpy'
,
inputs
=
{
'X'
:
selected_row_var
},
outputs
=
{
'Out'
:
pinned_var
},
attrs
=
{
'dst_place_type'
:
3
})
attrs
=
{
'dst_place_type'
:
2
})
with
self
.
assertRaises
(
NotImplementedError
):
place
=
fluid
.
CUDAPlace
(
0
)
exe
=
fluid
.
Executor
(
place
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录