BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit d376cf71
Authored Feb 18, 2019 by dzhwinter

polish code for reading. test=develop

Parent: 283573c6

Showing 7 changed files with 128 additions and 22 deletions (+128 −22)
Changed files:
- paddle/fluid/framework/details/build_strategy.cc (+2 −0)
- paddle/fluid/framework/details/memory_optimize_helper.cc (+11 −4)
- paddle/fluid/framework/details/memory_optimize_helper.h (+1 −0)
- paddle/fluid/framework/details/memory_optimize_helper_test.cc (+46 −0)
- paddle/fluid/framework/details/memory_optimize_pass.cc (+21 −17)
- python/paddle/fluid/tests/unittests/parallel_executor_test_base.py (+1 −1)
- python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py (+46 −0)
paddle/fluid/framework/details/build_strategy.cc

@@ -240,7 +240,9 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
         continue;
       }
     }
+    VLOG(3) << "Start Apply Pass " << pass->Type();
     graph = pass->Apply(std::move(graph));
+    VLOG(3) << "Finish Apply Pass " << pass->Type();
   }
   return graph;
 }
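The two added VLOG(3) lines bracket each pass, so a slow or crashing pass can be identified from the log (VLOG(3) output is typically enabled by raising the glog verbosity, e.g. GLOG_v=3). Below is a minimal sketch of the same bracketing pattern; Graph, Pass, and ApplyAll are hypothetical stand-ins, not Paddle types:

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-ins for Paddle's ir::Graph and pass types; they only
// exist to show the shape of the logging the diff adds.
struct Graph {};

struct Pass {
  std::string type;
  std::string Type() const { return type; }
  std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> g) const {
    // A real pass would rewrite the graph here.
    return g;
  }
};

std::unique_ptr<Graph> ApplyAll(const std::vector<Pass>& passes,
                                std::unique_ptr<Graph> graph) {
  for (const auto& pass : passes) {
    // Bracket each pass: if the process stalls or aborts, the last "Start"
    // line without a matching "Finish" names the offending pass.
    std::clog << "Start Apply Pass " << pass.Type() << '\n';
    graph = pass.Apply(std::move(graph));
    std::clog << "Finish Apply Pass " << pass.Type() << '\n';
  }
  return graph;
}

int main() {
  auto g = ApplyAll({{"memory_optimize_pass"}, {"inplace_pass"}},
                    std::make_unique<Graph>());
  return g ? 0 : 1;
}
```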
paddle/fluid/framework/details/memory_optimize_helper.cc

@@ -268,10 +268,15 @@ bool OrderedSet::Has(ir::Node* var) const {
   return false;
 }

+void OrderedSet::Erase(const std::string& var) {
+  PADDLE_ENFORCE(mark_table_.count(var));
+  nodes_.erase(mark_table_[var]);
+  mark_table_.erase(var);
+}
+
 void OrderedSet::Erase(ir::Node* var) {
-  PADDLE_ENFORCE(mark_table_.count(var->Name()));
-  nodes_.erase(mark_table_[var->Name()]);
-  mark_table_.erase(var->Name());
+  PADDLE_ENFORCE(var != nullptr);
+  Erase(var->Name());
 }

 std::string OrderedSet::ToString() const {

@@ -509,7 +514,9 @@ ir::Node* ControlFlowGraph::GetNodeByName(const std::string& name,
   for (auto* node : ops_) {
     if (node == op) break;
     for (auto& output : node->outputs) {
-      if (output->Name() == name) {
+      PADDLE_ENFORCE((output != nullptr && output->IsVar()),
+                     "Output is empty!");
+      if (output->Var() && output->Name() == name) {
         found_node = output;
       }
     }
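This change removes duplicated bookkeeping: Erase(ir::Node*) now only null-checks and forwards to the new Erase(const std::string&), so the mark-table and node-list updates live in exactly one place. A minimal sketch of that delegation pattern, using plain STL containers and a hypothetical NamedSet rather than Paddle's OrderedSet:

```cpp
#include <cassert>
#include <iterator>
#include <list>
#include <string>
#include <unordered_map>

struct Node {
  std::string name;
};

// Sketch of the delegation the diff introduces: the pointer overload only
// validates and forwards, so the container bookkeeping lives in one place.
class NamedSet {
 public:
  void Insert(Node* n) {
    nodes_.push_back(n);
    mark_table_[n->name] = std::prev(nodes_.end());
  }
  void Erase(const std::string& name) {
    auto it = mark_table_.find(name);
    assert(it != mark_table_.end());  // mirrors PADDLE_ENFORCE(count(var))
    nodes_.erase(it->second);
    mark_table_.erase(it);
  }
  void Erase(Node* n) {
    assert(n != nullptr);  // mirrors PADDLE_ENFORCE(var != nullptr)
    Erase(n->name);        // forward to the single implementation
  }

 private:
  std::list<Node*> nodes_;
  std::unordered_map<std::string, std::list<Node*>::iterator> mark_table_;
};

int main() {
  Node a{"a"};
  NamedSet set;
  set.Insert(&a);
  set.Erase(&a);  // equivalent to set.Erase("a")
  return 0;
}
```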
paddle/fluid/framework/details/memory_optimize_helper.h

@@ -55,6 +55,7 @@ class OrderedSet {
   void Insert(ir::Node* var);
   void Erase(ir::Node* var);
+  void Erase(const std::string& var);
   bool Has(ir::Node* var) const;
   void Clear() {
     mark_table_.clear();
paddle/fluid/framework/details/memory_optimize_helper_test.cc

@@ -107,6 +107,52 @@ TEST(OrderedSet, Normal) {
     ASSERT_EQ(pool.GetNodeIndexInPool(cache), 5);  // match 4:[5,2]
   }
 }

+TEST(OrderedSet, FindBestFitNode) {
+  OrderedSet pool;
+  std::vector<std::unique_ptr<ir::Node>> nodes;
+  ProgramDesc prog;
+  BlockDesc* block_desc = prog.MutableBlock(0);
+  auto* op_desc = block_desc->AppendOp();
+  op_desc->SetType("dummy");
+  std::unique_ptr<ir::Node> op = ir::CreateNodeForTest(op_desc);
+
+  {
+    auto desc = block_desc->Var("a");
+    desc->SetShape({128, 128});
+    std::unique_ptr<ir::Node> node = ir::CreateNodeForTest(desc);
+    node->inputs.emplace_back(op.get());
+    nodes.emplace_back(std::move(node));
+  }
+  {
+    auto desc = block_desc->Var("b");
+    desc->SetShape({128, 129});
+    std::unique_ptr<ir::Node> node = ir::CreateNodeForTest(desc);
+    node->inputs.emplace_back(op.get());
+    nodes.emplace_back(std::move(node));
+  }
+  {
+    auto desc = block_desc->Var("c");
+    desc->SetShape({128, 128});
+    std::unique_ptr<ir::Node> node = ir::CreateNodeForTest(desc);
+    node->inputs.emplace_back(op.get());
+    nodes.emplace_back(std::move(node));
+  }
+
+  for (auto& node : nodes) {
+    pool.Insert(node.get());
+  }
+
+  // FindNextBestFitNode
+  auto* n = nodes[0].get();
+  auto* cache = pool.FindBestFitNode(n);
+  PADDLE_ENFORCE(cache->Name() == "a");
+  cache = pool.FindNextBestFitNode(n, cache);
+  PADDLE_ENFORCE(cache->Name() == "c");
+  cache = pool.FindNextBestFitNode(n, cache);
+  PADDLE_ENFORCE(cache->Name() == "b");
+}
+
 }  // namespace details
 }  // namespace framework
 }  // namespace paddle
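The new test inserts a (128×128), b (128×129), and c (128×128), then expects lookups for nodes[0] (itself 128×128) to yield a, then c, then b: exact size matches in insertion order, followed by the nearest larger block. The sketch below models that observable ordering; it is an illustration of best-fit ranking, not Paddle's actual FindBestFitNode/FindNextBestFitNode implementation:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <string>
#include <vector>

struct Var {
  std::string name;
  long size;  // element count, e.g. 128 * 128
};

// Rank pool entries by how far their size is from the requested size;
// std::stable_sort keeps insertion order for ties. "Next best fit" is then
// just the following entry in the ranking.
std::vector<const Var*> RankByFit(const std::vector<Var>& pool, long want) {
  std::vector<const Var*> ranked;
  for (const auto& v : pool) ranked.push_back(&v);
  std::stable_sort(ranked.begin(), ranked.end(),
                   [want](const Var* a, const Var* b) {
                     return std::labs(a->size - want) <
                            std::labs(b->size - want);
                   });
  return ranked;
}

int main() {
  // Same shapes as the test: a and c match the request exactly, b is the
  // nearest larger block.
  std::vector<Var> pool = {
      {"a", 128 * 128}, {"b", 128 * 129}, {"c", 128 * 128}};
  auto ranked = RankByFit(pool, 128 * 128);
  assert(ranked[0]->name == "a");  // exact match, inserted first
  assert(ranked[1]->name == "c");  // exact match, inserted later
  assert(ranked[2]->name == "b");  // closest non-exact size
  return 0;
}
```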
paddle/fluid/framework/details/memory_optimize_pass.cc

@@ -69,7 +69,7 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
     }

     for (auto& var : op->outputs) {
-      if (skip_set_.count(var->Name())) {
+      if (var->IsVar() && !var->IsCtrlVar() && skip_set_.count(var->Name())) {
         VLOG(3) << "Skip set contains variable of " << var->Name()
                 << "disable reuse on it. skipped";
         continue;

@@ -77,8 +77,8 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
       if (NodeCanReused(var) && cfg_->Use(op).count(var->Name()) == 0) {
         ir::Node* cache = pool_.FindBestFitNode(var);
         while (cache != nullptr && var->Name() == cache->Name()) {
           VLOG(3) << "The same cache variable is cascade reused. "
                   << var->Name() << " is re-filled to the pool after"
                   << "the reused op is finished. Current op can not "
                   << "replace it again. Skip this candidate.";
           cache = pool_.FindNextBestFitNode(var, cache);

@@ -107,11 +107,13 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
         //
         // CFG Graph store the liveness information, when reuse happens
         // we also need to update the variable liveness.
-        cfg_->RenameVarInCFGGraph(var->Name(), cache->Name(), idx);
-        RenameVarInGraphDesc(var->Name(), cache->Name(), idx);
-        RenameVarInGraphNode(var->Name(), cache->Name(), idx, graph.get());
+        const std::string var_name = var->Name();
+        const std::string cache_name = cache->Name();

-        pool_.Erase(cache);
+        cfg_->RenameVarInCFGGraph(var_name, cache_name, idx);
+        RenameVarInGraphDesc(var_name, cache_name, idx);
+        RenameVarInGraphNode(var_name, cache_name, idx, graph.get());
+        pool_.Erase(cache_name);

@@ -119,7 +121,7 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
     for (auto var : cfg_->LiveIn(op)) {
       if (cfg_->LiveOut(op).count(var) == 0) {
         ir::Node* var_node = cfg_->GetNodeByName(var, op);
-        if (var_node == nullptr) continue;
+        if (var_node == nullptr || var_node->IsCtrlVar()) continue;
         if (NodeCanReused(var_node) && !pool_.Has(var_node)) {
           pool_.Insert(var_node);
         }

@@ -275,8 +277,7 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
     // redirect the input to the latest version of cache_var
     for (auto* node : op->inputs) {
       if (node->Name() == var) {
-        ir::Node* cache_node = graph->CreateVarNode(var_desc.get());
-        var_nodes_[cache_var].emplace_back(cache_node);
+        ir::Node* cache_node = var_nodes_[cache_var].back();

         // swap node to cache_node
         cache_node->outputs.insert(cache_node->outputs.end(),

@@ -285,11 +286,15 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
         auto* prev_op = node->inputs[0];
         std::replace(prev_op->outputs.begin(), prev_op->outputs.end(), node,
                      cache_node);
         cache_node->inputs.emplace_back(prev_op);
         for (auto* next_op : node->outputs) {
           std::replace(next_op->inputs.begin(), next_op->inputs.end(), node,
                        cache_node);
         }
+
+        // erase unused node
+        auto& nodes = var_nodes_.at(var);
+        nodes.erase(std::remove(nodes.begin(), nodes.end(), node), nodes.end());
+        graph->RemoveNode(node);
       }
     }

@@ -309,15 +314,14 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
         std::replace(next_op->inputs.begin(), next_op->inputs.end(), node,
                      cache_node);
       }
+
+      // erase unused node
+      auto& nodes = var_nodes_.at(var);
+      nodes.erase(std::remove(nodes.begin(), nodes.end(), node), nodes.end());
+      graph->RemoveNode(node);
     }
   }
-
-  // release node of unused var in graph
-  for (auto* node : var_nodes_[var]) {
-    graph->RemoveNode(node);
-  }
-  var_nodes_.at(var).clear();
 }
 }  // namespace details
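The @@ -107 hunk copies var->Name() and cache->Name() into local std::strings before the rename calls run, and erases from the pool by name afterwards. A plausible reading is that renaming can destroy or rewrite the nodes that back those references mid-sequence, so copying first avoids dangling references. The sketch below illustrates that copy-before-mutate pattern with hypothetical types; it is an illustration, not the pass itself:

```cpp
#include <string>
#include <vector>

struct Node {
  std::string name;
  const std::string& Name() const { return name; }
};

// Stand-in for the rename machinery: it mutates (and in a real pass might
// erase or re-create) the nodes that back any references into them.
void RenameAll(std::vector<Node>& graph, const std::string& from,
               const std::string& to) {
  for (auto& n : graph) {
    if (n.name == from) n.name = to;
  }
}

void Risky(std::vector<Node>& graph, Node* var, Node* cache) {
  // BAD: if the rename step destroys or rewrites *cache, this reference
  // can dangle before its last use.
  const std::string& cache_ref = cache->Name();
  RenameAll(graph, var->Name(), cache_ref);
  // ... any further use of cache_ref here is unsafe in the general case.
}

void Safe(std::vector<Node>& graph, Node* var, Node* cache) {
  // GOOD (the pattern the hunk adopts): copy the names first, then mutate.
  const std::string var_name = var->Name();
  const std::string cache_name = cache->Name();
  RenameAll(graph, var_name, cache_name);
  // cache_name stays valid no matter what happened to *cache.
}

int main() {
  std::vector<Node> graph = {{"x"}, {"cache"}};
  Safe(graph, &graph[0], &graph[1]);
  return graph[0].name == "cache" ? 0 : 1;
}
```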
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py

@@ -79,7 +79,7 @@ class TestParallelExecutorBase(unittest.TestCase):
             if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce
         build_strategy.fuse_elewise_add_act_ops = fuse_elewise_add_act_ops
         build_strategy.fuse_relu_depthwise_conv = fuse_relu_depthwise_conv
-        build_strategy.memory_optimize = use_ir_memory_optimize
+        build_strategy.memory_optimize = False if memory_opt else use_ir_memory_optimize
         # python memory optimization is conflict with inplace pass.
         # Use ir graph memory optimization after inplace pass is the correct way.
         build_strategy.enable_inplace = False if memory_opt else enable_inplace
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py (new file, mode 100644)

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import unittest

os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
os.environ['FLAGS_fast_eager_deletion_mode'] = True
os.environ['RECORDIO_FILENAME'] = '/tmp/ir_memory_optimize_transformer.wmt16.recordio'

from test_parallel_executor_transformer import TestTransformer


# NOTE(dzhwinter): test diferent strategy colisions.
# open the eager delete tensor strategy by default.
class TestTransformerWithIR(TestTransformer):
    def test_main(self):
        if core.is_compiled_with_cuda():
            # check python transpiler
            self.check_network_convergence(
                transformer,
                use_cuda=True,
                memory_opt=True,
                use_ir_memory_optimize=False)
            # check IR memory optimize
            self.check_network_convergence(
                transformer,
                use_cuda=True,
                memory_opt=False,
                use_ir_memory_optimize=True)


if __name__ == '__main__':
    unittest.main()