PaddlePaddle / Paddle

Commit 0a63234c
Authored Jan 31, 2019 by dzhwinter
Parent: 9e87fbeb

    follow comments. test=develop
Showing 12 changed files with 70 additions and 55 deletions (+70 / -55).
paddle/fluid/framework/details/build_strategy.cc                   +6  -0
paddle/fluid/framework/details/build_strategy.h                    +3  -0
paddle/fluid/framework/details/graph_print_pass.h                  +6  -1
paddle/fluid/framework/details/memory_optimize_helper.cc           +28 -19
paddle/fluid/framework/details/memory_optimize_helper.h            +6  -0
paddle/fluid/framework/inplace_op_inference.h                      +4  -24
python/paddle/fluid/compiler.py                                    +5  -0
python/paddle/fluid/framework.py                                   +7  -6
python/paddle/fluid/io.py                                          +1  -1
python/paddle/fluid/parallel_executor.py                           +1  -1
python/paddle/fluid/tests/unittests/test_inference_model_io.py     +1  -1
python/paddle/fluid/transpiler/memory_optimization_transpiler.py   +2  -2
paddle/fluid/framework/details/build_strategy.cc

```diff
@@ -53,6 +53,12 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
       AppendPass("fuse_relu_depthwise_conv_pass");
     }
+    // NOTE(dzhwinter): A note for automatical inplace.
+    // 1. modify program desc passes should put
+    // before inplace pass.
+    // 2. manually configured inplace should put
+    // before inplace_pass
+    // Add automatically inplace.
     if (strategy_.enable_inplace_) {
       AppendPass("inplace_pass");
     }
```
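For context, the flag gating this branch is set from Python on a build strategy object. A minimal, hedged usage sketch (1.x-era fluid API; `enable_inplace` is the Python-side counterpart this PR series wires to `BuildStrategy::enable_inplace_`):

```python
# Hedged sketch, assuming the fluid API of this era: turning the Python-side
# flag on is what makes ParallelExecutorPassBuilder append "inplace_pass".
import paddle.fluid as fluid

build_strategy = fluid.BuildStrategy()
build_strategy.enable_inplace = True  # mirrors C++ BuildStrategy::enable_inplace_

# The strategy is then handed to CompiledProgram/ParallelExecutor, whose pass
# builder runs the constructor patched above.
```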
paddle/fluid/framework/details/build_strategy.h

```diff
@@ -80,6 +80,9 @@ struct BuildStrategy {
   bool memory_early_delete_{false};
 
+  // TODO(dzhwinter):
+  // make enable_inplace, memory_optimize_
+  // memory_early_delete_ true by default
   bool enable_inplace_{false};
 
   bool enable_sequential_execution_{false};
```
paddle/fluid/framework/details/graph_print_pass.h

```diff
@@ -26,6 +26,11 @@ namespace details {
 constexpr char kGraphvizPath[] = "debug_graphviz_path";
 constexpr char kGraphviz[] = "graphviz";
 
+// NOTE(dzhwinter): If the graph contains circles.
+// the graph can not be topology sort.
+// This printer will print the whole graph
+// and highlight the circles. It's quite useful
+// for debug the deadlock and circles.
 class GraphvizNode {
  public:
   GraphvizNode(ir::Node* n, const int& i) : node_(n), id_(i) {}
@@ -37,7 +42,7 @@ class GraphvizNode {
   ir::Node* node_;
   int id_;
 };
 class GraphvizNode;
 typedef std::unordered_set<std::unique_ptr<GraphvizNode>> GraphvizNodes;
 
 class SSAGraphPrinter {
```
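The NOTE above is the whole motivation for this printer: a graph containing a cycle has no topological order, so passes that depend on topological sorting stall exactly where the cycle sits. An illustrative sketch of that failure mode (plain Python, not Paddle code, hypothetical helper name):

```python
# Illustrative only: Kahn's algorithm leaves cycle members unprocessed,
# which is the situation the graphviz printer is meant to expose.
from collections import deque

def topo_sort(nodes, edges):
    indeg = {n: 0 for n in nodes}
    for _, dst in edges:
        indeg[dst] += 1
    queue = deque(n for n in nodes if indeg[n] == 0)
    order = []
    while queue:
        n = queue.popleft()
        order.append(n)
        for src, dst in edges:
            if src == n:
                indeg[dst] -= 1
                if indeg[dst] == 0:
                    queue.append(dst)
    # any node missing from `order` sits on (or behind) a cycle
    return order, [n for n in nodes if n not in order]

order, stuck = topo_sort(["a", "b", "c"], [("a", "b"), ("b", "c"), ("c", "b")])
print(order, stuck)  # ['a'] ['b', 'c'] -- the b<->c cycle cannot be sorted
```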
paddle/fluid/framework/details/memory_optimize_helper.cc

```diff
@@ -13,7 +13,9 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/details/memory_optimize_helper.h"
+#include <functional>
 #include <iostream>
+#include <numeric>
 #include <sstream>
 #include <string>
@@ -21,15 +23,17 @@ namespace paddle {
 namespace framework {
 namespace details {
 
+size_t NodeSizeInBytes(const VarDesc& node) {
+  auto shape = node.GetShape();
+  int size =
+      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
+  size_t type_size = SizeOfType(node.GetDataType());
+  return type_size * std::abs(size);
+}
+
 size_t NodeSizeInBytes(ir::Node* n) {
   auto* desc = FindVarDescInBlock(n);
-  auto shape = desc->GetShape();
-  size_t type_size = SizeOfType(desc->GetDataType());
-  int size = 1;
-  for (auto& s : shape) {
-    size *= s;
-  }
-  return type_size * std::abs(size);
+  return NodeSizeInBytes(*desc);
 }
 
 std::string DebugStringImpl(VarDesc* var) {
```
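The size rule the new overload centralizes is simply bytes = sizeof(dtype) * |prod(shape)|, with the absolute value absorbing the -1 placeholder used for an unknown batch dimension. A hedged Python restatement (function and parameter names are mine, not Paddle's):

```python
# Python restatement of NodeSizeInBytes; `dtype_size` stands in for SizeOfType.
from functools import reduce
from operator import mul

def node_size_in_bytes(shape, dtype_size):
    # shape may contain -1 for the unknown batch dimension; abs() keeps the
    # product usable as a relative size, just like std::abs in the C++ code.
    return dtype_size * abs(reduce(mul, shape, 1))

print(node_size_in_bytes([-1, 3, 224, 224], 4))  # 602112 bytes per sample (fp32)
```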
```diff
@@ -154,23 +158,28 @@ std::string OrderedNodeList::ToString() const {
 
 bool NodeCanReused(ir::Node* node) {
   if (node == nullptr || !node->IsVar() || node->IsCtrlVar()) return false;
-  auto* desc = node->Var();
-  auto type = desc->GetType();
-  if (desc->Persistable() || type != proto::VarType::LOD_TENSOR ||
-      desc->GetShape().empty()) {
-    return false;
-  }
-  // vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
-  std::string name = node->Name();
-  if (!name.empty() && name[0] == '@' && name[name.size() - 1] == '@')
-    return false;
+  // auto* desc = node->Var();
+  bool flag = NodeCanReused(*node->Var());
   for (auto* op : node->inputs) {
     if (op->Op()->HasAttr("force_cpu")) {
       // op output force generated in cpu, can not be reused.
-      return framework::AttrReader(op->Op()->GetAttrMap())
-                 .Get<bool>("force_cpu") == 0;
+      flag &= framework::AttrReader(op->Op()->GetAttrMap())
+                  .Get<bool>("force_cpu") == 0;
     }
   }
-  return true;
+  return flag;
+}
+
+bool NodeCanReused(const VarDesc& node) {
+  auto type = node.GetType();
+  if (node.Persistable() || type != proto::VarType::LOD_TENSOR ||
+      node.GetShape().empty()) {
+    return false;
+  }
+  // vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
+  std::string name = node.Name();
+  if (!name.empty() && name[0] == '@' && name[name.size() - 1] == '@')
+    return false;
+  return true;
+}
```
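Restated: a variable is reusable only if it is a non-persistable LOD_TENSOR with a known shape, is not a framework-internal @...@ name, and no producing op pins it to CPU via force_cpu. A hedged Python restatement (dict in place of VarDesc, names are mine):

```python
# Hedged restatement of details::NodeCanReused; `force_cpu_flags` stands in
# for the force_cpu attribute values of the node's producing ops.
def node_can_reused(var, force_cpu_flags=()):
    if var["persistable"] or var["type"] != "LOD_TENSOR" or not var["shape"]:
        return False
    name = var["name"]
    # framework-internal names such as @EMPTY@ or @LR_DECAY_REUSE_ID@
    if name and name[0] == "@" and name[-1] == "@":
        return False
    # any producing op with force_cpu=True pins the output to CPU memory
    return not any(force_cpu_flags)

print(node_can_reused({"persistable": False, "type": "LOD_TENSOR",
                       "shape": [-1, 128], "name": "fc_0.tmp_1"}))  # True
```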
paddle/fluid/framework/details/memory_optimize_helper.h

```diff
@@ -86,12 +86,18 @@ class OrderedNodeList {
 // valid a tensor can be reuse or not
 bool NodeCanReused(ir::Node* node);
 
+// valid a tensor can be reuse or not.
+bool NodeCanReused(const VarDesc& node);
+
 // check op has subblock or not
 bool OpHasSubBlock(OpDesc* desc);
 
 // node memory size in bytes
 size_t NodeSizeInBytes(ir::Node* n);
 
+// node memory size in bytes
+size_t NodeSizeInBytes(const VarDesc&);
+
 std::string DebugString(ir::Node* var);
 
 VarDesc* FindVarDescInBlock(ir::Node* n);
```
paddle/fluid/framework/inplace_op_inference.h

```diff
@@ -19,6 +19,7 @@
 #include <unordered_map>
 #include "glog/logging.h"
 #include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/details/memory_optimize_helper.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/type_defs.h"
@@ -66,30 +67,9 @@ class InplaceInToOut : public InplaceOpInference {
       const OpDesc& op_desc, BlockDesc* block) const = 0;
 
   bool TryInplaceInputOutput(const VarDesc& in, const VarDesc& out) const {
-    auto var_can_reused = [&](const VarDesc& node) -> bool {
-      auto type = node.GetType();
-      if (node.Persistable() || type != proto::VarType::LOD_TENSOR ||
-          node.GetShape().empty()) {
-        return false;
-      }
-      // vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
-      std::string name = node.Name();
-      if (!name.empty() && name[0] == '@' && name[name.size() - 1] == '@')
-        return false;
-      return true;
-    };
-
-    auto var_size_in_bytes = [&](const VarDesc& node) -> size_t {
-      auto shape = node.GetShape();
-      int size = std::accumulate(shape.begin(), shape.end(), 1,
-                                 std::multiplies<int>());
-      size_t type_size = SizeOfType(node.GetDataType());
-      return type_size * std::abs(size);
-    };
-
-    return in.Name() != out.Name() && var_can_reused(in) &&
-           var_can_reused(out) &&
-           var_size_in_bytes(out) <= var_size_in_bytes(in);
+    return in.Name() != out.Name() && details::NodeCanReused(in) &&
+           details::NodeCanReused(out) &&
+           details::NodeSizeInBytes(out) <= details::NodeSizeInBytes(in);
   }
 };
```
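With the duplicated lambdas gone, the in-place eligibility test reduces to a single conjunction: distinct names, both variables reusable, and the output no larger than the input (so the input's buffer can hold it). A self-contained hedged sketch of that predicate (names are mine):

```python
# Self-contained hedged sketch of the predicate TryInplaceInputOutput now
# delegates to: distinct names, both sides reusable, output fits in input.
def try_inplace(in_name, in_bytes, in_ok, out_name, out_bytes, out_ok):
    return in_name != out_name and in_ok and out_ok and out_bytes <= in_bytes

# e.g. relu(x) -> y with identical shape/dtype qualifies for reusing x's buffer
print(try_inplace("x", 1024, True, "y", 1024, True))  # True
print(try_inplace("x", 1024, True, "y", 4096, True))  # False: y would not fit
```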
python/paddle/fluid/compiler.py

```diff
@@ -174,6 +174,11 @@ class CompiledProgram(object):
             self._exec_strategy.num_threads = cpu_num * 2
 
         trainers_endpoints = self._program._trainers_endpoints
+
+        # FIXME(dzhwinter): enable_inplace should be after memory_optimize
+        # if turn on python memory optimize, turn off the inplace_pass.
+        self._build_strategy.enable_inplace = False \
+            if self._program._is_mem_optimized else True
+
         if self._build_strategy.num_trainers > 1 and trainers_endpoints:
             assert self._build_strategy.num_trainers == len(
                 trainers_endpoints), "num_trainers == len(end_points)"
```
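In practice this makes the legacy Python memory_optimize transpiler and the new C++ inplace pass mutually exclusive. A hedged usage sketch (1.x-era fluid API):

```python
# Hedged sketch: running the Python-side optimizer first disables the
# C++ inplace_pass when the program is later compiled.
import paddle.fluid as fluid

program = fluid.default_main_program()
fluid.memory_optimize(program)  # sets program._is_mem_optimized = True

compiled = fluid.CompiledProgram(program)
# with_data_parallel() will now derive enable_inplace = False from the flag
```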
python/paddle/fluid/framework.py

```diff
@@ -1725,18 +1725,19 @@ class Program(object):
         self._trainers_endpoints = []
 
         # the distributed lookup table names
         self._distributed_lookup_table = None
+        # @deprecated(the python memory optimize transpiler is deprecated)
         # whether the program is optimized by memory_optimize_transpiler
-        self.__is_optimized = False
+        self.__is_mem_optimized = False
 
     @property
-    def _is_optimized(self):
+    def _is_mem_optimized(self):
         # if the program is optimized, operator input/outputs
         # maybe same, which conflict with save_inference_model.
-        return self.__is_optimized
+        return self.__is_mem_optimized
 
-    @_is_optimized.setter
-    def _is_optimized(self, target):
-        self.__is_optimized = target
+    @_is_mem_optimized.setter
+    def _is_mem_optimized(self, target):
+        self.__is_mem_optimized = target
 
     @property
     def op_role(self):
```
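The renamed flag keeps the private-attribute/property pattern. A minimal standalone sketch of that pattern (illustrative class name, not Paddle code):

```python
# Name mangling makes __is_mem_optimized private to the class; the single
# underscore property is the sanctioned accessor, as in Program above.
class ProgramLike(object):
    def __init__(self):
        self.__is_mem_optimized = False  # stored as _ProgramLike__is_mem_optimized

    @property
    def _is_mem_optimized(self):
        return self.__is_mem_optimized

    @_is_mem_optimized.setter
    def _is_mem_optimized(self, target):
        self.__is_mem_optimized = target

p = ProgramLike()
p._is_mem_optimized = True
print(p._is_mem_optimized)  # True
```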
python/paddle/fluid/io.py

```diff
@@ -931,7 +931,7 @@ def save_inference_model(dirname,
     if main_program is None:
         main_program = default_main_program()
 
-    if main_program._is_optimized:
+    if main_program._is_mem_optimized:
         warnings.warn(
             "save_inference_model must put before you call memory_optimize. \
             the memory_optimize will modify the original program, \
```
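The warning encodes an ordering rule: save first, optimize second, because memory_optimize rewrites the program in place and may alias operator inputs and outputs. A hedged sketch of the safe order (1.x-era fluid API):

```python
# Hedged ordering sketch: saving after memory_optimize would trigger the
# warning patched above, and the aliased program is what would be serialized.
import paddle.fluid as fluid

x = fluid.layers.data(name="x", shape=[4], dtype="float32")
y = fluid.layers.fc(input=x, size=2)
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

program = fluid.default_main_program()
fluid.io.save_inference_model("model_dir", ["x"], [y], exe, program)  # safe
fluid.memory_optimize(program)  # mutates program; a save now would warn
```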
python/paddle/fluid/parallel_executor.py

```diff
@@ -148,7 +148,7 @@ class ParallelExecutor(object):
             else framework.default_main_program()
         # FIXME(dzhwinter): enable_inplace should be after memory_optimize
         # if turn on python memory optimize, turn off the inplace_pass.
-        build_strategy.enable_inplace = False if main._is_optimized else True
+        build_strategy.enable_inplace = False if main._is_mem_optimized else True
 
         scope = scope if scope is not None else executor.global_scope()
 
         if share_vars_from and not isinstance(share_vars_from,
```
python/paddle/fluid/tests/unittests/test_inference_model_io.py

```diff
@@ -108,7 +108,7 @@ class TestSaveInferenceModel(unittest.TestCase):
         exe.run(init_program, feed={}, fetch_list=[])
 
         memory_optimize(program, print_log=True)
-        self.assertEqual(program._is_optimized, True)
+        self.assertEqual(program._is_mem_optimized, True)
         # will print warning message
 
         save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
```
python/paddle/fluid/transpiler/memory_optimization_transpiler.py

```diff
@@ -540,7 +540,7 @@ def memory_optimize(input_program,
     if skip_opt_set is not None:
         skip_opt_set = set(map(to_name_str, skip_opt_set))
     cfgs = _get_cfgs(input_program)
-    input_program._is_optimized = True
+    input_program._is_mem_optimized = True
     for cfg in cfgs:
         cfg.memory_optimize(skip_opt_set=skip_opt_set, level=level)
@@ -560,6 +560,6 @@ def release_memory(input_program, skip_opt_set=None):
         None
     """
     cfgs = _get_cfgs(input_program)
-    input_program._is_optimized = True
+    input_program._is_mem_optimized = True
     for cfg in cfgs:
         cfg.release_memory(skip_opt_set=skip_opt_set)
```
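Both public entry points now set the renamed flag, so downstream consumers (CompiledProgram, ParallelExecutor, save_inference_model) see a consistent signal. A hedged usage sketch (1.x-era fluid API):

```python
# Hedged sketch: either transpiler entry point marks the program, which in
# turn disables the C++ inplace_pass and arms the save_inference_model warning.
import paddle.fluid as fluid

program = fluid.default_main_program()
fluid.memory_optimize(program, level=0)   # buffer-reuse rewrite, sets the flag
# or: fluid.release_memory(program)       # early-delete rewrite, also sets it
assert program._is_mem_optimized
```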