Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
0a63234c
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0a63234c
编写于
1月 31, 2019
作者:
D
dzhwinter
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
follow comments. test=develop
上级
9e87fbeb
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
70 addition
and
55 deletion
+70
-55
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+6
-0
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+3
-0
paddle/fluid/framework/details/graph_print_pass.h
paddle/fluid/framework/details/graph_print_pass.h
+6
-1
paddle/fluid/framework/details/memory_optimize_helper.cc
paddle/fluid/framework/details/memory_optimize_helper.cc
+28
-19
paddle/fluid/framework/details/memory_optimize_helper.h
paddle/fluid/framework/details/memory_optimize_helper.h
+6
-0
paddle/fluid/framework/inplace_op_inference.h
paddle/fluid/framework/inplace_op_inference.h
+4
-24
python/paddle/fluid/compiler.py
python/paddle/fluid/compiler.py
+5
-0
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+7
-6
python/paddle/fluid/io.py
python/paddle/fluid/io.py
+1
-1
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+1
-1
python/paddle/fluid/tests/unittests/test_inference_model_io.py
...n/paddle/fluid/tests/unittests/test_inference_model_io.py
+1
-1
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
...paddle/fluid/transpiler/memory_optimization_transpiler.py
+2
-2
未找到文件。
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
0a63234c
...
...
@@ -53,6 +53,12 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
AppendPass
(
"fuse_relu_depthwise_conv_pass"
);
}
// NOTE(dzhwinter): A note on automatic inplace.
// 1. Passes that modify the program desc should be put
// before the inplace pass.
// 2. Manually configured inplace should be put
// before inplace_pass.
// Add automatic inplace.
if
(
strategy_
.
enable_inplace_
)
{
AppendPass
(
"inplace_pass"
);
...
...
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
0a63234c
...
...
@@ -80,6 +80,9 @@ struct BuildStrategy {
bool
memory_early_delete_
{
false
};
// TODO(dzhwinter):
// make enable_inplace_, memory_optimize_ and
// memory_early_delete_ true by default
bool
enable_inplace_
{
false
};
bool
enable_sequential_execution_
{
false
};
...
...
paddle/fluid/framework/details/graph_print_pass.h
浏览文件 @
0a63234c
...
...
@@ -26,6 +26,11 @@ namespace details {
constexpr
char
kGraphvizPath
[]
=
"debug_graphviz_path"
;
constexpr
char
kGraphviz
[]
=
"graphviz"
;
// NOTE(dzhwinter): If the graph contains cycles,
// it cannot be topologically sorted.
// This printer will print the whole graph
// and highlight the cycles. It is quite useful
// for debugging deadlocks and cycles.
class
GraphvizNode
{
public:
GraphvizNode
(
ir
::
Node
*
n
,
const
int
&
i
)
:
node_
(
n
),
id_
(
i
)
{}
...
...
@@ -37,7 +42,7 @@ class GraphvizNode {
ir
::
Node
*
node_
;
int
id_
;
};
class
GraphvizNode
;
typedef
std
::
unordered_set
<
std
::
unique_ptr
<
GraphvizNode
>>
GraphvizNodes
;
class
SSAGraphPrinter
{
...
...
paddle/fluid/framework/details/memory_optimize_helper.cc
浏览文件 @
0a63234c
...
...
@@ -13,7 +13,9 @@
// limitations under the License.
#include "paddle/fluid/framework/details/memory_optimize_helper.h"
#include <functional>
#include <iostream>
#include <numeric>
#include <sstream>
#include <string>
...
...
@@ -21,15 +23,17 @@ namespace paddle {
namespace
framework
{
namespace
details
{
size_t
NodeSizeInBytes
(
const
VarDesc
&
node
)
{
auto
shape
=
node
.
GetShape
();
int
size
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
int
>
());
size_t
type_size
=
SizeOfType
(
node
.
GetDataType
());
return
type_size
*
std
::
abs
(
size
);
}
size_t
NodeSizeInBytes
(
ir
::
Node
*
n
)
{
auto
*
desc
=
FindVarDescInBlock
(
n
);
auto
shape
=
desc
->
GetShape
();
size_t
type_size
=
SizeOfType
(
desc
->
GetDataType
());
int
size
=
1
;
for
(
auto
&
s
:
shape
)
{
size
*=
s
;
}
return
type_size
*
std
::
abs
(
size
);
return
NodeSizeInBytes
(
*
desc
);
}
std
::
string
DebugStringImpl
(
VarDesc
*
var
)
{
...
...
@@ -154,23 +158,28 @@ std::string OrderedNodeList::ToString() const {
bool
NodeCanReused
(
ir
::
Node
*
node
)
{
if
(
node
==
nullptr
||
!
node
->
IsVar
()
||
node
->
IsCtrlVar
())
return
false
;
auto
*
desc
=
node
->
Var
();
auto
type
=
desc
->
GetType
();
if
(
desc
->
Persistable
()
||
type
!=
proto
::
VarType
::
LOD_TENSOR
||
desc
->
GetShape
().
empty
())
{
return
false
;
}
// vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
std
::
string
name
=
node
->
Name
();
if
(
!
name
.
empty
()
&&
name
[
0
]
==
'@'
&&
name
[
name
.
size
()
-
1
]
==
'@'
)
return
false
;
// auto* desc = node->Var();
bool
flag
=
NodeCanReused
(
*
node
->
Var
());
for
(
auto
*
op
:
node
->
inputs
)
{
if
(
op
->
Op
()
->
HasAttr
(
"force_cpu"
))
{
// The op output is forced to be generated on CPU, so it cannot be reused.
return
framework
::
AttrReader
(
op
->
Op
()
->
GetAttrMap
())
.
Get
<
bool
>
(
"force_cpu"
)
==
0
;
flag
&=
framework
::
AttrReader
(
op
->
Op
()
->
GetAttrMap
())
.
Get
<
bool
>
(
"force_cpu"
)
==
0
;
}
}
return
flag
;
}
bool
NodeCanReused
(
const
VarDesc
&
node
)
{
auto
type
=
node
.
GetType
();
if
(
node
.
Persistable
()
||
type
!=
proto
::
VarType
::
LOD_TENSOR
||
node
.
GetShape
().
empty
())
{
return
false
;
}
// vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
std
::
string
name
=
node
.
Name
();
if
(
!
name
.
empty
()
&&
name
[
0
]
==
'@'
&&
name
[
name
.
size
()
-
1
]
==
'@'
)
return
false
;
return
true
;
}
...
...
paddle/fluid/framework/details/memory_optimize_helper.h
浏览文件 @
0a63234c
...
...
@@ -86,12 +86,18 @@ class OrderedNodeList {
// check whether a tensor can be reused or not
bool
NodeCanReused
(
ir
::
Node
*
node
);
// check whether a tensor can be reused or not.
bool
NodeCanReused
(
const
VarDesc
&
node
);
// check whether the op has a sub-block or not
bool
OpHasSubBlock
(
OpDesc
*
desc
);
// node memory size in bytes
size_t
NodeSizeInBytes
(
ir
::
Node
*
n
);
// node memory size in bytes
size_t
NodeSizeInBytes
(
const
VarDesc
&
);
std
::
string
DebugString
(
ir
::
Node
*
var
);
VarDesc
*
FindVarDescInBlock
(
ir
::
Node
*
n
);
...
...
paddle/fluid/framework/inplace_op_inference.h
浏览文件 @
0a63234c
...
...
@@ -19,6 +19,7 @@
#include <unordered_map>
#include "glog/logging.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/details/memory_optimize_helper.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/type_defs.h"
...
...
@@ -66,30 +67,9 @@ class InplaceInToOut : public InplaceOpInference {
const
OpDesc
&
op_desc
,
BlockDesc
*
block
)
const
=
0
;
bool
TryInplaceInputOutput
(
const
VarDesc
&
in
,
const
VarDesc
&
out
)
const
{
auto
var_can_reused
=
[
&
](
const
VarDesc
&
node
)
->
bool
{
auto
type
=
node
.
GetType
();
if
(
node
.
Persistable
()
||
type
!=
proto
::
VarType
::
LOD_TENSOR
||
node
.
GetShape
().
empty
())
{
return
false
;
}
// vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
std
::
string
name
=
node
.
Name
();
if
(
!
name
.
empty
()
&&
name
[
0
]
==
'@'
&&
name
[
name
.
size
()
-
1
]
==
'@'
)
return
false
;
return
true
;
};
auto
var_size_in_bytes
=
[
&
](
const
VarDesc
&
node
)
->
size_t
{
auto
shape
=
node
.
GetShape
();
int
size
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
int
>
());
size_t
type_size
=
SizeOfType
(
node
.
GetDataType
());
return
type_size
*
std
::
abs
(
size
);
};
return
in
.
Name
()
!=
out
.
Name
()
&&
var_can_reused
(
in
)
&&
var_can_reused
(
out
)
&&
var_size_in_bytes
(
out
)
<=
var_size_in_bytes
(
in
);
return
in
.
Name
()
!=
out
.
Name
()
&&
details
::
NodeCanReused
(
in
)
&&
details
::
NodeCanReused
(
out
)
&&
details
::
NodeSizeInBytes
(
out
)
<=
details
::
NodeSizeInBytes
(
in
);
}
};
...
...
python/paddle/fluid/compiler.py
浏览文件 @
0a63234c
...
...
@@ -174,6 +174,11 @@ class CompiledProgram(object):
self
.
_exec_strategy
.
num_threads
=
cpu_num
*
2
trainers_endpoints
=
self
.
_program
.
_trainers_endpoints
# FIXME(dzhwinter): enable_inplace should be applied after memory_optimize.
# If the Python memory optimize is turned on, turn off the inplace_pass.
self
.
_build_strategy
.
enable_inplace
=
False
if
self
.
_program
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
num_trainers
>
1
and
trainers_endpoints
:
assert
self
.
_build_strategy
.
num_trainers
==
len
(
trainers_endpoints
),
"num_trainers == len(end_points)"
...
...
python/paddle/fluid/framework.py
浏览文件 @
0a63234c
...
...
@@ -1725,18 +1725,19 @@ class Program(object):
self
.
_trainers_endpoints
=
[]
# the distributed lookup table names
self
.
_distributed_lookup_table
=
None
# @deprecated(the python memory optimize transpiler is deprecated)
# whether the program is optimized by memory_optimize_transpiler
self
.
__is_optimized
=
False
self
.
__is_
mem_
optimized
=
False
@
property
def
_is_optimized
(
self
):
def
_is_
mem_
optimized
(
self
):
# if the program is optimized, operator inputs/outputs
# may be the same, which conflicts with save_inference_model.
return
self
.
__is_optimized
return
self
.
__is_
mem_
optimized
@
_is_optimized
.
setter
def
_is_optimized
(
self
,
target
):
self
.
__is_optimized
=
target
@
_is_
mem_
optimized
.
setter
def
_is_
mem_
optimized
(
self
,
target
):
self
.
__is_
mem_
optimized
=
target
@
property
def
op_role
(
self
):
...
...
python/paddle/fluid/io.py
浏览文件 @
0a63234c
...
...
@@ -931,7 +931,7 @@ def save_inference_model(dirname,
if
main_program
is
None
:
main_program
=
default_main_program
()
if
main_program
.
_is_optimized
:
if
main_program
.
_is_
mem_
optimized
:
warnings
.
warn
(
"save_inference_model must put before you call memory_optimize.
\
the memory_optimize will modify the original program,
\
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
0a63234c
...
...
@@ -148,7 +148,7 @@ class ParallelExecutor(object):
else
framework
.
default_main_program
()
# FIXME(dzhwinter): enable_inplace should be applied after memory_optimize.
# If the Python memory optimize is turned on, turn off the inplace_pass.
build_strategy
.
enable_inplace
=
False
if
main
.
_is_optimized
else
True
build_strategy
.
enable_inplace
=
False
if
main
.
_is_
mem_
optimized
else
True
scope
=
scope
if
scope
is
not
None
else
executor
.
global_scope
()
if
share_vars_from
and
not
isinstance
(
share_vars_from
,
...
...
python/paddle/fluid/tests/unittests/test_inference_model_io.py
浏览文件 @
0a63234c
...
...
@@ -108,7 +108,7 @@ class TestSaveInferenceModel(unittest.TestCase):
exe
.
run
(
init_program
,
feed
=
{},
fetch_list
=
[])
memory_optimize
(
program
,
print_log
=
True
)
self
.
assertEqual
(
program
.
_is_optimized
,
True
)
self
.
assertEqual
(
program
.
_is_
mem_
optimized
,
True
)
# will print warning message
save_inference_model
(
MODEL_DIR
,
[
"x"
,
"y"
],
[
avg_cost
],
exe
,
program
)
...
...
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
浏览文件 @
0a63234c
...
...
@@ -540,7 +540,7 @@ def memory_optimize(input_program,
if
skip_opt_set
is
not
None
:
skip_opt_set
=
set
(
map
(
to_name_str
,
skip_opt_set
))
cfgs
=
_get_cfgs
(
input_program
)
input_program
.
_is_optimized
=
True
input_program
.
_is_
mem_
optimized
=
True
for
cfg
in
cfgs
:
cfg
.
memory_optimize
(
skip_opt_set
=
skip_opt_set
,
level
=
level
)
...
...
@@ -560,6 +560,6 @@ def release_memory(input_program, skip_opt_set=None):
None
"""
cfgs
=
_get_cfgs
(
input_program
)
input_program
.
_is_optimized
=
True
input_program
.
_is_
mem_
optimized
=
True
for
cfg
in
cfgs
:
cfg
.
release_memory
(
skip_opt_set
=
skip_opt_set
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录