Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
d79d2f68
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d79d2f68
编写于
2月 19, 2019
作者:
C
chengduozh
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into fix_shape_api_doc
test=develop
上级
3ce12b1b
28609b34
变更
55
显示空白变更内容
内联
并排
Showing
55 changed file
with
1952 addition
and
403 deletion
+1952
-403
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+6
-1
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+2
-0
paddle/fluid/framework/details/inplace_op_pass.cc
paddle/fluid/framework/details/inplace_op_pass.cc
+1
-1
paddle/fluid/framework/details/memory_optimize_helper.cc
paddle/fluid/framework/details/memory_optimize_helper.cc
+66
-6
paddle/fluid/framework/details/memory_optimize_helper.h
paddle/fluid/framework/details/memory_optimize_helper.h
+2
-0
paddle/fluid/framework/details/memory_optimize_helper_test.cc
...le/fluid/framework/details/memory_optimize_helper_test.cc
+46
-0
paddle/fluid/framework/details/memory_optimize_pass.cc
paddle/fluid/framework/details/memory_optimize_pass.cc
+57
-51
paddle/fluid/framework/inplace_op_inference_test.cc
paddle/fluid/framework/inplace_op_inference_test.cc
+16
-16
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
+7
-3
paddle/fluid/imperative/layer.cc
paddle/fluid/imperative/layer.cc
+1
-1
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
...e/fluid/inference/analysis/ir_passes/subgraph_detector.cc
+0
-71
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h
...le/fluid/inference/analysis/ir_passes/subgraph_detector.h
+1
-26
paddle/fluid/operators/detection/density_prior_box_op.h
paddle/fluid/operators/detection/density_prior_box_op.h
+6
-7
paddle/fluid/operators/detection/prior_box_op.h
paddle/fluid/operators/detection/prior_box_op.h
+30
-39
paddle/fluid/operators/group_norm_op.cc
paddle/fluid/operators/group_norm_op.cc
+37
-2
paddle/fluid/operators/jit/benchmark.cc
paddle/fluid/operators/jit/benchmark.cc
+75
-0
paddle/fluid/operators/jit/test.cc
paddle/fluid/operators/jit/test.cc
+132
-1
paddle/fluid/operators/load_combine_op.cc
paddle/fluid/operators/load_combine_op.cc
+5
-1
paddle/fluid/operators/lstm_op.h
paddle/fluid/operators/lstm_op.h
+4
-0
paddle/fluid/operators/lstmp_op.h
paddle/fluid/operators/lstmp_op.h
+5
-0
paddle/fluid/operators/row_conv_op.cc
paddle/fluid/operators/row_conv_op.cc
+5
-5
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+17
-13
paddle/fluid/pybind/ir.cc
paddle/fluid/pybind/ir.cc
+32
-26
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+4
-7
python/CMakeLists.txt
python/CMakeLists.txt
+1
-0
python/paddle/fluid/compiler.py
python/paddle/fluid/compiler.py
+4
-1
python/paddle/fluid/contrib/int8_inference/README.md
python/paddle/fluid/contrib/int8_inference/README.md
+2
-2
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
...ddle/fluid/contrib/slim/quantization/quantization_pass.py
+377
-17
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+6
-0
python/paddle/fluid/contrib/slim/tests/__init__.py
python/paddle/fluid/contrib/slim/tests/__init__.py
+0
-0
python/paddle/fluid/contrib/slim/tests/configs/config.yaml
python/paddle/fluid/contrib/slim/tests/configs/config.yaml
+1
-1
python/paddle/fluid/contrib/slim/tests/configs/pruners.yaml
python/paddle/fluid/contrib/slim/tests/configs/pruners.yaml
+0
-0
python/paddle/fluid/contrib/slim/tests/configs/pruners_0.yaml
...on/paddle/fluid/contrib/slim/tests/configs/pruners_0.yaml
+0
-0
python/paddle/fluid/contrib/slim/tests/test_factory.py
python/paddle/fluid/contrib/slim/tests/test_factory.py
+1
-1
python/paddle/fluid/contrib/slim/tests/test_graph.py
python/paddle/fluid/contrib/slim/tests/test_graph.py
+80
-0
python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
...paddle/fluid/contrib/slim/tests/test_quantization_pass.py
+372
-0
python/paddle/fluid/contrib/tests/CMakeLists.txt
python/paddle/fluid/contrib/tests/CMakeLists.txt
+5
-1
python/paddle/fluid/contrib/tests/test_calibration.py
python/paddle/fluid/contrib/tests/test_calibration.py
+0
-4
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
...on/paddle/fluid/contrib/tests/test_quantize_transpiler.py
+5
-3
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+222
-24
python/paddle/fluid/imperative/layers.py
python/paddle/fluid/imperative/layers.py
+94
-19
python/paddle/fluid/imperative/nn.py
python/paddle/fluid/imperative/nn.py
+0
-6
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+45
-8
python/paddle/fluid/layers/tensor.py
python/paddle/fluid/layers/tensor.py
+5
-1
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+2
-0
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+4
-0
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
...ddle/fluid/tests/unittests/parallel_executor_test_base.py
+1
-1
python/paddle/fluid/tests/unittests/test_base_layer.py
python/paddle/fluid/tests/unittests/test_base_layer.py
+82
-0
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+10
-0
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
...e/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
+4
-0
python/paddle/fluid/tests/unittests/test_imperative.py
python/paddle/fluid/tests/unittests/test_imperative.py
+12
-0
python/paddle/fluid/tests/unittests/test_imperative_gan.py
python/paddle/fluid/tests/unittests/test_imperative_gan.py
+0
-7
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
...n/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
+0
-16
python/paddle/fluid/tests/unittests/test_imperative_resnet.py
...on/paddle/fluid/tests/unittests/test_imperative_resnet.py
+12
-14
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
...id/tests/unittests/test_ir_memory_optimize_transformer.py
+48
-0
未找到文件。
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
d79d2f68
...
@@ -50,7 +50,12 @@ cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_
...
@@ -50,7 +50,12 @@ cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_
cc_library
(
gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope
)
cc_library
(
fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope
)
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper
)
if
(
WITH_GPU
)
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info
)
else
()
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper cpu_info
)
endif
()
cc_library
(
memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass
)
cc_library
(
memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass
)
cc_library
(
inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info
)
cc_library
(
inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info
)
cc_library
(
modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper
)
cc_library
(
modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper
)
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
d79d2f68
...
@@ -240,7 +240,9 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
...
@@ -240,7 +240,9 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
continue
;
continue
;
}
}
}
}
VLOG
(
3
)
<<
"Start Apply Pass "
<<
pass
->
Type
();
graph
=
pass
->
Apply
(
std
::
move
(
graph
));
graph
=
pass
->
Apply
(
std
::
move
(
graph
));
VLOG
(
3
)
<<
"Finish Apply Pass "
<<
pass
->
Type
();
}
}
return
graph
;
return
graph
;
}
}
...
...
paddle/fluid/framework/details/inplace_op_pass.cc
浏览文件 @
d79d2f68
...
@@ -49,7 +49,7 @@ DEFINE_bool(
...
@@ -49,7 +49,7 @@ DEFINE_bool(
"If this option turns on, only these op in whitelist can be inplaced."
"If this option turns on, only these op in whitelist can be inplaced."
"If it turns off, all of the running op can be candidate of inplaced op."
"If it turns off, all of the running op can be candidate of inplaced op."
"Such as scale, elementwise_add"
"Such as scale, elementwise_add"
"By default, it's turned o
n
"
);
"By default, it's turned o
ff
"
);
DECLARE_string
(
memory_optimize_debug
);
DECLARE_string
(
memory_optimize_debug
);
...
...
paddle/fluid/framework/details/memory_optimize_helper.cc
浏览文件 @
d79d2f68
...
@@ -13,13 +13,19 @@
...
@@ -13,13 +13,19 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/memory_optimize_helper.h"
#include "paddle/fluid/framework/details/memory_optimize_helper.h"
#include <algorithm>
#include <deque>
#include <deque>
#include <functional>
#include <functional>
#include <i
ostream
>
#include <i
terator
>
#include <numeric>
#include <numeric>
#include <sstream>
#include <sstream>
#include <string>
#include <string>
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/platform/cpu_info.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/gpu_info.h"
#endif // PADDLE_WITH_CUDA
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
@@ -166,6 +172,11 @@ struct NodeComparator {
...
@@ -166,6 +172,11 @@ struct NodeComparator {
bool
operator
()(
ir
::
Node
*
lhs
,
ir
::
Node
*
rhs
)
const
{
bool
operator
()(
ir
::
Node
*
lhs
,
ir
::
Node
*
rhs
)
const
{
auto
*
lhs_desc
=
FindVarDescInBlock
(
lhs
);
auto
*
lhs_desc
=
FindVarDescInBlock
(
lhs
);
auto
*
rhs_desc
=
FindVarDescInBlock
(
rhs
);
auto
*
rhs_desc
=
FindVarDescInBlock
(
rhs
);
// match data type
if
(
lhs_desc
->
GetDataType
()
!=
rhs_desc
->
GetDataType
())
{
return
false
;
}
// match shape
auto
lhs_shape
=
lhs_desc
->
GetShape
();
auto
lhs_shape
=
lhs_desc
->
GetShape
();
auto
rhs_shape
=
rhs_desc
->
GetShape
();
auto
rhs_shape
=
rhs_desc
->
GetShape
();
if
((
lhs_shape
[
0
]
==
-
1
&&
rhs_shape
[
0
]
==
-
1
)
||
if
((
lhs_shape
[
0
]
==
-
1
&&
rhs_shape
[
0
]
==
-
1
)
||
...
@@ -230,6 +241,27 @@ ir::Node* OrderedSet::FindBestFitNode(ir::Node* var) const {
...
@@ -230,6 +241,27 @@ ir::Node* OrderedSet::FindBestFitNode(ir::Node* var) const {
return
found_node
;
return
found_node
;
}
}
ir
::
Node
*
OrderedSet
::
FindNextBestFitNode
(
ir
::
Node
*
var
,
ir
::
Node
*
prev
)
const
{
ir
::
Node
*
found_node
=
nullptr
;
NodeComparator
functor
;
auto
it
=
std
::
find_if
(
nodes_
.
begin
(),
nodes_
.
end
(),
[
&
](
const
NodeVector
&
v
)
{
if
(
v
.
front
()
==
prev
)
return
true
;
else
return
false
;
});
PADDLE_ENFORCE
(
it
!=
nodes_
.
end
(),
"Not found previous in node list!"
);
for
(
it
=
std
::
next
(
it
);
it
!=
nodes_
.
end
();
++
it
)
{
auto
&
candidate
=
it
->
front
();
if
(
functor
(
var
,
candidate
))
{
found_node
=
candidate
;
break
;
}
}
return
found_node
;
}
bool
OrderedSet
::
Has
(
ir
::
Node
*
var
)
const
{
bool
OrderedSet
::
Has
(
ir
::
Node
*
var
)
const
{
if
(
mark_table_
.
count
(
var
->
Name
()))
{
if
(
mark_table_
.
count
(
var
->
Name
()))
{
auto
&
node_in_samename
=
mark_table_
.
at
(
var
->
Name
());
auto
&
node_in_samename
=
mark_table_
.
at
(
var
->
Name
());
...
@@ -241,10 +273,15 @@ bool OrderedSet::Has(ir::Node* var) const {
...
@@ -241,10 +273,15 @@ bool OrderedSet::Has(ir::Node* var) const {
return
false
;
return
false
;
}
}
void
OrderedSet
::
Erase
(
const
std
::
string
&
var
)
{
PADDLE_ENFORCE
(
mark_table_
.
count
(
var
));
nodes_
.
erase
(
mark_table_
[
var
]);
mark_table_
.
erase
(
var
);
}
void
OrderedSet
::
Erase
(
ir
::
Node
*
var
)
{
void
OrderedSet
::
Erase
(
ir
::
Node
*
var
)
{
PADDLE_ENFORCE
(
mark_table_
.
count
(
var
->
Name
()));
PADDLE_ENFORCE
(
var
!=
nullptr
);
nodes_
.
erase
(
mark_table_
[
var
->
Name
()]);
Erase
(
var
->
Name
());
mark_table_
.
erase
(
var
->
Name
());
}
}
std
::
string
OrderedSet
::
ToString
()
const
{
std
::
string
OrderedSet
::
ToString
()
const
{
...
@@ -274,14 +311,35 @@ bool NodeCanReused(ir::Node* node) {
...
@@ -274,14 +311,35 @@ bool NodeCanReused(ir::Node* node) {
return
flag
;
return
flag
;
}
}
int
MinChunkSize
()
{
int
size
{
0
};
#ifdef PADDLE_WITH_CUDA
size
=
platform
::
GpuMinChunkSize
();
#else
size
=
platform
::
CpuMinChunkSize
();
#endif // PADDLE_WITH_CUDA
return
size
;
}
bool
NodeCanReused
(
const
VarDesc
&
node
)
{
bool
NodeCanReused
(
const
VarDesc
&
node
)
{
auto
type
=
node
.
GetType
();
auto
type
=
node
.
GetType
();
// only these types holds bulk of gpu memory
if
(
!
(
type
==
proto
::
VarType
::
LOD_TENSOR
||
if
(
!
(
type
==
proto
::
VarType
::
LOD_TENSOR
||
type
==
proto
::
VarType
::
SELECTED_ROWS
||
type
==
proto
::
VarType
::
SELECTED_ROWS
||
type
==
proto
::
VarType
::
LOD_TENSOR_ARRAY
))
{
type
==
proto
::
VarType
::
LOD_TENSOR_ARRAY
))
{
return
false
;
return
false
;
}
}
if
(
node
.
Persistable
()
||
node
.
GetShape
().
empty
())
{
// persistable variable is parameter
if
(
node
.
Persistable
())
{
return
false
;
}
// shape < min_chunk_size is meaningless.
// further more, fetched loss always has size = 1
// which should not be reused.
auto
shape
=
node
.
GetShape
();
int
size
=
std
::
abs
(
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
int
>
()));
if
(
shape
.
empty
()
||
size
<
MinChunkSize
())
{
return
false
;
return
false
;
}
}
// vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
// vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
...
@@ -461,7 +519,9 @@ ir::Node* ControlFlowGraph::GetNodeByName(const std::string& name,
...
@@ -461,7 +519,9 @@ ir::Node* ControlFlowGraph::GetNodeByName(const std::string& name,
for
(
auto
*
node
:
ops_
)
{
for
(
auto
*
node
:
ops_
)
{
if
(
node
==
op
)
break
;
if
(
node
==
op
)
break
;
for
(
auto
&
output
:
node
->
outputs
)
{
for
(
auto
&
output
:
node
->
outputs
)
{
if
(
output
->
Name
()
==
name
)
{
PADDLE_ENFORCE
((
output
!=
nullptr
&&
output
->
IsVar
()),
"Output is empty!"
);
if
(
output
->
Var
()
&&
output
->
Name
()
==
name
)
{
found_node
=
output
;
found_node
=
output
;
}
}
}
}
...
...
paddle/fluid/framework/details/memory_optimize_helper.h
浏览文件 @
d79d2f68
...
@@ -55,6 +55,7 @@ class OrderedSet {
...
@@ -55,6 +55,7 @@ class OrderedSet {
void
Insert
(
ir
::
Node
*
var
);
void
Insert
(
ir
::
Node
*
var
);
void
Erase
(
ir
::
Node
*
var
);
void
Erase
(
ir
::
Node
*
var
);
void
Erase
(
const
std
::
string
&
var
);
bool
Has
(
ir
::
Node
*
var
)
const
;
bool
Has
(
ir
::
Node
*
var
)
const
;
void
Clear
()
{
void
Clear
()
{
mark_table_
.
clear
();
mark_table_
.
clear
();
...
@@ -62,6 +63,7 @@ class OrderedSet {
...
@@ -62,6 +63,7 @@ class OrderedSet {
}
}
// find the bestfit shape node block with var.
// find the bestfit shape node block with var.
ir
::
Node
*
FindBestFitNode
(
ir
::
Node
*
var
)
const
;
ir
::
Node
*
FindBestFitNode
(
ir
::
Node
*
var
)
const
;
ir
::
Node
*
FindNextBestFitNode
(
ir
::
Node
*
var
,
ir
::
Node
*
prev
)
const
;
// map store non-const iterator, can not promise const
// map store non-const iterator, can not promise const
int
GetNodeIndexInPool
(
ir
::
Node
*
var
);
int
GetNodeIndexInPool
(
ir
::
Node
*
var
);
// pool all node to string
// pool all node to string
...
...
paddle/fluid/framework/details/memory_optimize_helper_test.cc
浏览文件 @
d79d2f68
...
@@ -107,6 +107,52 @@ TEST(OrderedSet, Normal) {
...
@@ -107,6 +107,52 @@ TEST(OrderedSet, Normal) {
ASSERT_EQ
(
pool
.
GetNodeIndexInPool
(
cache
),
5
);
// match 4:[5,2]
ASSERT_EQ
(
pool
.
GetNodeIndexInPool
(
cache
),
5
);
// match 4:[5,2]
}
}
}
}
TEST
(
OrderedSet
,
FindBestFitNode
)
{
OrderedSet
pool
;
std
::
vector
<
std
::
unique_ptr
<
ir
::
Node
>>
nodes
;
ProgramDesc
prog
;
BlockDesc
*
block_desc
=
prog
.
MutableBlock
(
0
);
auto
*
op_desc
=
block_desc
->
AppendOp
();
op_desc
->
SetType
(
"dummy"
);
std
::
unique_ptr
<
ir
::
Node
>
op
=
ir
::
CreateNodeForTest
(
op_desc
);
{
auto
desc
=
block_desc
->
Var
(
"a"
);
desc
->
SetShape
({
128
,
128
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
{
auto
desc
=
block_desc
->
Var
(
"b"
);
desc
->
SetShape
({
128
,
129
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
{
auto
desc
=
block_desc
->
Var
(
"c"
);
desc
->
SetShape
({
128
,
128
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
for
(
auto
&
node
:
nodes
)
{
pool
.
Insert
(
node
.
get
());
}
// FindNextBestFitNode
auto
*
n
=
nodes
[
0
].
get
();
auto
*
cache
=
pool
.
FindBestFitNode
(
n
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"a"
);
cache
=
pool
.
FindNextBestFitNode
(
n
,
cache
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"c"
);
cache
=
pool
.
FindNextBestFitNode
(
n
,
cache
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"b"
);
}
}
// namespace details
}
// namespace details
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
...
...
paddle/fluid/framework/details/memory_optimize_pass.cc
浏览文件 @
d79d2f68
...
@@ -69,11 +69,20 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
...
@@ -69,11 +69,20 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
}
}
for
(
auto
&
var
:
op
->
outputs
)
{
for
(
auto
&
var
:
op
->
outputs
)
{
if
(
!
NodeCanReused
(
var
)
||
cfg_
->
Use
(
op
).
count
(
var
->
Name
())
==
0
||
if
(
var
->
IsVar
()
&&
!
var
->
IsCtrlVar
()
&&
skip_set_
.
count
(
var
->
Name
()))
{
skip_set_
.
count
(
var
->
Name
()))
VLOG
(
3
)
<<
"Skip set contains variable of "
<<
var
->
Name
()
<<
"disable reuse on it. skipped"
;
continue
;
continue
;
}
if
(
NodeCanReused
(
var
)
&&
cfg_
->
Use
(
op
).
count
(
var
->
Name
())
==
0
)
{
ir
::
Node
*
cache
=
pool_
.
FindBestFitNode
(
var
);
ir
::
Node
*
cache
=
pool_
.
FindBestFitNode
(
var
);
while
(
cache
!=
nullptr
&&
var
->
Name
()
==
cache
->
Name
())
{
VLOG
(
3
)
<<
"The same cache variable is cascade reused. "
<<
cache
->
Name
()
<<
" is re-filled to the pool after "
<<
"the reused op is finished. Current op can not "
<<
"replace it again. Skip this candidate."
;
cache
=
pool_
.
FindNextBestFitNode
(
var
,
cache
);
}
if
(
var
->
Name
()
==
FLAGS_memory_optimize_debug
)
{
if
(
var
->
Name
()
==
FLAGS_memory_optimize_debug
)
{
VLOG
(
3
)
<<
"start match var "
<<
DebugString
(
var
)
<<
" of op "
VLOG
(
3
)
<<
"start match var "
<<
DebugString
(
var
)
<<
" of op "
<<
op
->
Name
();
<<
op
->
Name
();
...
@@ -82,42 +91,37 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
...
@@ -82,42 +91,37 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
<<
((
cache
==
nullptr
)
?
"False"
:
"True"
);
<<
((
cache
==
nullptr
)
?
"False"
:
"True"
);
}
}
if
(
cache
==
nullptr
)
continue
;
if
(
cache
!=
nullptr
)
{
if
(
var
->
Name
()
==
cache
->
Name
())
{
VLOG
(
3
)
<<
"The same cache variable is cascade reused."
<<
var
->
Name
()
<<
" is re-filled to the pool after"
<<
"the reused op is finished. Current op can not "
<<
"replace it again. Skip this candidate."
;
continue
;
int
node_idx_in_pool
=
pool_
.
GetNodeIndexInPool
(
cache
);
int
node_idx_in_pool
=
pool_
.
GetNodeIndexInPool
(
cache
);
VLOG
(
3
)
<<
string
::
Sprintf
(
VLOG
(
3
)
<<
string
::
Sprintf
(
"!!! %s, %s => %s, cache idx %d, pool size %d"
,
"!!! %s, %s => %s, cache idx %d, pool size %d"
,
std
::
to_string
(
reuse_id
++
),
DebugString
(
var
),
DebugString
(
cache
),
std
::
to_string
(
reuse_id
++
),
DebugString
(
var
),
DebugString
(
cache
),
node_idx_in_pool
,
static_cast
<
int
>
(
pool_
.
size
()));
node_idx_in_pool
,
static_cast
<
int
>
(
pool_
.
size
()));
// NOTE(dzhwinter): update the ProgramDesc/IR Graph
// and the CFG Graph on the fly.
//
// IR Graph define the dependence relationship between nodes.
//
// ProgramDesc defines the input/output vars. Its used in
// CreateOp, CreateVar when running happens.
//
// CFG Graph store the liveness information, when reuse happens
// we also need to update the variable liveness.
const
std
::
string
var_name
=
var
->
Name
();
const
std
::
string
cache_name
=
cache
->
Name
();
// update CFG Graph on the fly.
cfg_
->
RenameVarInCFGGraph
(
var_name
,
cache_name
,
idx
);
// reused var maybe re-fill into the pool
RenameVarInGraphDesc
(
var_name
,
cache_name
,
idx
);
cfg_
->
RenameVarInCFGGraph
(
var
->
Name
(),
cache
->
Name
(),
idx
);
RenameVarInGraphNode
(
var_name
,
cache_name
,
idx
,
graph
.
get
());
// NOTE(dzhwinter): we need to both update the ProgramDesc
pool_
.
Erase
(
cache_name
);
// and IR Graph. because op_desc/var_desc is used in CreateOp,
}
// CreateVar when running happens. But IR Graph
}
// define the dependence relationship between nodes.
RenameVarInGraphDesc
(
var
->
Name
(),
cache
->
Name
(),
idx
);
RenameVarInGraphNode
(
var
->
Name
(),
cache
->
Name
(),
idx
,
graph
.
get
());
pool_
.
Erase
(
cache
);
}
}
// fill the pool
// fill the pool
std
::
unordered_set
<
std
::
string
>
unlived_vars
;
for
(
auto
var
:
cfg_
->
LiveIn
(
op
))
{
for
(
auto
var
:
cfg_
->
LiveIn
(
op
))
{
if
(
cfg_
->
LiveOut
(
op
).
count
(
var
)
==
0
)
{
if
(
cfg_
->
LiveOut
(
op
).
count
(
var
)
==
0
)
{
unlived_vars
.
emplace
(
var
);
}
}
for
(
auto
var
:
unlived_vars
)
{
ir
::
Node
*
var_node
=
cfg_
->
GetNodeByName
(
var
,
op
);
ir
::
Node
*
var_node
=
cfg_
->
GetNodeByName
(
var
,
op
);
if
(
var_node
==
nullptr
||
var_node
->
IsCtrlVar
())
continue
;
if
(
NodeCanReused
(
var_node
)
&&
!
pool_
.
Has
(
var_node
))
{
if
(
NodeCanReused
(
var_node
)
&&
!
pool_
.
Has
(
var_node
))
{
pool_
.
Insert
(
var_node
);
pool_
.
Insert
(
var_node
);
}
}
...
@@ -273,8 +277,7 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
...
@@ -273,8 +277,7 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
// redirect the input to the latest version of cache_var
// redirect the input to the latest version of cache_var
for
(
auto
*
node
:
op
->
inputs
)
{
for
(
auto
*
node
:
op
->
inputs
)
{
if
(
node
->
Name
()
==
var
)
{
if
(
node
->
Name
()
==
var
)
{
ir
::
Node
*
cache_node
=
graph
->
CreateVarNode
(
var_desc
.
get
());
ir
::
Node
*
cache_node
=
var_nodes_
[
cache_var
].
back
();
var_nodes_
[
cache_var
].
emplace_back
(
cache_node
);
// swap node to cache_node
// swap node to cache_node
cache_node
->
outputs
.
insert
(
cache_node
->
outputs
.
end
(),
cache_node
->
outputs
.
insert
(
cache_node
->
outputs
.
end
(),
...
@@ -283,11 +286,15 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
...
@@ -283,11 +286,15 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
auto
*
prev_op
=
node
->
inputs
[
0
];
auto
*
prev_op
=
node
->
inputs
[
0
];
std
::
replace
(
prev_op
->
outputs
.
begin
(),
prev_op
->
outputs
.
end
(),
node
,
std
::
replace
(
prev_op
->
outputs
.
begin
(),
prev_op
->
outputs
.
end
(),
node
,
cache_node
);
cache_node
);
cache_node
->
inputs
.
emplace_back
(
prev_op
);
for
(
auto
*
next_op
:
node
->
outputs
)
{
for
(
auto
*
next_op
:
node
->
outputs
)
{
std
::
replace
(
next_op
->
inputs
.
begin
(),
next_op
->
inputs
.
end
(),
node
,
std
::
replace
(
next_op
->
inputs
.
begin
(),
next_op
->
inputs
.
end
(),
node
,
cache_node
);
cache_node
);
}
}
// erase unused node
auto
&
nodes
=
var_nodes_
.
at
(
var
);
nodes
.
erase
(
std
::
remove
(
nodes
.
begin
(),
nodes
.
end
(),
node
),
nodes
.
end
());
graph
->
RemoveNode
(
node
);
}
}
}
}
...
@@ -307,15 +314,14 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
...
@@ -307,15 +314,14 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
std
::
replace
(
next_op
->
inputs
.
begin
(),
next_op
->
inputs
.
end
(),
node
,
std
::
replace
(
next_op
->
inputs
.
begin
(),
next_op
->
inputs
.
end
(),
node
,
cache_node
);
cache_node
);
}
}
}
}
}
// release node of unused var in graph
// erase unused node
for
(
auto
*
node
:
var_nodes_
[
var
])
{
auto
&
nodes
=
var_nodes_
.
at
(
var
);
nodes
.
erase
(
std
::
remove
(
nodes
.
begin
(),
nodes
.
end
(),
node
),
nodes
.
end
());
graph
->
RemoveNode
(
node
);
graph
->
RemoveNode
(
node
);
}
}
var_nodes_
.
at
(
var
).
clear
();
}
}
}
}
}
// namespace details
}
// namespace details
...
...
paddle/fluid/framework/inplace_op_inference_test.cc
浏览文件 @
d79d2f68
...
@@ -179,11 +179,11 @@ TEST(InferInplace, SingleOpInplaceInToOut) {
...
@@ -179,11 +179,11 @@ TEST(InferInplace, SingleOpInplaceInToOut) {
op
->
SetOutput
(
"Out"
,
{
"test2_out"
});
op
->
SetOutput
(
"Out"
,
{
"test2_out"
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
64
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
64
,
128
,
128
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
,
128
,
128
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
@@ -201,11 +201,11 @@ TEST(InferInplace, SingleGradOpInplaceInToOut) {
...
@@ -201,11 +201,11 @@ TEST(InferInplace, SingleGradOpInplaceInToOut) {
op
->
SetOutput
(
GradVarName
(
"X"
),
{
"test2_a"
,
"test2_b"
,
"test2_c"
});
op
->
SetOutput
(
GradVarName
(
"X"
),
{
"test2_a"
,
"test2_b"
,
"test2_c"
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
@@ -233,12 +233,12 @@ TEST(InferInplace, MultiOutInplaceInToOut) {
...
@@ -233,12 +233,12 @@ TEST(InferInplace, MultiOutInplaceInToOut) {
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
@@ -267,12 +267,12 @@ TEST(InferInplace, MultiGradInplaceInToOut) {
...
@@ -267,12 +267,12 @@ TEST(InferInplace, MultiGradInplaceInToOut) {
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
浏览文件 @
d79d2f68
...
@@ -38,9 +38,13 @@ std::unique_ptr<ir::Graph> IdentityScaleOpCleanPass::ApplyImpl(
...
@@ -38,9 +38,13 @@ std::unique_ptr<ir::Graph> IdentityScaleOpCleanPass::ApplyImpl(
->
assert_is_op
(
"scale"
)
->
assert_is_op
(
"scale"
)
->
assert_op_attr
<
float
>
(
"scale"
,
1.
)
->
assert_op_attr
<
float
>
(
"scale"
,
1.
)
->
assert_op_attr
<
float
>
(
"bias"
,
0.
);
->
assert_op_attr
<
float
>
(
"bias"
,
0.
);
auto
scale_out
=
detector
.
mutable_pattern
()
auto
scale_out
=
detector
.
mutable_pattern
()
->
NewNode
(
"scale_out"
)
->
NewNode
(
"scale_out"
)
->
assert_is_op_output
(
"scale"
);
->
assert_is_op_output
(
"scale"
)
// scale's output var should has only one consumer, or it can't be
// removed.
->
assert_more
([](
Node
*
x
)
{
return
x
->
outputs
.
size
()
==
1UL
;
});
pre_op
->
LinksTo
({
scale_in
});
pre_op
->
LinksTo
({
scale_in
});
scale_op
->
LinksFrom
({
scale_in
}).
LinksTo
({
scale_out
});
scale_op
->
LinksFrom
({
scale_in
}).
LinksTo
({
scale_out
});
...
...
paddle/fluid/imperative/layer.cc
浏览文件 @
d79d2f68
...
@@ -207,7 +207,7 @@ framework::LoDTensor& VarBase::GradValue() {
...
@@ -207,7 +207,7 @@ framework::LoDTensor& VarBase::GradValue() {
std
::
map
<
std
::
string
,
std
::
vector
<
VarBase
*>>
OpBase
::
ApplyGrad
()
{
std
::
map
<
std
::
string
,
std
::
vector
<
VarBase
*>>
OpBase
::
ApplyGrad
()
{
if
(
grad_op_descs_
.
empty
()
&&
backward_id_
<=
0
)
{
if
(
grad_op_descs_
.
empty
()
&&
backward_id_
<=
0
)
{
LOG
(
WARNING
)
<<
"op with no grad: "
<<
op_desc_
->
Type
();
VLOG
(
3
)
<<
"op with no grad: "
<<
op_desc_
->
Type
();
return
{};
return
{};
}
}
...
...
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
浏览文件 @
d79d2f68
...
@@ -460,77 +460,6 @@ inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) {
...
@@ -460,77 +460,6 @@ inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) {
return
node
.
inputs
.
size
()
==
n
;
return
node
.
inputs
.
size
()
==
n
;
}
}
NodesTSIterator
::
NodesTSIterator
(
const
std
::
vector
<
Node
*>
&
source
)
{
PADDLE_ENFORCE
(
!
source
.
empty
(),
"Start points of topological sorting should not be empty!"
);
// CHECK all the inputs' in-degree is 0
for
(
auto
*
node
:
source
)
{
PADDLE_ENFORCE
(
CheckNodeIndegreeEquals
(
*
node
,
0
));
}
std
::
unordered_set
<
Node
*>
visited
;
std
::
unordered_set
<
Node
*>
to_visit
{
source
.
begin
(),
source
.
end
()};
std
::
vector
<
Node
*>
inlink_visited
;
while
(
!
to_visit
.
empty
())
{
std
::
vector
<
Node
*>
queue
(
to_visit
.
begin
(),
to_visit
.
end
());
for
(
auto
*
p
:
queue
)
{
if
(
Agent
(
p
).
deleted
())
{
visited
.
insert
(
p
);
to_visit
.
erase
(
p
);
}
inlink_visited
.
clear
();
std
::
copy_if
(
p
->
inputs
.
begin
(),
p
->
inputs
.
end
(),
std
::
back_inserter
(
inlink_visited
),
[
&
](
Node
*
x
)
->
bool
{
return
visited
.
count
(
x
)
!=
0
;
});
if
(
inlink_visited
.
size
()
==
p
->
inputs
.
size
())
{
sorted_
.
push_back
(
p
);
for
(
auto
*
_
:
p
->
outputs
)
{
if
(
!
visited
.
count
(
_
))
{
to_visit
.
insert
(
_
);
}
}
to_visit
.
erase
(
p
);
visited
.
insert
(
p
);
}
}
}
}
NodesTSIterator
::
NodesTSIterator
(
const
NodesTSIterator
&
other
)
:
sorted_
(
other
.
sorted_
),
cursor_
(
other
.
cursor_
)
{}
Node
&
NodesTSIterator
::
operator
*
()
{
PADDLE_ENFORCE_LT
(
cursor_
,
sorted_
.
size
());
return
*
sorted_
[
cursor_
];
}
NodesTSIterator
&
NodesTSIterator
::
operator
++
()
{
if
(
++
cursor_
>=
sorted_
.
size
())
{
sorted_
.
clear
();
cursor_
=
0
;
}
return
*
this
;
}
NodesTSIterator
&
NodesTSIterator
::
operator
=
(
const
NodesTSIterator
&
other
)
{
cursor_
=
other
.
cursor_
;
sorted_
=
other
.
sorted_
;
return
*
this
;
}
bool
NodesTSIterator
::
operator
==
(
const
NodesTSIterator
&
other
)
{
return
sorted_
==
other
.
sorted_
&&
cursor_
==
other
.
cursor_
;
}
Node
*
NodesTSIterator
::
operator
->
()
{
PADDLE_ENFORCE_LT
(
cursor_
,
sorted_
.
size
());
return
sorted_
[
cursor_
];
}
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h
浏览文件 @
d79d2f68
...
@@ -30,6 +30,7 @@ namespace inference {
...
@@ -30,6 +30,7 @@ namespace inference {
namespace
analysis
{
namespace
analysis
{
using
framework
::
ir
::
Graph
;
using
framework
::
ir
::
Graph
;
using
framework
::
ir
::
NodesTSIterator
;
const
char
kIsFunctionNode
[]
=
"__is_function_node__"
;
const
char
kIsFunctionNode
[]
=
"__is_function_node__"
;
const
char
kFunctionNodeSubGraph
[]
=
"__function_node_sub_graph__"
;
const
char
kFunctionNodeSubGraph
[]
=
"__function_node_sub_graph__"
;
...
@@ -132,32 +133,6 @@ struct Agent {
...
@@ -132,32 +133,6 @@ struct Agent {
framework
::
ir
::
Node
*
x_
;
framework
::
ir
::
Node
*
x_
;
};
};
// Topological sorting iterator on nodes.
struct
NodesTSIterator
:
public
std
::
iterator
<
std
::
forward_iterator_tag
,
framework
::
ir
::
Node
*>
{
NodesTSIterator
()
=
default
;
explicit
NodesTSIterator
(
const
std
::
vector
<
framework
::
ir
::
Node
*>
&
source
);
NodesTSIterator
(
NodesTSIterator
&&
other
)
:
sorted_
(
std
::
move
(
other
.
sorted_
)),
cursor_
(
other
.
cursor_
)
{
other
.
cursor_
=
0
;
}
NodesTSIterator
(
const
NodesTSIterator
&
other
);
framework
::
ir
::
Node
&
operator
*
();
NodesTSIterator
&
operator
++
();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesTSIterator
&
operator
=
(
const
NodesTSIterator
&
other
);
bool
operator
==
(
const
NodesTSIterator
&
other
);
bool
operator
!=
(
const
NodesTSIterator
&
other
)
{
return
!
(
*
this
==
other
);
}
framework
::
ir
::
Node
*
operator
->
();
private:
std
::
vector
<
framework
::
ir
::
Node
*>
sorted_
;
size_t
cursor_
{
0
};
};
// The nodes those have no input will be treated as start points.
// The nodes those have no input will be treated as start points.
static
std
::
vector
<
framework
::
ir
::
Node
*>
ExtractStartPoints
(
const
Graph
&
g
)
{
static
std
::
vector
<
framework
::
ir
::
Node
*>
ExtractStartPoints
(
const
Graph
&
g
)
{
std
::
vector
<
framework
::
ir
::
Node
*>
result
;
std
::
vector
<
framework
::
ir
::
Node
*>
result
;
...
...
paddle/fluid/operators/detection/density_prior_box_op.h
浏览文件 @
d79d2f68
...
@@ -72,7 +72,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
...
@@ -72,7 +72,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
#ifdef PADDLE_WITH_MKLML
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for
#pragma omp parallel for
#endif
#endif
for
(
in
t
i
=
0
;
i
<
fixed_ratios
.
size
();
i
++
)
{
for
(
size_
t
i
=
0
;
i
<
fixed_ratios
.
size
();
i
++
)
{
sqrt_fixed_ratios
.
push_back
(
sqrt
(
fixed_ratios
[
i
]));
sqrt_fixed_ratios
.
push_back
(
sqrt
(
fixed_ratios
[
i
]));
}
}
...
@@ -115,11 +115,10 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
...
@@ -115,11 +115,10 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
}
}
}
}
if
(
clip
)
{
if
(
clip
)
{
platform
::
Transform
<
platform
::
CPUDeviceContext
>
trans
;
T
*
dt
=
boxes
->
data
<
T
>
();
ClipFunctor
<
T
>
clip_func
;
std
::
transform
(
dt
,
dt
+
boxes
->
numel
(),
dt
,
[](
T
v
)
->
T
{
trans
(
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>(),
return
std
::
min
<
T
>
(
std
::
max
<
T
>
(
v
,
0.
),
1.
);
boxes
->
data
<
T
>
(),
boxes
->
data
<
T
>
()
+
boxes
->
numel
(),
});
boxes
->
data
<
T
>
(),
clip_func
);
}
}
framework
::
Tensor
var_t
;
framework
::
Tensor
var_t
;
var_t
.
mutable_data
<
T
>
(
var_t
.
mutable_data
<
T
>
(
...
@@ -141,7 +140,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
...
@@ -141,7 +140,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
#pragma omp parallel for collapse(2)
#pragma omp parallel for collapse(2)
#endif
#endif
for
(
int
i
=
0
;
i
<
box_num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
box_num
;
++
i
)
{
for
(
in
t
j
=
0
;
j
<
variances
.
size
();
++
j
)
{
for
(
size_
t
j
=
0
;
j
<
variances
.
size
();
++
j
)
{
e_vars
(
i
,
j
)
=
variances
[
j
];
e_vars
(
i
,
j
)
=
variances
[
j
];
}
}
}
}
...
...
paddle/fluid/operators/detection/prior_box_op.h
浏览文件 @
d79d2f68
...
@@ -46,13 +46,6 @@ inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
...
@@ -46,13 +46,6 @@ inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
}
}
}
}
template
<
typename
T
>
struct
ClipFunctor
{
HOSTDEVICE
inline
T
operator
()(
T
in
)
const
{
return
std
::
min
<
T
>
(
std
::
max
<
T
>
(
in
,
0.
),
1.
);
}
};
template
<
typename
T
>
template
<
typename
T
>
class
PriorBoxOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
PriorBoxOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
...
@@ -101,31 +94,30 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
...
@@ -101,31 +94,30 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
boxes
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
boxes
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
vars
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
vars
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
e_boxes
=
framework
::
EigenTensor
<
T
,
4
>::
From
(
*
boxes
);
T
*
b_t
=
boxes
->
data
<
T
>
(
);
for
(
int
h
=
0
;
h
<
feature_height
;
++
h
)
{
for
(
int
h
=
0
;
h
<
feature_height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
feature_width
;
++
w
)
{
for
(
int
w
=
0
;
w
<
feature_width
;
++
w
)
{
T
center_x
=
(
w
+
offset
)
*
step_width
;
T
center_x
=
(
w
+
offset
)
*
step_width
;
T
center_y
=
(
h
+
offset
)
*
step_height
;
T
center_y
=
(
h
+
offset
)
*
step_height
;
T
box_width
,
box_height
;
T
box_width
,
box_height
;
int
idx
=
0
;
for
(
size_t
s
=
0
;
s
<
min_sizes
.
size
();
++
s
)
{
for
(
size_t
s
=
0
;
s
<
min_sizes
.
size
();
++
s
)
{
auto
min_size
=
min_sizes
[
s
];
auto
min_size
=
min_sizes
[
s
];
if
(
min_max_aspect_ratios_order
)
{
if
(
min_max_aspect_ratios_order
)
{
box_width
=
box_height
=
min_size
/
2.
;
box_width
=
box_height
=
min_size
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
+=
4
;
if
(
max_sizes
.
size
()
>
0
)
{
if
(
max_sizes
.
size
()
>
0
)
{
auto
max_size
=
max_sizes
[
s
];
auto
max_size
=
max_sizes
[
s
];
// square prior with size sqrt(minSize * maxSize)
// square prior with size sqrt(minSize * maxSize)
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
+=
4
;
}
}
// priors with different aspect ratios
// priors with different aspect ratios
for
(
size_t
r
=
0
;
r
<
aspect_ratios
.
size
();
++
r
)
{
for
(
size_t
r
=
0
;
r
<
aspect_ratios
.
size
();
++
r
)
{
...
@@ -135,11 +127,11 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
...
@@ -135,11 +127,11 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
}
}
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
+=
4
;
}
}
}
else
{
}
else
{
// priors with different aspect ratios
// priors with different aspect ratios
...
@@ -147,21 +139,21 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
...
@@ -147,21 +139,21 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
float
ar
=
aspect_ratios
[
r
];
float
ar
=
aspect_ratios
[
r
];
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
+=
4
;
}
}
if
(
max_sizes
.
size
()
>
0
)
{
if
(
max_sizes
.
size
()
>
0
)
{
auto
max_size
=
max_sizes
[
s
];
auto
max_size
=
max_sizes
[
s
];
// square prior with size sqrt(minSize * maxSize)
// square prior with size sqrt(minSize * maxSize)
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
+=
4
;
}
}
}
}
}
}
...
@@ -169,11 +161,10 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
...
@@ -169,11 +161,10 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
}
}
if
(
clip
)
{
if
(
clip
)
{
platform
::
Transform
<
platform
::
CPUDeviceContext
>
trans
;
T
*
dt
=
boxes
->
data
<
T
>
();
ClipFunctor
<
T
>
clip_func
;
std
::
transform
(
dt
,
dt
+
boxes
->
numel
(),
dt
,
[](
T
v
)
->
T
{
trans
(
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>(),
return
std
::
min
<
T
>
(
std
::
max
<
T
>
(
v
,
0.
),
1.
);
boxes
->
data
<
T
>
(),
boxes
->
data
<
T
>
()
+
boxes
->
numel
(),
});
boxes
->
data
<
T
>
(),
clip_func
);
}
}
framework
::
Tensor
var_t
;
framework
::
Tensor
var_t
;
...
...
paddle/fluid/operators/group_norm_op.cc
浏览文件 @
d79d2f68
...
@@ -170,13 +170,48 @@ class GroupNormGradMaker : public framework::SingleGradOpDescMaker {
...
@@ -170,13 +170,48 @@ class GroupNormGradMaker : public framework::SingleGradOpDescMaker {
}
}
};
};
class
GroupNormInplaceInToOut
:
public
framework
::
InplaceInToOut
{
public:
using
InplaceInToOut
::
InplaceInToOut
;
protected:
std
::
unordered_map
<
std
::
string
,
std
::
string
>
Apply
(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{
return
{{
"X"
,
"Y"
}};
}
};
class
GroupNormGradInplaceInToOut
:
public
framework
::
InplaceInToOut
{
public:
using
InplaceInToOut
::
InplaceInToOut
;
protected:
std
::
unordered_map
<
std
::
string
,
std
::
string
>
Apply
(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{
return
{{
framework
::
GradVarName
(
"Y"
),
framework
::
GradVarName
(
"X"
)}};
}
};
class
GroupNormOpInferVarType
:
public
framework
::
PassInDtypeAndVarTypeToOutput
{
protected:
std
::
unordered_map
<
std
::
string
,
std
::
string
>
GetInputOutputWithSameType
()
const
override
{
return
{{
"X"
,
/*->*/
"Y"
}};
}
};
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
group_norm
,
ops
::
GroupNormOp
,
ops
::
GroupNormOpMaker
,
REGISTER_OPERATOR
(
group_norm
,
ops
::
GroupNormOp
,
ops
::
GroupNormOpMaker
,
ops
::
GroupNormGradMaker
);
ops
::
GroupNormOpInferVarType
,
ops
::
GroupNormGradMaker
,
REGISTER_OPERATOR
(
group_norm_grad
,
ops
::
GroupNormGradOp
);
ops
::
GroupNormInplaceInToOut
);
REGISTER_OPERATOR
(
group_norm_grad
,
ops
::
GroupNormGradOp
,
ops
::
GroupNormGradInplaceInToOut
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
group_norm
,
ops
::
GroupNormKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
group_norm
,
ops
::
GroupNormKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GroupNormKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
ops
::
GroupNormKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
...
...
paddle/fluid/operators/jit/benchmark.cc
浏览文件 @
d79d2f68
...
@@ -339,6 +339,71 @@ void BenchSoftmaxKernel() {
...
@@ -339,6 +339,71 @@ void BenchSoftmaxKernel() {
}
}
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchLayerNormKernel
()
{
const
T
epsilon
=
9.99999975e-06
;
for
(
int
n
:
{
1
,
2
,
10
})
{
for
(
int
x_dim_0
:
{
1
,
9
,
17
,
50
})
{
int
left
=
n
*
x_dim_0
;
for
(
int
x_dim_1
:
TestSizes
())
{
int
right
=
x_dim_1
;
int
sz
=
left
*
right
;
Tensor
x
,
mean
,
var
,
scale
,
bias
,
out
;
x
.
Resize
({
n
,
x_dim_0
,
x_dim_1
});
out
.
Resize
({
n
,
x_dim_0
,
x_dim_1
});
mean
.
Resize
({
n
,
x_dim_0
});
var
.
Resize
({
n
,
x_dim_0
});
scale
.
Resize
({
x_dim_1
});
bias
.
Resize
({
x_dim_1
});
RandomVec
<
T
>
(
sz
,
x
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
left
,
mean
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
left
,
var
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
right
,
scale
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
right
,
bias
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
const
T
*
scale_data
=
scale
.
data
<
T
>
();
const
T
*
bias_data
=
bias
.
data
<
T
>
();
T
*
x_data
=
x
.
data
<
T
>
();
T
*
mean_data
=
mean
.
data
<
T
>
();
T
*
var_data
=
var
.
data
<
T
>
();
T
*
out_data
=
out
.
mutable_data
<
T
>
(
PlaceType
());
BenchAllImpls
<
KT
,
jit
::
LayerNormTuples
<
T
>
,
PlaceType
>
(
right
,
x_data
,
out_data
,
mean_data
,
var_data
,
scale_data
,
bias_data
,
left
,
epsilon
,
right
);
}
}
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchCRFDecodingKernel
()
{
constexpr
int
state_trans_base_idx
=
2
;
for
(
int
seq_len
:
{
1
,
11
,
17
,
50
})
{
for
(
int
tag_num
:
TestSizes
())
{
int
x_sz
=
seq_len
*
tag_num
;
int
w_sz
=
(
tag_num
+
state_trans_base_idx
)
*
tag_num
;
Tensor
x
,
w
,
alpha
,
track
;
x
.
Resize
({
seq_len
,
tag_num
});
w
.
Resize
({
tag_num
+
state_trans_base_idx
,
tag_num
});
alpha
.
Resize
({
seq_len
,
tag_num
});
track
.
Resize
({
seq_len
,
tag_num
});
RandomVec
<
T
>
(
x_sz
,
x
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
w_sz
,
w
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
const
T
*
x_data
=
x
.
data
<
T
>
();
const
T
*
w_data
=
w
.
data
<
T
>
();
T
*
alpha_data
=
alpha
.
mutable_data
<
T
>
(
PlaceType
());
int
*
track_data
=
track
.
mutable_data
<
int
>
(
PlaceType
());
BenchAllImpls
<
KT
,
jit
::
CRFDecodingTuples
<
T
>
,
PlaceType
>
(
tag_num
,
seq_len
,
x_data
,
w_data
,
alpha_data
,
track_data
,
tag_num
);
}
}
}
using
T
=
float
;
using
T
=
float
;
using
CPUPlace
=
paddle
::
platform
::
CPUPlace
;
using
CPUPlace
=
paddle
::
platform
::
CPUPlace
;
...
@@ -382,6 +447,16 @@ BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel<jit::kMatMul, T, CPUPlace>(); }
...
@@ -382,6 +447,16 @@ BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel<jit::kMatMul, T, CPUPlace>(); }
// softmax
// softmax
BENCH_FP32_CPU
(
kSoftmax
)
{
BenchSoftmaxKernel
<
jit
::
kSoftmax
,
T
,
CPUPlace
>
();
}
BENCH_FP32_CPU
(
kSoftmax
)
{
BenchSoftmaxKernel
<
jit
::
kSoftmax
,
T
,
CPUPlace
>
();
}
// layernorm
BENCH_FP32_CPU
(
kLayerNorm
)
{
BenchLayerNormKernel
<
jit
::
kLayerNorm
,
T
,
CPUPlace
>
();
}
// crfdecoding
BENCH_FP32_CPU
(
kCRFDecoding
)
{
BenchCRFDecodingKernel
<
jit
::
kCRFDecoding
,
T
,
CPUPlace
>
();
}
// Benchmark all jit kernels including jitcode, mkl and refer.
// Benchmark all jit kernels including jitcode, mkl and refer.
// To use this tool, run command: ./benchmark [options...]
// To use this tool, run command: ./benchmark [options...]
// Options:
// Options:
...
...
paddle/fluid/operators/jit/test.cc
浏览文件 @
d79d2f68
...
@@ -292,6 +292,63 @@ struct TestFuncWithRefer<jit::MatMulTuples<T>, std::vector<T>, std::vector<T>,
...
@@ -292,6 +292,63 @@ struct TestFuncWithRefer<jit::MatMulTuples<T>, std::vector<T>, std::vector<T>,
}
}
};
};
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
LayerNormTuples
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
int
,
float
,
int
>
{
void
operator
()(
const
typename
jit
::
LayerNormTuples
<
T
>::
func_type
tgt
,
std
::
vector
<
T
>&
x
,
std
::
vector
<
T
>&
outref
,
// NOLINT
std
::
vector
<
T
>&
mean
,
std
::
vector
<
T
>&
var
,
// NOLINT
const
std
::
vector
<
T
>&
scale
,
const
std
::
vector
<
T
>&
bias
,
int
left
,
const
float
epsilon
,
int
right
)
{
EXPECT_TRUE
(
tgt
!=
nullptr
);
EXPECT_EQ
(
x
.
size
(),
static_cast
<
size_t
>
(
left
*
right
));
EXPECT_EQ
(
outref
.
size
(),
static_cast
<
size_t
>
(
left
*
right
));
EXPECT_EQ
(
mean
.
size
(),
static_cast
<
size_t
>
(
left
));
EXPECT_EQ
(
var
.
size
(),
static_cast
<
size_t
>
(
left
));
EXPECT_EQ
(
scale
.
size
(),
static_cast
<
size_t
>
(
right
));
EXPECT_EQ
(
bias
.
size
(),
static_cast
<
size_t
>
(
right
));
std
::
vector
<
T
>
outtgt
(
outref
.
size
());
const
T
*
scale_data
=
scale
.
data
();
const
T
*
bias_data
=
bias
.
data
();
T
*
x_data
=
x
.
data
();
T
*
mean_data
=
mean
.
data
();
T
*
var_data
=
var
.
data
();
T
*
outref_data
=
outref
.
data
();
T
*
outtgt_data
=
outtgt
.
data
();
tgt
(
x_data
,
outtgt_data
,
mean_data
,
var_data
,
scale_data
,
bias_data
,
left
,
epsilon
,
right
);
ExpectEQ
<
T
>
(
outtgt_data
,
outref_data
,
left
*
right
);
}
};
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
CRFDecodingTuples
<
T
>
,
int
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
int
>
,
int
>
{
void
operator
()(
const
typename
jit
::
CRFDecodingTuples
<
T
>::
func_type
tgt
,
const
int
seq_len
,
const
std
::
vector
<
T
>&
x
,
const
std
::
vector
<
T
>&
w
,
std
::
vector
<
T
>&
alpharef
,
// NOLINT
std
::
vector
<
int
>&
trackref
,
int
tag_num
)
{
// NOLINT
constexpr
int
state_trans_base_idx
=
2
;
EXPECT_TRUE
(
tgt
!=
nullptr
);
EXPECT_EQ
(
x
.
size
(),
static_cast
<
size_t
>
(
seq_len
*
tag_num
));
EXPECT_EQ
(
w
.
size
(),
static_cast
<
size_t
>
((
tag_num
+
state_trans_base_idx
)
*
tag_num
));
EXPECT_EQ
(
alpharef
.
size
(),
static_cast
<
size_t
>
(
seq_len
*
tag_num
));
EXPECT_EQ
(
trackref
.
size
(),
static_cast
<
size_t
>
(
seq_len
*
tag_num
));
std
::
vector
<
T
>
alphatgt
(
alpharef
.
size
());
std
::
vector
<
int
>
tracktgt
(
trackref
.
size
());
memcpy
(
trackref
.
data
(),
tracktgt
.
data
(),
tag_num
*
sizeof
(
int
));
tgt
(
seq_len
,
(
const
T
*
)
x
.
data
(),
(
const
T
*
)
w
.
data
(),
alphatgt
.
data
(),
tracktgt
.
data
(),
tag_num
);
ExpectEQ
<
T
>
(
alpharef
.
data
(),
alphatgt
.
data
(),
seq_len
*
tag_num
);
ExpectEQ
<
int
>
(
trackref
.
data
(),
tracktgt
.
data
(),
seq_len
*
tag_num
);
}
};
template
<
jit
::
KernelType
KT
,
typename
KernelTuples
,
typename
PlaceType
,
template
<
jit
::
KernelType
KT
,
typename
KernelTuples
,
typename
PlaceType
,
typename
...
Args
>
typename
...
Args
>
void
TestAllImpls
(
const
typename
KernelTuples
::
attr_type
&
attr
,
Args
...
args
)
{
void
TestAllImpls
(
const
typename
KernelTuples
::
attr_type
&
attr
,
Args
...
args
)
{
...
@@ -640,6 +697,71 @@ void TestNCHW16CMulNCKernel() {
...
@@ -640,6 +697,71 @@ void TestNCHW16CMulNCKernel() {
}
}
}
}
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
TestLayerNormKernel
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
const
T
epsilon
=
9.99999975e-06
;
for
(
int
n
:
{
1
,
2
,
10
})
{
for
(
int
x_dim_0
:
{
1
,
9
,
17
,
50
})
{
int
left
=
n
*
x_dim_0
;
for
(
int
x_dim_1
:
TestSizes
())
{
int
right
=
x_dim_1
;
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
LayerNormTuples
<
T
>>
();
EXPECT_TRUE
(
ref
!=
nullptr
);
int
sz
=
left
*
right
;
std
::
vector
<
T
>
x
(
sz
),
mean
(
left
),
var
(
left
),
scale
(
right
),
bias
(
right
),
outref
(
sz
);
RandomVec
<
T
>
(
sz
,
x
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
left
,
mean
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
left
,
var
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
right
,
scale
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
right
,
bias
.
data
(),
-
2.
f
,
2.
f
);
const
T
*
scale_data
=
scale
.
data
();
const
T
*
bias_data
=
bias
.
data
();
T
*
x_data
=
x
.
data
();
T
*
mean_data
=
mean
.
data
();
T
*
var_data
=
var
.
data
();
T
*
outref_data
=
outref
.
data
();
ref
(
x_data
,
outref_data
,
mean_data
,
var_data
,
scale_data
,
bias_data
,
left
,
epsilon
,
right
);
TestAllImpls
<
KT
,
jit
::
LayerNormTuples
<
T
>
,
PlaceType
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
int
,
float
>
(
right
,
x
,
outref
,
mean
,
var
,
scale
,
bias
,
left
,
epsilon
,
right
);
}
}
}
}
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
TestCRFDecodingKernel
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
constexpr
int
state_trans_base_idx
=
2
;
for
(
int
seq_len
:
{
1
,
11
,
17
,
50
})
{
for
(
int
tag_num
:
TestSizes
())
{
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
CRFDecodingTuples
<
T
>>
();
EXPECT_TRUE
(
ref
!=
nullptr
);
int
x_sz
=
seq_len
*
tag_num
;
int
w_sz
=
(
tag_num
+
state_trans_base_idx
)
*
tag_num
;
std
::
vector
<
T
>
x
(
x_sz
),
w
(
w_sz
),
alpharef
(
x_sz
);
std
::
vector
<
int
>
trackref
(
x_sz
);
RandomVec
<
T
>
(
x_sz
,
x
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
w_sz
,
w
.
data
(),
-
2.
f
,
2.
f
);
ref
(
seq_len
,
(
const
T
*
)
x
.
data
(),
(
const
T
*
)
w
.
data
(),
alpharef
.
data
(),
trackref
.
data
(),
tag_num
);
TestAllImpls
<
KT
,
jit
::
CRFDecodingTuples
<
T
>
,
PlaceType
,
int
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
int
>
,
int
>
(
tag_num
,
seq_len
,
x
,
w
,
alpharef
,
trackref
,
tag_num
);
}
}
}
// XYZNTuple
// XYZNTuple
TEST
(
JITKernel
,
kVMul
)
{
TEST
(
JITKernel
,
kVMul
)
{
TestXYZNKernel
<
jit
::
kVMul
,
float
,
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVMul
,
float
,
CPUPlace
>
();
...
@@ -761,7 +883,16 @@ TEST(JITKernel, kNCHW16CMulNC) {
...
@@ -761,7 +883,16 @@ TEST(JITKernel, kNCHW16CMulNC) {
TestNCHW16CMulNCKernel
<
jit
::
kNCHW16CMulNC
,
double
,
CPUPlace
>
();
TestNCHW16CMulNCKernel
<
jit
::
kNCHW16CMulNC
,
double
,
CPUPlace
>
();
}
}
// TODO(yihua/TJ): add crf decoding and layer norm unit tests
TEST
(
JITKernel
,
kLayerNorm
)
{
TestLayerNormKernel
<
jit
::
kLayerNorm
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestLayerNormKernel
<
jit
::
kLayerNorm
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
TEST
(
JITKernel
,
kCRFDecoding
)
{
TestCRFDecodingKernel
<
jit
::
kCRFDecoding
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestCRFDecodingKernel
<
jit
::
kCRFDecoding
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
TEST
(
JITKernel
,
pool
)
{
TEST
(
JITKernel
,
pool
)
{
// TODO(TJ): add some test
// TODO(TJ): add some test
...
...
paddle/fluid/operators/load_combine_op.cc
浏览文件 @
d79d2f68
...
@@ -64,7 +64,7 @@ class LoadCombineOp : public framework::OperatorBase {
...
@@ -64,7 +64,7 @@ class LoadCombineOp : public framework::OperatorBase {
auto
*
tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
*
tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
// Error checking
// Error checking
PADDLE_ENFORCE
(
static_cast
<
bool
>
(
buffer
),
"Cannot read more"
);
PADDLE_ENFORCE
(
static_cast
<
bool
>
(
*
buffer
),
"Cannot read more"
);
// Get data from fin to tensor
// Get data from fin to tensor
DeserializeFromStream
(
*
buffer
,
tensor
,
dev_ctx
);
DeserializeFromStream
(
*
buffer
,
tensor
,
dev_ctx
);
...
@@ -90,6 +90,10 @@ class LoadCombineOp : public framework::OperatorBase {
...
@@ -90,6 +90,10 @@ class LoadCombineOp : public framework::OperatorBase {
tensor
->
ShareDataWith
(
fp16_tensor
);
tensor
->
ShareDataWith
(
fp16_tensor
);
}
}
}
}
buffer
->
peek
();
PADDLE_ENFORCE
(
buffer
->
eof
(),
"You are not allowed to load partial data via "
"load_combine_op, use load_op instead."
);
}
}
};
};
...
...
paddle/fluid/operators/lstm_op.h
浏览文件 @
d79d2f68
...
@@ -311,6 +311,10 @@ class LSTMGradKernel : public framework::OpKernel<T> {
...
@@ -311,6 +311,10 @@ class LSTMGradKernel : public framework::OpKernel<T> {
lstm_grad
.
prev_state_grad
=
c0_g
?
ordered_c0_g
.
data
<
T
>
()
:
nullptr
;
lstm_grad
.
prev_state_grad
=
c0_g
?
ordered_c0_g
.
data
<
T
>
()
:
nullptr
;
}
}
// lstm_value.output_value not used in bp, set to nullptr
// lstm_grad.state_active_grad not used in bp, set to nullptr
lstm_value
.
output_value
=
nullptr
;
lstm_grad
.
state_active_grad
=
nullptr
;
int
cur_batch_size
=
bend
-
bstart
;
int
cur_batch_size
=
bend
-
bstart
;
math
::
LstmUnitGradFunctor
<
DeviceContext
,
T
>::
compute
(
math
::
LstmUnitGradFunctor
<
DeviceContext
,
T
>::
compute
(
device_ctx
,
lstm_value
,
lstm_grad
,
frame_size
,
cur_batch_size
,
device_ctx
,
lstm_value
,
lstm_grad
,
frame_size
,
cur_batch_size
,
...
...
paddle/fluid/operators/lstmp_op.h
浏览文件 @
d79d2f68
...
@@ -405,6 +405,11 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
...
@@ -405,6 +405,11 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
}
}
int
cur_batch_size
=
bend
-
bstart
;
int
cur_batch_size
=
bend
-
bstart
;
// lstmp_value.output_value not used in bp, set to null
// lstmp_grad.state_active_grad not used in bp, set to null
lstmp_value
.
output_value
=
nullptr
;
lstmp_grad
.
state_active_grad
=
nullptr
;
math
::
LstmUnitGradFunctor
<
DeviceContext
,
T
>::
compute
(
math
::
LstmUnitGradFunctor
<
DeviceContext
,
T
>::
compute
(
device_ctx
,
lstmp_value
,
lstmp_grad
,
frame_size
,
cur_batch_size
,
device_ctx
,
lstmp_value
,
lstmp_grad
,
frame_size
,
cur_batch_size
,
gate_act
,
cell_act
,
cand_act
);
gate_act
,
cell_act
,
cand_act
);
...
...
paddle/fluid/operators/row_conv_op.cc
浏览文件 @
d79d2f68
...
@@ -109,23 +109,23 @@ from future subsequences in a computationally efficient manner to improve
...
@@ -109,23 +109,23 @@ from future subsequences in a computationally efficient manner to improve
unidirectional recurrent neural networks. The row convolution operator is
unidirectional recurrent neural networks. The row convolution operator is
different from the 1D sequence convolution, and is computed as follows:
different from the 1D sequence convolution, and is computed as follows:
Given an input sequence $
in$ of length $t$ and input dimension $d
$,
Given an input sequence $
X$ of length $t$ and input dimension $D
$,
and a filter ($W$) of size $context \times
d$,
and a filter ($W$) of size $context \times
D$,
the output sequence is convolved as:
the output sequence is convolved as:
$$
$$
out_{i
, :} = \\sum_{j=i}^{i + context} in_{j,:} \\cdot W_{i-j, :
}
out_{i
} = \\sum_{j=i}^{i + context - 1} X_{j} \\cdot W_{j-i
}
$$
$$
In the above equation:
In the above equation:
* $Out_{i}$: The i-th row of output variable with shape [1, D].
* $Out_{i}$: The i-th row of output variable with shape [1, D].
* $
\\tau
$: Future context size.
* $
context
$: Future context size.
* $X_{j}$: The j-th row of input variable with shape [1, D].
* $X_{j}$: The j-th row of input variable with shape [1, D].
* $W_{
i-j}$: The (i-j
)-th row of parameters with shape [1, D].
* $W_{
j-i}$: The (j-i
)-th row of parameters with shape [1, D].
More details about row_conv please refer to
More details about row_conv please refer to
the design document
the design document
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
d79d2f68
...
@@ -234,8 +234,10 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) {
...
@@ -234,8 +234,10 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) {
#endif // PADDLE_WITH_CUDA
#endif // PADDLE_WITH_CUDA
#define PADDLE_THROW(...) \
#define PADDLE_THROW(...) \
do { \
throw ::paddle::platform::EnforceNotMet( \
throw ::paddle::platform::EnforceNotMet( \
::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__)
::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__); \
} while (0)
#define PADDLE_ENFORCE(COND, ...) \
#define PADDLE_ENFORCE(COND, ...) \
do { \
do { \
...
@@ -274,19 +276,21 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) {
...
@@ -274,19 +276,21 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) {
do { \
do { \
if (UNLIKELY(nullptr == (__VAL))) { \
if (UNLIKELY(nullptr == (__VAL))) { \
PADDLE_THROW(#__VAL " should not be null\n%s", \
PADDLE_THROW(#__VAL " should not be null\n%s", \
paddle::string::Sprintf(""
__VA_ARGS__)); \
::paddle::string::Sprintf(
__VA_ARGS__)); \
} \
} \
} while (0)
} while (0)
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \
do { \
if (UNLIKELY(!((__VAL0)__CMP(__VAL1)))) { \
auto __cond1__ = (__VAL0); \
auto __cond2__ = (__VAL1); \
if (UNLIKELY(!((__cond1__)__CMP(__cond2__)))) { \
PADDLE_THROW("Enforce failed. Expected %s " #__CMP \
PADDLE_THROW("Enforce failed. Expected %s " #__CMP \
" %s, but received %s:%s " #__INV_CMP " %s:%s.\n%s", \
" %s, but received %s:%s " #__INV_CMP " %s:%s.\n%s", \
#__VAL0, #__VAL1, #__VAL0, \
#__VAL0, #__VAL1, #__VAL0, \
paddle::string::to_string(__VAL0), #__VAL1,
\
::paddle::string::to_string(__cond1__), #__VAL1,
\
paddle::string::to_string(__VAL1),
\
::paddle::string::to_string(__cond2__),
\
paddle::string::Sprintf("" __VA_ARGS__));
\
::paddle::string::Sprintf(__VA_ARGS__));
\
} \
} \
} while (0)
} while (0)
...
...
paddle/fluid/pybind/ir.cc
浏览文件 @
d79d2f68
...
@@ -13,10 +13,12 @@
...
@@ -13,10 +13,12 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/pybind/ir.h"
#include "paddle/fluid/pybind/ir.h"
#include <algorithm>
#include <string>
#include <string>
#include <unordered_map>
#include <unordered_map>
#include <unordered_set>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/op_desc.h"
...
@@ -27,6 +29,10 @@ namespace py = pybind11;
...
@@ -27,6 +29,10 @@ namespace py = pybind11;
using
paddle
::
framework
::
ir
::
Graph
;
using
paddle
::
framework
::
ir
::
Graph
;
using
paddle
::
framework
::
ir
::
Node
;
using
paddle
::
framework
::
ir
::
Node
;
using
paddle
::
framework
::
ir
::
GraphSafeRemoveNodes
;
using
paddle
::
framework
::
ir
::
GraphSafeRemoveNodes
;
using
paddle
::
framework
::
ir
::
HasCircle
;
using
paddle
::
framework
::
ir
::
GraphNum
;
using
paddle
::
framework
::
ir
::
TopologySortOperations
;
using
paddle
::
framework
::
ir
::
BuildOperationAdjList
;
using
paddle
::
framework
::
OpDesc
;
using
paddle
::
framework
::
OpDesc
;
using
paddle
::
framework
::
ProgramDesc
;
using
paddle
::
framework
::
ProgramDesc
;
using
paddle
::
framework
::
VarDesc
;
using
paddle
::
framework
::
VarDesc
;
...
@@ -36,6 +42,12 @@ namespace paddle {
...
@@ -36,6 +42,12 @@ namespace paddle {
namespace
pybind
{
namespace
pybind
{
void
BindGraph
(
py
::
module
*
m
)
{
void
BindGraph
(
py
::
module
*
m
)
{
m
->
def
(
"graph_safe_remove_nodes"
,
GraphSafeRemoveNodes
);
m
->
def
(
"graph_safe_remove_nodes"
,
GraphSafeRemoveNodes
);
m
->
def
(
"has_circle"
,
HasCircle
);
m
->
def
(
"graph_num"
,
GraphNum
);
m
->
def
(
"topology_sort"
,
TopologySortOperations
,
return_value_policy
::
reference
);
m
->
def
(
"build_adjacency_list"
,
BuildOperationAdjList
,
return_value_policy
::
reference
);
py
::
class_
<
Graph
,
std
::
shared_ptr
<
Graph
>>
(
py
::
class_
<
Graph
,
std
::
shared_ptr
<
Graph
>>
(
*
m
,
"Graph"
,
*
m
,
"Graph"
,
"The graph is a Directed Acyclic Single Static Assignment Graph, see "
"The graph is a Directed Acyclic Single Static Assignment Graph, see "
...
@@ -46,7 +58,6 @@ void BindGraph(py::module *m) {
...
@@ -46,7 +58,6 @@ void BindGraph(py::module *m) {
.
def
(
"get_float"
,
&
Graph
::
Get
<
float
>
)
.
def
(
"get_float"
,
&
Graph
::
Get
<
float
>
)
.
def
(
"get_double"
,
&
Graph
::
Get
<
double
>
)
.
def
(
"get_double"
,
&
Graph
::
Get
<
double
>
)
.
def
(
"get_string"
,
&
Graph
::
Get
<
std
::
string
>
)
.
def
(
"get_string"
,
&
Graph
::
Get
<
std
::
string
>
)
.
def
(
"get_program"
,
&
Graph
::
Get
<
ProgramDesc
>
)
.
def
(
"get_marked_nodes"
,
&
Graph
::
Get
<
std
::
unordered_set
<
const
Node
*>>
)
.
def
(
"get_marked_nodes"
,
&
Graph
::
Get
<
std
::
unordered_set
<
const
Node
*>>
)
.
def
(
"set"
,
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
.
def
(
"set"
,
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
int
attr
)
{
return
self
.
Set
(
attr_name
,
new
int
(
attr
));
})
int
attr
)
{
return
self
.
Set
(
attr_name
,
new
int
(
attr
));
})
...
@@ -63,11 +74,6 @@ void BindGraph(py::module *m) {
...
@@ -63,11 +74,6 @@ void BindGraph(py::module *m) {
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
double
attr
)
{
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
double
attr
)
{
return
self
.
Set
(
attr_name
,
new
double
(
attr
));
return
self
.
Set
(
attr_name
,
new
double
(
attr
));
})
})
.
def
(
"set"
,
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
const
ProgramDesc
&
attr
)
{
return
self
.
Set
(
attr_name
,
new
ProgramDesc
(
attr
));
})
.
def
(
"set"
,
.
def
(
"set"
,
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
const
std
::
unordered_set
<
const
Node
*>
&
attr
)
{
const
std
::
unordered_set
<
const
Node
*>
&
attr
)
{
...
@@ -108,42 +114,42 @@ void BindNode(py::module *m) {
...
@@ -108,42 +114,42 @@ void BindNode(py::module *m) {
.
def
(
"is_op"
,
&
Node
::
IsOp
)
.
def
(
"is_op"
,
&
Node
::
IsOp
)
.
def
(
"is_var"
,
&
Node
::
IsVar
)
.
def
(
"is_var"
,
&
Node
::
IsVar
)
.
def
(
"is_ctrl_var"
,
&
Node
::
IsCtrlVar
)
.
def
(
"is_ctrl_var"
,
&
Node
::
IsCtrlVar
)
.
def
(
"clear_inputs"
,
[](
Node
&
self
)
{
self
.
inputs
.
clear
();
})
.
def
(
"inputs_remove"
,
.
def
(
"inputs_remove"
,
[](
Node
&
self
,
int
node_id
)
{
[](
Node
&
self
,
int
node_id
)
{
for
(
auto
it
=
self
.
inputs
.
begin
();
it
!=
self
.
inputs
.
end
();
auto
pos
=
std
::
find_if
(
it
++
)
{
self
.
inputs
.
begin
(),
self
.
inputs
.
end
(),
if
((
*
it
)
->
id
()
==
node_id
)
{
[
&
node_id
](
const
Node
*
n
)
{
return
n
->
id
()
==
node_id
;
});
self
.
inputs
.
erase
(
it
);
if
(
pos
!=
self
.
inputs
.
end
())
{
}
self
.
inputs
.
erase
(
pos
);
}
}
})
})
.
def
(
"inputs_remove"
,
.
def
(
"inputs_remove"
,
[](
Node
&
self
,
Node
&
node
)
{
[](
Node
&
self
,
Node
&
node
)
{
for
(
auto
it
=
self
.
inputs
.
begin
();
it
!=
self
.
inputs
.
end
();
auto
pos
=
it
++
)
{
std
::
find
(
self
.
inputs
.
begin
(),
self
.
inputs
.
end
(),
&
node
);
if
(
*
it
==
&
node
)
{
if
(
pos
!=
self
.
inputs
.
end
())
{
self
.
inputs
.
erase
(
it
);
self
.
inputs
.
erase
(
pos
);
}
}
}
})
})
.
def
(
"inputs_append"
,
.
def
(
"inputs_append"
,
[](
Node
&
self
,
Node
&
node
)
{
self
.
inputs
.
push_back
(
&
node
);
})
[](
Node
&
self
,
Node
&
node
)
{
self
.
inputs
.
push_back
(
&
node
);
})
.
def
(
"clear_outputs"
,
[](
Node
&
self
)
{
self
.
outputs
.
clear
();
})
.
def
(
"outputs_remove"
,
.
def
(
"outputs_remove"
,
[](
Node
&
self
,
int
node_id
)
{
[](
Node
&
self
,
int
node_id
)
{
for
(
auto
it
=
self
.
outputs
.
begin
();
it
!=
self
.
outputs
.
end
();
auto
pos
=
std
::
find_if
(
it
++
)
{
self
.
outputs
.
begin
(),
self
.
outputs
.
end
(),
if
((
*
it
)
->
id
()
==
node_id
)
{
[
&
node_id
](
const
Node
*
n
)
{
return
n
->
id
()
==
node_id
;
});
self
.
outputs
.
erase
(
it
);
if
(
pos
!=
self
.
outputs
.
end
())
{
}
self
.
outputs
.
erase
(
pos
);
}
}
})
})
.
def
(
"outputs_remove"
,
.
def
(
"outputs_remove"
,
[](
Node
&
self
,
Node
&
node
)
{
[](
Node
&
self
,
Node
&
node
)
{
for
(
auto
it
=
self
.
outputs
.
begin
();
it
!=
self
.
outputs
.
end
();
auto
pos
=
it
++
)
{
std
::
find
(
self
.
outputs
.
begin
(),
self
.
outputs
.
end
(),
&
node
);
if
(
*
it
==
&
node
)
{
if
(
pos
!=
self
.
outputs
.
end
())
{
self
.
outputs
.
erase
(
it
);
self
.
outputs
.
erase
(
pos
);
}
}
}
})
})
.
def
(
"outputs_append"
,
.
def
(
"outputs_append"
,
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
d79d2f68
...
@@ -829,8 +829,7 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -829,8 +829,7 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"disable_profiler"
,
platform
::
DisableProfiler
);
m
.
def
(
"disable_profiler"
,
platform
::
DisableProfiler
);
m
.
def
(
"is_profiler_enabled"
,
platform
::
IsProfileEnabled
);
m
.
def
(
"is_profiler_enabled"
,
platform
::
IsProfileEnabled
);
m
.
def
(
"reset_profiler"
,
platform
::
ResetProfiler
);
m
.
def
(
"reset_profiler"
,
platform
::
ResetProfiler
);
m
.
def
(
"get_pass"
,
[](
const
py
::
bytes
&
binary_str
)
{
m
.
def
(
"get_pass"
,
[](
const
std
::
string
&
pass_type
)
{
std
::
string
pass_type
(
binary_str
);
auto
pass
=
framework
::
ir
::
PassRegistry
::
Instance
().
Get
(
pass_type
);
auto
pass
=
framework
::
ir
::
PassRegistry
::
Instance
().
Get
(
pass_type
);
return
std
::
shared_ptr
<
framework
::
ir
::
Pass
>
(
std
::
move
(
pass
));
return
std
::
shared_ptr
<
framework
::
ir
::
Pass
>
(
std
::
move
(
pass
));
});
});
...
@@ -838,10 +837,9 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -838,10 +837,9 @@ All parameter, weight, gradient are variables in Paddle.
py
::
class_
<
ir
::
Pass
,
std
::
shared_ptr
<
ir
::
Pass
>>
pass
(
m
,
"Pass"
);
py
::
class_
<
ir
::
Pass
,
std
::
shared_ptr
<
ir
::
Pass
>>
pass
(
m
,
"Pass"
);
pass
.
def
(
py
::
init
())
pass
.
def
(
py
::
init
())
.
def
(
"has"
,
&
ir
::
Pass
::
Has
)
.
def
(
"has"
,
&
ir
::
Pass
::
Has
)
.
def
(
"set"
,
.
def
(
"set_not_owned"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
attr_name
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
attr_name
,
ProgramDesc
&
attr
)
{
const
ProgramDesc
&
attr
)
{
self
.
SetNotOwned
<
ProgramDesc
>
(
attr_name
,
&
attr
);
return
self
.
Set
(
attr_name
,
new
ProgramDesc
(
attr
));
})
})
.
def
(
.
def
(
"set"
,
"set"
,
...
@@ -850,7 +848,6 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -850,7 +848,6 @@ All parameter, weight, gradient are variables in Paddle.
})
})
.
def
(
"set"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
.
def
(
"set"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
int
val
)
{
self
.
Set
<
const
int
>
(
name
,
new
int
(
val
));
})
int
val
)
{
self
.
Set
<
const
int
>
(
name
,
new
int
(
val
));
})
.
def
(
"get_program"
,
&
ir
::
Pass
::
Get
<
ProgramDesc
>
)
.
def
(
"type"
,
&
ir
::
Pass
::
Type
)
.
def
(
"type"
,
&
ir
::
Pass
::
Type
)
.
def
(
"apply"
,
[](
ir
::
Pass
&
self
,
std
::
shared_ptr
<
ir
::
Graph
>
graph
)
{
.
def
(
"apply"
,
[](
ir
::
Pass
&
self
,
std
::
shared_ptr
<
ir
::
Graph
>
graph
)
{
std
::
unique_ptr
<
ir
::
Graph
>
origin_graph
(
graph
.
get
());
std
::
unique_ptr
<
ir
::
Graph
>
origin_graph
(
graph
.
get
());
...
...
python/CMakeLists.txt
浏览文件 @
d79d2f68
...
@@ -64,6 +64,7 @@ if (WITH_TESTING)
...
@@ -64,6 +64,7 @@ if (WITH_TESTING)
add_subdirectory
(
paddle/dataset/tests
)
add_subdirectory
(
paddle/dataset/tests
)
add_subdirectory
(
paddle/fluid/tests
)
add_subdirectory
(
paddle/fluid/tests
)
add_subdirectory
(
paddle/fluid/contrib/tests
)
add_subdirectory
(
paddle/fluid/contrib/tests
)
add_subdirectory
(
paddle/fluid/contrib/slim/tests
)
endif
()
endif
()
install
(
DIRECTORY
${
PADDLE_PYTHON_PACKAGE_DIR
}
install
(
DIRECTORY
${
PADDLE_PYTHON_PACKAGE_DIR
}
DESTINATION opt/paddle/share/wheels
DESTINATION opt/paddle/share/wheels
...
...
python/paddle/fluid/compiler.py
浏览文件 @
d79d2f68
...
@@ -177,7 +177,10 @@ class CompiledProgram(object):
...
@@ -177,7 +177,10 @@ class CompiledProgram(object):
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass.
# if turn on python memory optimize, turn off the inplace_pass.
self
.
_build_strategy
.
enable_inplace
=
False
if
self
.
_program
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
memory_optimize
is
None
:
self
.
_build_strategy
.
memory_optimize
=
False
if
main
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
enable_inplace
is
None
:
self
.
_build_strategy
.
enable_inplace
=
False
if
main
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
num_trainers
>
1
and
trainers_endpoints
:
if
self
.
_build_strategy
.
num_trainers
>
1
and
trainers_endpoints
:
assert
self
.
_build_strategy
.
num_trainers
==
len
(
assert
self
.
_build_strategy
.
num_trainers
==
len
(
...
...
python/paddle/fluid/contrib/int8_inference/README.md
浏览文件 @
d79d2f68
...
@@ -63,10 +63,10 @@ Notes:
...
@@ -63,10 +63,10 @@ Notes:
## 4. How to reproduce the results
## 4. How to reproduce the results
*
Small dataset
*
Small dataset
```
bash
```
bash
python python/paddle/fluid/contrib/tests/test_calibration.py
FLAGS_use_mkldnn
=
true
python python/paddle/fluid/contrib/tests/test_calibration.py
```
```
*
Full dataset
*
Full dataset
```
bash
```
bash
DATASET
=
full python python/paddle/fluid/contrib/tests/test_calibration.py
FLAGS_use_mkldnn
=
true
DATASET
=
full python python/paddle/fluid/contrib/tests/test_calibration.py
```
```
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
浏览文件 @
d79d2f68
...
@@ -13,14 +13,19 @@
...
@@ -13,14 +13,19 @@
# limitations under the License.
# limitations under the License.
import
collections
import
collections
import
numpy
as
np
import
six
from
.....
import
compat
as
cpt
from
....
import
core
from
....
import
core
from
....framework
import
IrGraph
from
....framework
import
IrGraph
from
....framework
import
Program
from
....framework
import
Program
from
....framework
import
Variable
from
....initializer
import
Constant
from
....initializer
import
Constant
from
....
import
unique_name
from
....
import
unique_name
__all__
=
[
'QuantizationTransformPass'
]
__all__
=
[
'QuantizationTransformPass'
,
'QuantizationFreezePass'
,
'ConvertToInt8Pass'
,
'TransformForMobilePass'
]
class
QuantizationTransformPass
(
object
):
class
QuantizationTransformPass
(
object
):
...
@@ -35,7 +40,13 @@ class QuantizationTransformPass(object):
...
@@ -35,7 +40,13 @@ class QuantizationTransformPass(object):
"""
"""
Convert and rewrite the IrGraph according to weight and
Convert and rewrite the IrGraph according to weight and
activation quantization type.
activation quantization type.
Args:
Args:
scope(fluid.Scope): When activation use 'range_abs_max' as the quantize
type, this pass will create some new parameters. The scope is used to
initialize these new parameters.
program_exe(fluid.Executor): program_exe is used to initialize new
parameters described above.
weight_bits (int): quantization bit number for weights,
weight_bits (int): quantization bit number for weights,
the bias is not quantized.
the bias is not quantized.
activation_bits (int): quantization bit number for activation.
activation_bits (int): quantization bit number for activation.
...
@@ -49,6 +60,7 @@ class QuantizationTransformPass(object):
...
@@ -49,6 +60,7 @@ class QuantizationTransformPass(object):
support 'abs_max'. The 'range_abs_max' usually is not used for
support 'abs_max'. The 'range_abs_max' usually is not used for
weight, since weights are fixed once the model is well trained.
weight, since weights are fixed once the model is well trained.
window_size (int): the window size for 'range_abs_max' quantization.
window_size (int): the window size for 'range_abs_max' quantization.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
# The original graph will be rewrite.
# The original graph will be rewrite.
...
@@ -88,31 +100,35 @@ class QuantizationTransformPass(object):
...
@@ -88,31 +100,35 @@ class QuantizationTransformPass(object):
self
.
_quantizable_grad_ops
=
[
self
.
_quantizable_grad_ops
=
[
'%s_grad'
%
(
op
)
for
op
in
self
.
_quantizable_ops
'%s_grad'
%
(
op
)
for
op
in
self
.
_quantizable_ops
]
]
self
.
_fake_quant_op_types
=
[
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
]
self
.
_fake_dequant_op_types
=
[
'fake_dequantize_max_abs'
]
self
.
_is_test
=
None
self
.
_is_test
=
None
self
.
_global_step
=
None
self
.
_global_step
=
None
def
apply
(
self
,
graph
):
def
apply
(
self
,
graph
):
"""
Quantize the graph for training process. According to weight and
activation quantization type, the graph will be added some fake
quantize operators and fake dequantize operators.
Args:
graph(IrGraph): the applied graph.
"""
assert
isinstance
(
graph
,
assert
isinstance
(
graph
,
IrGraph
),
'graph must be the instance of IrGraph.'
IrGraph
),
'graph must be the instance of IrGraph.'
self
.
_need_initialized
.
clear
()
self
.
_need_initialized
.
clear
()
self
.
_is_test
=
graph
.
is_test
()
self
.
_is_test
=
graph
.
is_test
()
# marked the variable which has been dequantized.
# marked the variable which has been dequantized.
dequantized_vars
=
collections
.
OrderedDict
()
dequantized_vars
=
collections
.
OrderedDict
()
p
arams
=
[
p
.
name
()
for
p
in
graph
.
all_paramete
rs
()]
p
ersistable_vars
=
[
p
.
name
()
for
p
in
graph
.
all_persistable_va
rs
()]
def
_transform_forward
(
graph
,
op
):
def
_transform_forward
(
graph
,
op
):
for
var_node
in
op
.
inputs
:
for
var_node
in
op
.
inputs
:
if
var_node
.
name
()
in
dequantized_vars
:
if
var_node
.
name
()
in
dequantized_vars
:
dequant_var_node
=
dequantized_vars
[
var_node
.
name
()]
dequant_var_node
=
dequantized_vars
[
var_node
.
name
()]
else
:
else
:
quant_bits
=
self
.
_weight_bits
if
var_node
.
name
()
in
p
aram
s
\
quant_bits
=
self
.
_weight_bits
if
var_node
.
name
()
in
p
ersistable_var
s
\
else
self
.
_activation_bits
else
self
.
_activation_bits
quant_type
=
self
.
_weight_quantize_type
if
var_node
.
name
()
\
quant_type
=
self
.
_weight_quantize_type
if
var_node
.
name
()
\
in
p
aram
s
else
self
.
_activation_quantize_type
in
p
ersistable_var
s
else
self
.
_activation_quantize_type
quant_var_node
,
scale_var_node
=
self
.
_insert_quant_op
(
quant_var_node
,
scale_var_node
=
self
.
_insert_quant_op
(
graph
,
var_node
,
quant_bits
,
quant_type
)
graph
,
var_node
,
quant_bits
,
quant_type
)
dequant_var_node
=
self
.
_insert_dequant_op
(
dequant_var_node
=
self
.
_insert_dequant_op
(
...
@@ -150,9 +166,14 @@ class QuantizationTransformPass(object):
...
@@ -150,9 +166,14 @@ class QuantizationTransformPass(object):
assert
self
.
_program_exe
is
not
None
,
\
assert
self
.
_program_exe
is
not
None
,
\
'The program_exe cannot be set None when activation_quantize_type equals to range_abs_max.'
'The program_exe cannot be set None when activation_quantize_type equals to range_abs_max.'
init_program
=
Program
()
init_program
=
Program
()
for
var_desc
,
initializer
in
self
.
_need_initialized
.
iteritems
():
for
var_desc
,
initializer
in
six
.
iteritems
(
self
.
_need_initialized
):
var
=
Variable
(
init_program
.
global_block
())
var
=
init_program
.
global_block
().
create_var
(
var
.
_set_desc
(
var_desc
)
name
=
var_desc
.
name
(),
shape
=
var_desc
.
shape
(),
dtype
=
var_desc
.
dtype
(),
type
=
var_desc
.
type
(),
lod_level
=
var_desc
.
lod_level
(),
persistable
=
var_desc
.
persistable
())
initializer
(
var
,
init_program
.
global_block
())
initializer
(
var
,
init_program
.
global_block
())
self
.
_program_exe
.
run
(
program
=
init_program
,
scope
=
self
.
_scope
)
self
.
_program_exe
.
run
(
program
=
init_program
,
scope
=
self
.
_scope
)
...
@@ -161,7 +182,7 @@ class QuantizationTransformPass(object):
...
@@ -161,7 +182,7 @@ class QuantizationTransformPass(object):
def
_create_global_step
(
self
,
graph
):
def
_create_global_step
(
self
,
graph
):
if
self
.
_weight_quantize_type
==
'range_abs_max'
or
\
if
self
.
_weight_quantize_type
==
'range_abs_max'
or
\
self
.
_activation_quantize_type
==
'range_abs_max'
:
self
.
_activation_quantize_type
==
'range_abs_max'
:
counter_name
=
'@STEP_COUNTER@'
counter_name
=
cpt
.
to_text
(
'@STEP_COUNTER@'
)
for
node
in
graph
.
all_vars
():
for
node
in
graph
.
all_vars
():
if
node
.
name
()
==
counter_name
:
if
node
.
name
()
==
counter_name
:
self
.
_global_step
=
node
self
.
_global_step
=
node
...
@@ -175,9 +196,14 @@ class QuantizationTransformPass(object):
...
@@ -175,9 +196,14 @@ class QuantizationTransformPass(object):
Constant
(
value
=
0
,
force_cpu
=
True
)
Constant
(
value
=
0
,
force_cpu
=
True
)
global_step_out
=
graph
.
create_var_node_from_desc
(
global_step_out
=
graph
.
create_var_node_from_desc
(
global_step_in
.
var
())
global_step_in
.
var
())
# The attribute of `op_role` is needed by ParallelExecutor.
increment_op
=
graph
.
create_op_node
(
increment_op
=
graph
.
create_op_node
(
op_type
=
'increment'
,
op_type
=
'increment'
,
attrs
=
{
'step'
:
1.0
},
attrs
=
{
'step'
:
1.0
,
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
},
inputs
=
{
'X'
:
global_step_in
},
inputs
=
{
'X'
:
global_step_in
},
outputs
=
{
'Out'
:
global_step_out
})
outputs
=
{
'Out'
:
global_step_out
})
graph
.
link_to
(
global_step_in
,
increment_op
)
graph
.
link_to
(
global_step_in
,
increment_op
)
...
@@ -212,7 +238,10 @@ class QuantizationTransformPass(object):
...
@@ -212,7 +238,10 @@ class QuantizationTransformPass(object):
var_dtype
=
var_node
.
var
().
dtype
())
var_dtype
=
var_node
.
var
().
dtype
())
quant_op_node
=
graph
.
create_op_node
(
quant_op_node
=
graph
.
create_op_node
(
op_type
=
'fake_quantize_abs_max'
,
op_type
=
'fake_quantize_abs_max'
,
attrs
=
{
'bit_length'
:
quant_bits
},
attrs
=
{
'bit_length'
:
quant_bits
,
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
},
inputs
=
{
'X'
:
var_node
},
inputs
=
{
'X'
:
var_node
},
outputs
=
{
'Out'
:
quant_var_node
,
outputs
=
{
'Out'
:
quant_var_node
,
'OutScale'
:
scale_var_node
})
'OutScale'
:
scale_var_node
})
...
@@ -257,7 +286,8 @@ class QuantizationTransformPass(object):
...
@@ -257,7 +286,8 @@ class QuantizationTransformPass(object):
attrs
=
{
attrs
=
{
'window_size'
:
self
.
_window_size
,
'window_size'
:
self
.
_window_size
,
'bit_length'
:
quant_bits
,
'bit_length'
:
quant_bits
,
'is_test'
:
self
.
_is_test
'is_test'
:
self
.
_is_test
,
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
}
}
quant_op_node
=
graph
.
create_op_node
(
quant_op_node
=
graph
.
create_op_node
(
op_type
=
'fake_quantize_range_abs_max'
,
op_type
=
'fake_quantize_range_abs_max'
,
...
@@ -290,7 +320,10 @@ class QuantizationTransformPass(object):
...
@@ -290,7 +320,10 @@ class QuantizationTransformPass(object):
max_range
=
(
1
<<
(
quant_bits
-
1
))
-
1
max_range
=
(
1
<<
(
quant_bits
-
1
))
-
1
dequant_op_node
=
graph
.
create_op_node
(
dequant_op_node
=
graph
.
create_op_node
(
op_type
=
'fake_dequantize_max_abs'
,
op_type
=
'fake_dequantize_max_abs'
,
attrs
=
{
'max_range'
:
float
(
max_range
)},
attrs
=
{
'max_range'
:
float
(
max_range
),
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
},
inputs
=
{
'X'
:
var_node
,
inputs
=
{
'X'
:
var_node
,
'Scale'
:
scale_var_node
},
'Scale'
:
scale_var_node
},
outputs
=
{
'Out'
:
dequant_var_node
})
outputs
=
{
'Out'
:
dequant_var_node
})
...
@@ -316,3 +349,330 @@ class QuantizationTransformPass(object):
...
@@ -316,3 +349,330 @@ class QuantizationTransformPass(object):
Return the scale name of quantized variable for the input `var_name`.
Return the scale name of quantized variable for the input `var_name`.
"""
"""
return
"%s.scale"
%
(
var_name
)
return
"%s.scale"
%
(
var_name
)
class QuantizationFreezePass(object):
    """
    The freeze pass is used to adjust the quantize operator order, for example:
        1) `activation -> quant -> dequant -> conv2d` will be frozen into
        `activation -> quant -> conv2d -> dequant`
        2) `weight -> quant -> dequant -> conv2d` will be frozen into `weight -> conv2d`,
        and weight will be scaled offline.

    Args:
        scope(fluid.Scope): scope is used to get the weight tensor values.
        place(fluid.CPUPlace|fluid.CUDAPlace): place is used to restore the weight tensors.
        weight_bits (int): quantization bit number for weights.
        activation_bits (int): quantization bit number for activation.
        weight_quantize_type (str): quantization type for weights, support 'abs_max'.
            The 'range_abs_max' usually is not used for weight, since weights are fixed once the
            model is well trained.
    """

    def __init__(self,
                 scope,
                 place,
                 weight_bits=8,
                 activation_bits=8,
                 weight_quantize_type='abs_max'):
        assert scope is not None, \
            'The scope cannot be set None.'
        assert place is not None, \
            'The place cannot be set None.'
        self._scope = scope
        self._place = place
        self._weight_bits = weight_bits
        self._activation_bits = activation_bits
        self._weight_quantize_type = weight_quantize_type
        self._quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul']
        self._fake_quant_op_names = [
            'fake_quantize_abs_max', 'fake_quantize_range_abs_max'
        ]
        self._fake_dequant_op_names = ['fake_dequantize_max_abs']
        # Maps the output name of a removed fake quant/dequant op to the
        # name of the variable that should be consumed instead.
        self._op_input_rename_map = collections.OrderedDict()
        # Maps the output name of a quantizable op to the dequantized var
        # node inserted after it.
        self._op_output_rename_map = collections.OrderedDict()
        # Maps a quantized variable name to its scale: a scalar for
        # persistable inputs, a graph var node otherwise.
        self._var_scale_map = collections.OrderedDict()

    def apply(self, graph):
        """
        Adjust quantize/dequantize operators order for the inference process.

        Args:
            graph(IrGraph): the applied graph.

        Returns:
            The same `graph` object, modified in place.
        """
        # A set keeps the membership tests below O(1).
        persistable_vars = set(p.name() for p in graph.all_persistable_vars())

        # Step 1: record the scale of every fake quantize op. For persistable
        # inputs also drop the op and overwrite the tensor in the scope with
        # its quantized values.
        for op_node in graph.all_ops():
            if op_node.name() not in self._fake_quant_op_names:
                continue
            input_arg_name = op_node.op().input('X')[0]
            if input_arg_name in persistable_vars:
                if self._weight_quantize_type == 'abs_max':
                    param = self._load_var(input_arg_name)
                    scale_v = np.max(np.abs(param))
                else:
                    scale_v = self._load_var(op_node.op().output('OutScale')
                                             [0])[0]
                self._var_scale_map[input_arg_name] = scale_v
                self._remove_fake_quant_and_dequant_op(graph, op_node)
                # quantize weight and restore
                param_v = self._load_var(input_arg_name)
                quantized_param_v = self._quant(param_v, scale_v,
                                                self._weight_bits)
                self._restore_var(input_arg_name, quantized_param_v)
            else:
                # Non-persistable input: keep the scale as a graph var node.
                scale_v = graph.var_node(op_node.op().output('OutScale')[0])
                self._var_scale_map[input_arg_name] = scale_v

        # Step 2: remove every fake dequantize op.
        for op_node in graph.all_ops():
            if op_node.name() in self._fake_dequant_op_names:
                self._remove_fake_quant_and_dequant_op(graph, op_node)

        # Step 3: append a dequantize op after each quantizable op.
        ops = graph.all_ops()
        for op_node in ops:
            if op_node.name() in self._quantizable_ops:
                self._insert_post_dequant_op(graph, op_node)

        # Step 4: `ops` is deliberately the list taken before step 3 ran.
        for op_node in ops:
            # insert dequant_op after fc/conv, need to rename inputs of the followed ops
            for var_node in op_node.inputs:
                name = var_node.name()
                if name in self._op_output_rename_map:
                    old_in = graph.var_node(name)
                    new_in = self._op_output_rename_map[name]
                    graph.update_input_link(old_in, new_in, op_node)

        # remove the unused var node in the graph
        self._remove_unused_var_nodes(graph)
        return graph

    def _remove_fake_quant_and_dequant_op(self, graph, op_node):
        """
        Remove a fake quant/dequant op and remember how to bypass it.

        The op's 'Out' name is mapped to its 'X' name, or to whatever 'X'
        itself was already mapped to, so chained removals resolve to the
        earliest recorded source name.
        """
        k = op_node.op().output('Out')[0]
        v = op_node.op().input('X')[0]
        if v not in self._op_input_rename_map:
            self._op_input_rename_map[k] = v
        else:
            self._op_input_rename_map[k] = self._op_input_rename_map[v]
        graph.safe_remove_nodes(op_node)

    def _insert_post_dequant_op(self, graph, op_node):
        """
        Insert a 'fake_dequantize_max_abs' op after `op_node`.

        Inputs of `op_node` that were outputs of removed fake ops are
        re-linked to their recorded source variables first.

        Args:
            graph(IrGraph): the graph being rewritten.
            op_node: the quantizable op node to post-process.

        Returns:
            The var node holding the dequantized output.

        Raises:
            ValueError: if `op_node` does not have exactly one output, or if
                no persistable input scale or no scale var node was found
                among its inputs.
        """
        max_range = None
        scale_var_node = None
        persistable_vars = set(p.name() for p in graph.all_persistable_vars())
        for var_node in op_node.inputs:
            name = var_node.name()
            if name in self._op_input_rename_map:
                old_in = graph.var_node(name)
                new_in = graph.var_node(self._op_input_rename_map[name])
                new_in.clear_outputs()
                graph.update_input_link(old_in, new_in, op_node)
            original_var_name = self._original_var_name(name)
            scale_v = self._var_scale_map[original_var_name]
            if original_var_name in persistable_vars:
                param_range = (1 << (self._weight_bits - 1)) - 1
                act_range = (1 << (self._activation_bits - 1)) - 1
                assert self._is_float(
                    scale_v), 'The scale of parameter %s is not a float.' % (
                        original_var_name)
                max_range = param_range * act_range / scale_v
            else:
                assert isinstance(scale_v, core.Node)
                scale_var_node = self._var_scale_map[original_var_name]

        if len(op_node.outputs) != 1:
            raise ValueError("Only support one output, but op %s has"
                             " more than one output." % (op_node.name()))

        # Fail early and clearly, before any node is added to the graph;
        # otherwise float(None) or a None 'Scale' input fails obscurely.
        if max_range is None:
            raise ValueError(
                "Cannot insert a dequantize op after op %s: none of its "
                "inputs is a persistable variable with a recorded scale." %
                (op_node.name()))
        if scale_var_node is None:
            raise ValueError(
                "Cannot insert a dequantize op after op %s: none of its "
                "inputs has a scale variable node." % (op_node.name()))

        output_var_node = op_node.outputs[0]
        dequant_var_node = graph.create_var_node(
            name=self._dequantized_var_name(output_var_node.name()),
            var_type=output_var_node.var().type(),
            shape=output_var_node.var().shape(),
            var_dtype=output_var_node.var().dtype())
        dequant_op_node = graph.create_op_node(
            op_type='fake_dequantize_max_abs',
            attrs={
                'max_range': float(max_range),
                'op_role': core.op_proto_and_checker_maker.OpRole.Forward
            },
            inputs={'X': output_var_node,
                    'Scale': scale_var_node},
            outputs={'Out': dequant_var_node})
        graph.link_to(output_var_node, dequant_op_node)
        graph.link_to(scale_var_node, dequant_op_node)
        graph.link_to(dequant_op_node, dequant_var_node)
        self._op_output_rename_map[output_var_node.name()] = dequant_var_node
        return dequant_var_node

    def _load_var(self, name):
        """Return the tensor of scope variable `name` as a numpy array."""
        return np.array(self._scope.find_var(name).get_tensor())

    def _restore_var(self, name, array):
        """Write `array` into the tensor of scope variable `name` on `self._place`."""
        tensor = self._scope.find_var(name).get_tensor()
        tensor.set(array, self._place)

    def _remove_unused_var_nodes(self, graph):
        """Remove every var node that is neither an input nor an output of any op."""
        all_used_vars = set()
        for op_node in graph.all_ops():
            all_used_vars.update(op_node.inputs)
            all_used_vars.update(op_node.outputs)
        all_unused_vars = graph.all_vars() - all_used_vars
        graph.safe_remove_nodes(all_unused_vars)

    def _original_var_name(self, var_name):
        """
        Return the original variable name.
        """
        # Order matters: the compound suffix must be tried before its parts.
        for suffix in ('.quantized.dequantized', '.quantized', '.dequantized',
                       '.scale'):
            if var_name.endswith(suffix):
                return var_name[:-len(suffix)]
        return var_name

    def _dequantized_var_name(self, var_name):
        """
        Return dequantized variable name for the input `var_name`.
        """
        return "%s.dequantized" % (var_name)

    def _is_float(self, v):
        """Return True if `v` is a Python float, numpy float32 or numpy float64."""
        return isinstance(v, (float, np.float32, np.float64))

    def _quant(self, x, scale, num_bits):
        """Return `x / scale * (2^(num_bits - 1) - 1)`, rounded with `np.round`."""
        return np.round(x / scale * ((1 << (num_bits - 1)) - 1))
class ConvertToInt8Pass(object):
    """
    Convert the weights into int8_t type.

    Args:
        scope(fluid.Scope): scope is used to get the weight tensor values.
        place(fluid.CPUPlace|fluid.CUDAPlace): place is used to restore the
            8bits weight tensors.
    """

    def __init__(self, scope, place):
        assert scope is not None, \
            'The scope cannot be set None.'
        assert place is not None, \
            'The place cannot be set None.'
        self._scope = scope
        self._place = place
        self._quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul']

    def apply(self, graph):
        """
        Convert weights' type of the graph. After that, the data type of the
        graph weights is int8_t.

        Args:
            graph(IrGraph): the applied graph.

        Returns:
            The same `graph` object, modified in place.
        """
        # A set keeps the membership test in the nested loop O(1).
        persistable_vars = set(p.name() for p in graph.all_persistable_vars())
        # Each weight is converted once and the int8 node is shared by every
        # op that consumes it.
        input_map = {}
        for op_node in graph.all_ops():
            if op_node.name() not in self._quantizable_ops:
                continue
            for var_node in op_node.inputs:
                name = var_node.name()
                if name not in persistable_vars:
                    continue
                if name not in input_map:
                    input_map[name] = self._convert_to_int8(graph, var_node)
                graph.update_input_link(var_node, input_map[name], op_node)

        # remove the unused var node in the graph
        self._remove_unused_var_nodes(graph)
        return graph

    def _convert_to_int8(self, graph, var_node):
        """
        Create an INT8 parameter node named '<name>.int8' for `var_node`.

        The source tensor is read from the scope, cast to int8 and stored in
        a scope variable with the new name.

        Returns:
            The newly created parameter node.
        """
        int8_var_node_name = var_node.name() + ".int8"
        int8_var_node = graph.create_param_node(
            name=cpt.to_text(int8_var_node_name),
            var_type=var_node.var().type(),
            shape=var_node.var().shape(),
            var_dtype=core.VarDesc.VarType.INT8)
        array = self._load_var(var_node.name())
        # The target variable must exist in the scope before it is written.
        self._scope.var(int8_var_node_name)
        self._store_var(int8_var_node_name, array, np.int8)
        return int8_var_node

    def _load_var(self, name):
        """Return the tensor of scope variable `name` as a numpy array."""
        return np.array(self._scope.find_var(name).get_tensor())

    def _store_var(self, name, array, dtype):
        """Cast `array` to `dtype` and write it into scope variable `name`."""
        tensor = self._scope.find_var(name).get_tensor()
        # NOTE(review): astype() does no range check; presumably the values
        # were already quantized into the int8 range upstream -- confirm.
        tensor.set(array.astype(dtype), self._place)

    def _remove_unused_var_nodes(self, graph):
        """Remove every var node that is neither an input nor an output of any op."""
        all_used_vars = set()
        for op_node in graph.all_ops():
            all_used_vars.update(op_node.inputs)
            all_used_vars.update(op_node.outputs)
        all_unused_vars = graph.all_vars() - all_used_vars
        graph.safe_remove_nodes(all_unused_vars)
class TransformForMobilePass(object):
    """
    This pass is used to convert the frozen graph for paddle-mobile execution.
    """

    def __init__(self):
        self._fake_quant_op_names = [
            'fake_quantize_abs_max', 'fake_quantize_range_abs_max'
        ]
        self._fake_dequant_op_names = ['fake_dequantize_max_abs']

    def apply(self, graph):
        """
        Because paddle-mobile uses `quantize` and `dequantize` as the names of
        the quantize operator and the dequantize operator, the `apply`
        function just renames the fake quantize/dequantize operators.

        Args:
            graph(IrGraph): the graph will be transformed.
        Returns:
            IrGraph: the same `graph` object, modified in place.
        """
        ops = graph.all_ops()
        for op_node in ops:
            name = op_node.name()
            if name in self._fake_quant_op_names:
                self._replace_op(graph, op_node, 'quantize')
            if name in self._fake_dequant_op_names:
                self._replace_op(graph, op_node, 'dequantize')

        return graph

    def _replace_op(self, graph, op_node, new_type):
        """
        Swap `op_node` for a new operator node of type `new_type`.

        The type is changed on the existing op description, a new node is
        created from that description and linked to the same inputs and
        outputs, and the old node is then removed from the graph.
        """
        op_node.op().set_type(new_type)
        new_node = graph.create_op_node_from_desc(op_node.op())
        for input_node in op_node.inputs:
            graph.link_to(input_node, new_node)
        for output_node in op_node.outputs:
            graph.link_to(new_node, output_node)
        graph.safe_remove_nodes(op_node)
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
0 → 100644
浏览文件 @
d79d2f68
# Collect every test_*.py in this directory (paths relative to it) ...
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
# ... and drop the ".py" so each entry can serve as a test target name.
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")

# Register one py_test target per collected test module.
foreach(src ${TEST_OPS})
    py_test(${src} SRCS ${src}.py)
endforeach()
python/paddle/fluid/contrib/slim/
unitest
/__init__.py
→
python/paddle/fluid/contrib/slim/
tests
/__init__.py
浏览文件 @
d79d2f68
文件已移动
python/paddle/fluid/contrib/slim/
unitest
/configs/config.yaml
→
python/paddle/fluid/contrib/slim/
tests
/configs/config.yaml
浏览文件 @
d79d2f68
version
:
1.0
version
:
1.0
include
:
[
"
./
unitest/configs/pruners.yaml"
,
"
./unitest
/configs/pruners_0.yaml"
]
include
:
[
"
./
configs/pruners.yaml"
,
"
.
/configs/pruners_0.yaml"
]
pruners
:
pruners
:
pruner_1
:
pruner_1
:
class
:
'
RatioPruner'
class
:
'
RatioPruner'
...
...
python/paddle/fluid/contrib/slim/
unitest
/configs/pruners.yaml
→
python/paddle/fluid/contrib/slim/
tests
/configs/pruners.yaml
浏览文件 @
d79d2f68
文件已移动
python/paddle/fluid/contrib/slim/
unitest
/configs/pruners_0.yaml
→
python/paddle/fluid/contrib/slim/
tests
/configs/pruners_0.yaml
浏览文件 @
d79d2f68
文件已移动
python/paddle/fluid/contrib/slim/
unitest
/test_factory.py
→
python/paddle/fluid/contrib/slim/
tests
/test_factory.py
浏览文件 @
d79d2f68
...
@@ -18,7 +18,7 @@ import unittest
...
@@ -18,7 +18,7 @@ import unittest
class
TestFactory
(
unittest
.
TestCase
):
class
TestFactory
(
unittest
.
TestCase
):
def
test_parse
(
self
):
def
test_parse
(
self
):
factory
=
ConfigFactory
(
'./
unitest/
configs/config.yaml'
)
factory
=
ConfigFactory
(
'./configs/config.yaml'
)
pruner
=
factory
.
instance
(
'pruner_1'
)
pruner
=
factory
.
instance
(
'pruner_1'
)
self
.
assertEquals
(
pruner
.
ratios
[
'conv1_1.w'
],
0.3
)
self
.
assertEquals
(
pruner
.
ratios
[
'conv1_1.w'
],
0.3
)
...
...
python/paddle/fluid/contrib/slim/tests/test_graph.py
0 → 100644
浏览文件 @
d79d2f68
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
from
__future__
import
print_function
import
unittest
import
paddle.fluid
as
fluid
import
six
from
paddle.fluid.framework
import
IrGraph
from
paddle.fluid
import
core
def residual_block(num):
    """
    Build `num` stacked residual blocks followed by a 10-way fc layer and
    return the mean cross-entropy loss variable.

    The layers are added to whatever program is current when this function
    is called.
    """

    def conv_bn_layer(x,
                      ch_out,
                      filter_size,
                      stride,
                      padding,
                      act='relu',
                      bias_attr=False):
        # Convolution without activation; the activation (if any) is applied
        # by the batch norm layer that follows it.
        conv_out = fluid.layers.conv2d(
            input=x,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr)
        return fluid.layers.batch_norm(input=conv_out, act=act)

    image = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    hidden = image
    for _ in six.moves.xrange(num):
        # Main branch (3x3 conv) and shortcut branch (1x1 conv), summed and
        # passed through relu.
        main_branch = conv_bn_layer(
            hidden, 16, 3, 1, 1, act=None, bias_attr=True)
        shortcut = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
        hidden = fluid.layers.elementwise_add(
            x=main_branch, y=shortcut, act='relu')

    logits = fluid.layers.fc(input=hidden, size=10)
    per_sample_loss = fluid.layers.cross_entropy(input=logits, label=label)
    return fluid.layers.mean(per_sample_loss)
class TestGraph(unittest.TestCase):
    """Smoke test for the graph inspection helpers of IrGraph."""

    def test_graph_functions(self):
        """
        Build a small residual network, wrap it in an IrGraph and check
        has_circle, graph_num, topology_sort, build_adjacency_list and
        safe_remove_nodes against it.
        """
        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        with fluid.program_guard(main_prog, startup_prog):
            loss = residual_block(2)
            optimizer = fluid.optimizer.Adam(learning_rate=0.001)
            optimizer.minimize(loss)

        graph = IrGraph(core.Graph(main_prog.desc), for_test=False)

        # Highlight the convolution ops in the drawing; the same set is
        # removed from the graph at the end of the test.
        conv_ops = {op for op in graph.all_ops() if 'conv2d' in op.name()}
        graph.draw('.', 'residual', conv_ops)

        self.assertFalse(graph.has_circle())
        self.assertEqual(graph.graph_num(), 1)

        sorted_nodes = graph.topology_sort()
        self.assertEqual(len(sorted_nodes), len(graph.all_ops()))

        adjacency = graph.build_adjacency_list()
        self.assertEqual(len(adjacency), len(graph.all_ops()))

        count_before = len(graph.all_nodes())
        graph.safe_remove_nodes(conv_ops)
        self.assertEqual(len(graph.all_nodes()), count_before - len(conv_ops))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/contrib/slim/
unitest
/test_quantization_pass.py
→
python/paddle/fluid/contrib/slim/
tests
/test_quantization_pass.py
浏览文件 @
d79d2f68
...
@@ -17,9 +17,12 @@ import random
...
@@ -17,9 +17,12 @@ import random
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
six
import
six
from
paddle.fluid.framework
import
Program
import
paddle
from
paddle.fluid.framework
import
IrGraph
from
paddle.fluid.framework
import
IrGraph
from
paddle.fluid.contrib.slim.quantization
import
QuantizationTransformPass
from
paddle.fluid.contrib.slim.quantization
import
QuantizationTransformPass
from
paddle.fluid.contrib.slim.quantization
import
QuantizationFreezePass
from
paddle.fluid.contrib.slim.quantization
import
ConvertToInt8Pass
from
paddle.fluid.contrib.slim.quantization
import
TransformForMobilePass
from
paddle.fluid
import
core
from
paddle.fluid
import
core
...
@@ -65,6 +68,28 @@ def residual_block(num):
...
@@ -65,6 +68,28 @@ def residual_block(num):
return
loss
return
loss
def conv_net(img, label):
    """
    Build a two-stage conv/pool network with a softmax classifier and return
    the mean cross-entropy loss variable.

    Args:
        img: the input image variable.
        label: the label variable the prediction is compared against.
    """
    # Settings shared by both conv-pool stages.
    stage_args = dict(
        filter_size=5, pool_size=2, pool_stride=2, act="relu")

    stage_1 = fluid.nets.simple_img_conv_pool(
        input=img, num_filters=20, **stage_args)
    stage_1 = fluid.layers.batch_norm(stage_1)
    stage_2 = fluid.nets.simple_img_conv_pool(
        input=stage_1, num_filters=50, **stage_args)

    prediction = fluid.layers.fc(input=stage_2, size=10, act='softmax')
    per_sample_loss = fluid.layers.cross_entropy(
        input=prediction, label=label)
    return fluid.layers.mean(per_sample_loss)
class
TestQuantizationTransformPass
(
unittest
.
TestCase
):
class
TestQuantizationTransformPass
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
quantizable_op_and_inputs
=
{
self
.
quantizable_op_and_inputs
=
{
...
@@ -171,5 +196,177 @@ class TestQuantizationTransformPass(unittest.TestCase):
...
@@ -171,5 +196,177 @@ class TestQuantizationTransformPass(unittest.TestCase):
self
.
residual_block_quant
(
'range_abs_max'
)
self
.
residual_block_quant
(
'range_abs_max'
)
class TestQuantizationFreezePass(unittest.TestCase):
    """
    Runs the transform, freeze, int8-conversion and mobile passes in turn on
    a small conv net and checks the results after each stage.
    """

    def _draw_quantize_ops(self, graph, name):
        """
        Draw `graph` into the current directory under `name`, marking every
        op whose name contains 'quantize'.
        """
        marked_nodes = {
            op
            for op in graph.all_ops() if 'quantize' in op.name()
        }
        graph.draw('.', name, marked_nodes)

    def freeze_graph(self, use_cuda, seed, quant_type):
        """
        Train the quantized net for a few iterations, then freeze it, convert
        its weights to int8 and transform it for paddle-mobile.

        Args:
            use_cuda(bool): run on CUDAPlace(0) if True, else on CPUPlace.
            seed(int): the random seed set on the built programs.
            quant_type(str): the activation quantize type handed to
                QuantizationTransformPass.
        """

        def build_program(main, startup, is_test):
            main.random_seed = seed
            startup.random_seed = seed
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32')
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64')
                    loss = conv_net(img, label)
                    if not is_test:
                        opt = fluid.optimizer.Adam(learning_rate=0.001)
                        opt.minimize(loss)
            return [img, label], loss

        random.seed(0)
        np.random.seed(0)

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)

        # Insert the fake quantize/dequantize ops into both graphs.
        transform_pass = QuantizationTransformPass(
            scope=scope, program_exe=exe, activation_quantize_type=quant_type)
        transform_pass.apply(main_graph)
        transform_pass.apply(test_graph)
        dev_name = '_gpu_' if use_cuda else '_cpu_'
        graph_suffix = dev_name + quant_type
        self._draw_quantize_ops(main_graph, 'main' + graph_suffix)
        self._draw_quantize_ops(test_graph, 'test' + graph_suffix)

        quantized_main_program = main_graph.to_program()
        quantized_test_program = test_graph.to_program()
        iters = 5
        batch_size = 8

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=500),
            batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)
        with fluid.scope_guard(scope):
            for _ in range(iters):
                # A fresh reader is created on every iteration, as before.
                data = next(train_reader())
                exe.run(program=quantized_main_program,
                        feed=feeder.feed(data),
                        fetch_list=[loss])

        test_data = next(test_reader())
        with fluid.program_guard(quantized_test_program):
            w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                             quantized_test_program)
        # Testing
        with fluid.scope_guard(scope):
            test_loss1, _ = exe.run(program=quantized_test_program,
                                    feed=feeder.feed(test_data),
                                    fetch_list=[loss, w_var])

        # Freeze graph for inference, but the weight of fc/conv is still float type.
        freeze_pass = QuantizationFreezePass(scope=scope, place=place)
        freeze_pass.apply(test_graph)
        self._draw_quantize_ops(test_graph, 'test_freeze' + graph_suffix)

        server_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            test_loss2, = exe.run(program=server_program,
                                  feed=feeder.feed(test_data),
                                  fetch_list=[loss])
        # The frozen graph must reproduce the loss of the quantized graph.
        self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
        w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
        # The sum of the frozen weight is not compared with the fetched
        # quantized weight here: it may differ due to calculation precision.

        # Convert parameter to 8-bit.
        convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
        convert_int8_pass.apply(test_graph)
        self._draw_quantize_ops(test_graph, 'test_int8' + graph_suffix)
        server_program_int8 = test_graph.to_program()
        # Save the 8-bit parameter and model file.
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model('server_int8' + graph_suffix,
                                          ['image', 'label'], [loss], exe,
                                          server_program_int8)
            # Test whether the 8-bit parameter and model file can be loaded successfully.
            [_, _, _] = fluid.io.load_inference_model(
                'server_int8' + graph_suffix, exe)
        # Check the loaded 8-bit weight.
        w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
        self.assertEqual(w_8bit.dtype, np.int8)
        self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))

        # Rename the fake quantize/dequantize ops for paddle-mobile.
        mobile_pass = TransformForMobilePass()
        mobile_pass.apply(test_graph)
        self._draw_quantize_ops(test_graph, 'test_mobile' + graph_suffix)

        mobile_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model('mobile_int8' + graph_suffix,
                                          ['image', 'label'], [loss], exe,
                                          mobile_program)

    def test_freeze_graph_cuda_dynamic(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(True, seed=1, quant_type='abs_max')

    def test_freeze_graph_cpu_dynamic(self):
        with fluid.unique_name.guard():
            self.freeze_graph(False, seed=2, quant_type='abs_max')

    def test_freeze_graph_cuda_static(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(True, seed=1, quant_type='range_abs_max')

    def test_freeze_graph_cpu_static(self):
        with fluid.unique_name.guard():
            self.freeze_graph(False, seed=2, quant_type='range_abs_max')
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/contrib/tests/CMakeLists.txt
浏览文件 @
d79d2f68
...
@@ -6,5 +6,9 @@ if(APPLE OR WIN32 OR NOT WITH_MKL)
...
@@ -6,5 +6,9 @@ if(APPLE OR WIN32 OR NOT WITH_MKL)
endif
()
endif
()
foreach
(
src
${
TEST_OPS
}
)
foreach
(
src
${
TEST_OPS
}
)
if
(
src MATCHES
"test_calibration"
)
py_test
(
${
src
}
SRCS
${
src
}
.py ENVS FLAGS_use_mkldnn=true
)
else
()
py_test
(
${
src
}
SRCS
${
src
}
.py
)
py_test
(
${
src
}
SRCS
${
src
}
.py
)
endif
()
endforeach
()
endforeach
()
python/paddle/fluid/contrib/tests/test_calibration.py
浏览文件 @
d79d2f68
...
@@ -199,7 +199,6 @@ class TestCalibrationForResnet50(unittest.TestCase):
...
@@ -199,7 +199,6 @@ class TestCalibrationForResnet50(unittest.TestCase):
def
run_program
(
self
,
model_path
,
generate_int8
=
False
,
algo
=
'direct'
):
def
run_program
(
self
,
model_path
,
generate_int8
=
False
,
algo
=
'direct'
):
image_shape
=
[
3
,
224
,
224
]
image_shape
=
[
3
,
224
,
224
]
os
.
environ
[
'FLAGS_use_mkldnn'
]
=
'True'
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
...
@@ -241,9 +240,6 @@ class TestCalibrationForResnet50(unittest.TestCase):
...
@@ -241,9 +240,6 @@ class TestCalibrationForResnet50(unittest.TestCase):
label
=
label
.
reshape
([
-
1
,
1
])
label
=
label
.
reshape
([
-
1
,
1
])
running_program
=
calibrator
.
sampling_program
.
clone
(
running_program
=
calibrator
.
sampling_program
.
clone
(
)
if
generate_int8
else
infer_program
.
clone
()
)
if
generate_int8
else
infer_program
.
clone
()
for
op
in
running_program
.
current_block
().
ops
:
if
op
.
has_attr
(
"use_mkldnn"
):
op
.
_set_attr
(
"use_mkldnn"
,
True
)
t1
=
time
.
time
()
t1
=
time
.
time
()
_
,
acc1
,
_
=
exe
.
run
(
_
,
acc1
,
_
=
exe
.
run
(
...
...
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
浏览文件 @
d79d2f68
...
@@ -204,9 +204,11 @@ class TestQuantizeTranspiler(unittest.TestCase):
...
@@ -204,9 +204,11 @@ class TestQuantizeTranspiler(unittest.TestCase):
build_program
(
test_program
,
startup
,
True
)
build_program
(
test_program
,
startup
,
True
)
test_program
=
test_program
.
clone
(
for_test
=
True
)
test_program
=
test_program
.
clone
(
for_test
=
True
)
quant_transpiler
=
QuantizeTranspiler
()
quant_type
=
'range_abs_max'
# 'range_abs_max' or 'abs_max'
quant_transpiler
.
training_transpile
(
main
)
quant_transpiler
=
QuantizeTranspiler
(
quant_transpiler
.
training_transpile
(
test_program
)
activation_quantize_type
=
quant_type
)
quant_transpiler
.
training_transpile
(
main
,
startup
)
quant_transpiler
.
training_transpile
(
test_program
,
startup
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
...
...
python/paddle/fluid/framework.py
浏览文件 @
d79d2f68
...
@@ -16,6 +16,8 @@ from __future__ import print_function
...
@@ -16,6 +16,8 @@ from __future__ import print_function
import
collections
import
collections
from
collections
import
defaultdict
from
collections
import
defaultdict
from
collections
import
Iterable
import
contextlib
from
.wrapped_decorator
import
signature_safe_contextmanager
from
.wrapped_decorator
import
signature_safe_contextmanager
import
os
import
os
import
re
import
re
...
@@ -1529,12 +1531,16 @@ class Block(object):
...
@@ -1529,12 +1531,16 @@ class Block(object):
class
IrGraph
(
object
):
class
IrGraph
(
object
):
"""
"""
IrGraph uses core.Graph as the delegation to accomplish the manipulation.
Python IrGraph. Beneath it is a core.Graph, which is used for
create a c++ Ir Pass Graph. An IrGraph is just a graph view of
a Program. In an IrGraph, both Variables and Operators are graph
nodes.
"""
"""
def
__init__
(
self
,
graph
,
for_test
=
False
):
def
__init__
(
self
,
graph
,
for_test
=
False
):
"""
"""
Construct the IrGraph using core.Graph.
Construct an IrGraph using core.Graph.
Args:
Args:
graph(core.Graph): C++ Graph.
graph(core.Graph): C++ Graph.
for_test(bool): True for the test graph and false for the train graph.
for_test(bool): True for the test graph and false for the train graph.
...
@@ -1545,23 +1551,81 @@ class IrGraph(object):
...
@@ -1545,23 +1551,81 @@ class IrGraph(object):
self
.
_for_test
=
for_test
self
.
_for_test
=
for_test
def
is_test
(
self
):
def
is_test
(
self
):
"""
If the graph is used for testing, the function returns true. Otherwise, returns false.
"""
return
self
.
_for_test
return
self
.
_for_test
def
all_parameters
(
self
):
def
all_nodes
(
self
):
param_nodes
=
set
()
"""
for
node
in
self
.
graph
.
nodes
():
Return all nodes included in the graph as a set.
if
node
.
is_var
()
and
node
.
var
()
is
not
None
and
node
.
var
(
"""
).
persistable
():
return
{
node
for
node
in
self
.
graph
.
nodes
()}
param_nodes
.
add
(
node
)
return
param_nodes
def
all_vars
(
self
):
def
all_vars
(
self
):
"""
Return all variable nodes included in the graph as a set.
"""
return
{
node
for
node
in
self
.
graph
.
nodes
()
if
node
.
is_var
()}
return
{
node
for
node
in
self
.
graph
.
nodes
()
if
node
.
is_var
()}
def all_persistable_vars(self):
    """
    Return all persistable variable nodes included in the graph as a set.

    A node qualifies when it is a variable node, carries a variable
    description, and that description is marked persistable.
    """
    return {
        node
        for node in self.graph.nodes()
        if node.is_var() and node.var() is not None and
        node.var().persistable()
    }
def
all_ops
(
self
):
def
all_ops
(
self
):
"""
Return all operator nodes included in the graph as a set.
"""
return
{
node
for
node
in
self
.
graph
.
nodes
()
if
node
.
is_op
()}
return
{
node
for
node
in
self
.
graph
.
nodes
()
if
node
.
is_op
()}
def var_node(self, name):
    """
    Get a variable node by name from the graph.

    Args:
        name(str): the name of the variable node.
    Raises:
        TypeError: If the input's type is not str.
        ValueError: If this graph doesn't have a variable with the
            given name.
    Returns:
        core.Node: the variable node with the given name.
    """
    if not isinstance(name, six.string_types):
        raise TypeError(
            "var require string as parameter, but get %s instead." %
            (type(name)))
    target_var_node = None
    var_nodes = self.all_vars()
    # The scan deliberately does not stop at the first hit: when several
    # nodes share the name, the last one visited is returned, as before.
    for var_node in var_nodes:
        if var_node.name() == name:
            target_var_node = var_node
    if target_var_node is None:
        raise ValueError("var_node %s not in this graph" % name)
    return target_var_node
def
create_param_node
(
self
,
name
,
var_type
,
shape
,
var_dtype
):
def
create_param_node
(
self
,
name
,
var_type
,
shape
,
var_dtype
):
"""
Create a persistable variable node in the graph. In IrGraph,
it can not distinguish between persistable variables and parameters.
Args:
name(str): the name of the persistable variable node.
var_type(core.VarDesc.VarType): the type of the persistable variable node.
shape(list): the shape of the persistable variable node.
var_dtype(core.VarDesc.VarType): the data type of the persistable variable node.
Returns:
core.Node: the created persistable variable node.
"""
var_desc
=
core
.
VarDesc
(
name
)
var_desc
=
core
.
VarDesc
(
name
)
var_desc
.
set_type
(
var_type
)
var_desc
.
set_type
(
var_type
)
var_desc
.
set_shape
(
shape
)
var_desc
.
set_shape
(
shape
)
...
@@ -1570,6 +1634,20 @@ class IrGraph(object):
...
@@ -1570,6 +1634,20 @@ class IrGraph(object):
return
self
.
graph
.
create_var_node
(
var_desc
)
return
self
.
graph
.
create_var_node
(
var_desc
)
def
create_var_node
(
self
,
name
,
var_type
,
shape
,
var_dtype
):
def
create_var_node
(
self
,
name
,
var_type
,
shape
,
var_dtype
):
"""
Create a variable node in the graph. The created variable node is
not persistable.
Args:
name(str): the name of the variable node.
var_type(core.VarDesc.VarType): the type of the variable node.
shape(list): the shape of the variable node.
var_dtype(core.VarDesc.VarType): the data type of the variable node.
Returns:
core.Node: the created variable node.
"""
var_desc
=
core
.
VarDesc
(
name
)
var_desc
=
core
.
VarDesc
(
name
)
var_desc
.
set_type
(
var_type
)
var_desc
.
set_type
(
var_type
)
var_desc
.
set_shape
(
shape
)
var_desc
.
set_shape
(
shape
)
...
@@ -1577,19 +1655,41 @@ class IrGraph(object):
...
@@ -1577,19 +1655,41 @@ class IrGraph(object):
return
self
.
graph
.
create_var_node
(
var_desc
)
return
self
.
graph
.
create_var_node
(
var_desc
)
def
create_var_node_from_desc
(
self
,
var_desc
):
def
create_var_node_from_desc
(
self
,
var_desc
):
"""
Create a variable node by using an existing VarDesc in the graph.
Depend on the giving VarDesc, the created variable node may be persistable.
Args:
var_desc(core.VarDesc): the giving variable description.
Returns:
core.Node: the created variable node.
"""
return
self
.
graph
.
create_var_node
(
var_desc
)
return
self
.
graph
.
create_var_node
(
var_desc
)
def
create_op_node
(
self
,
op_type
,
attrs
,
inputs
,
outputs
):
def
create_op_node
(
self
,
op_type
,
attrs
,
inputs
,
outputs
):
"""
Create a operator node in the graph.
Args:
op_type(str): the type of the operator node.
attrs(dict): the attributes of the operator node.
inputs(dict): the inputs of the operator node.
outputs(dict): the outputs of the operator node.
Returns:
core.Node: the created operator node.
"""
op_desc
=
core
.
OpDesc
()
op_desc
=
core
.
OpDesc
()
op_desc
.
set_type
(
op_type
)
op_desc
.
set_type
(
op_type
)
for
attr
,
value
in
attrs
.
iteritems
(
):
for
attr
,
value
in
six
.
iteritems
(
attrs
):
self
.
_update_desc_attr
(
op_desc
,
attr
,
value
)
self
.
_update_desc_attr
(
op_desc
,
attr
,
value
)
for
input_name
,
var_nodes
in
inputs
.
iteritems
(
):
for
input_name
,
var_nodes
in
six
.
iteritems
(
inputs
):
if
not
isinstance
(
var_nodes
,
list
):
if
not
isinstance
(
var_nodes
,
list
):
var_nodes
=
[
var_nodes
]
var_nodes
=
[
var_nodes
]
op_desc
.
set_input
(
input_name
,
op_desc
.
set_input
(
input_name
,
[
var_node
.
name
()
for
var_node
in
var_nodes
])
[
var_node
.
name
()
for
var_node
in
var_nodes
])
for
output_name
,
var_nodes
in
outputs
.
iteritems
(
):
for
output_name
,
var_nodes
in
six
.
iteritems
(
outputs
):
if
not
isinstance
(
var_nodes
,
list
):
if
not
isinstance
(
var_nodes
,
list
):
var_nodes
=
[
var_nodes
]
var_nodes
=
[
var_nodes
]
op_desc
.
set_output
(
output_name
,
op_desc
.
set_output
(
output_name
,
...
@@ -1597,11 +1697,29 @@ class IrGraph(object):
...
@@ -1597,11 +1697,29 @@ class IrGraph(object):
return
self
.
graph
.
create_op_node
(
op_desc
)
return
self
.
graph
.
create_op_node
(
op_desc
)
def
create_op_node_from_desc
(
self
,
op_desc
):
def
create_op_node_from_desc
(
self
,
op_desc
):
"""
Create a operator node by using an existing OpDesc in the graph.
Args:
op_desc(core.OpDesc): the given operator description.
Returns:
core.Node: the created operator node.
"""
return
self
.
graph
.
create_op_node
(
op_desc
)
return
self
.
graph
.
create_op_node
(
op_desc
)
def
update_input_link
(
self
,
old_input_node
,
new_input_node
,
op_node
):
def
update_input_link
(
self
,
old_input_node
,
new_input_node
,
op_node
):
assert
old_input_node
in
self
.
graph
.
nodes
()
and
new_input_node
in
self
.
graph
.
nodes
()
and
\
"""
op_node
in
self
.
graph
.
nodes
(),
'Th three arguments must be in the graph nodes.'
Update the input's link of a operator node.
Args:
old_input_node(core.Node): the old input node of the giving op_node.
new_input_node(core.Node): the new input node of the giving op_node.
op_node(core.Node): the operator node that is needed to update input's link.
"""
assert
old_input_node
in
self
.
graph
.
nodes
()
and
new_input_node
in
\
self
.
graph
.
nodes
()
and
op_node
in
self
.
graph
.
nodes
(),
\
'The three arguments(old_input_node&new_input_node&op_node) must be in the graph nodes.'
old_input_node
.
outputs_remove
(
op_node
)
old_input_node
.
outputs_remove
(
op_node
)
op_node
.
inputs_remove
(
old_input_node
)
op_node
.
inputs_remove
(
old_input_node
)
new_input_node
.
outputs_append
(
op_node
)
new_input_node
.
outputs_append
(
op_node
)
...
@@ -1609,17 +1727,85 @@ class IrGraph(object):
...
@@ -1609,17 +1727,85 @@ class IrGraph(object):
op_node
.
op
().
_rename_input
(
old_input_node
.
name
(),
new_input_node
.
name
())
op_node
.
op
().
_rename_input
(
old_input_node
.
name
(),
new_input_node
.
name
())
def
link_to
(
self
,
node_in
,
node_out
):
def
link_to
(
self
,
node_in
,
node_out
):
"""
Connect two nodes.
Args:
node_in(core.Node): the input node.
node_out(core.Node): the output node.
"""
assert
node_in
in
self
.
graph
.
nodes
()
and
node_out
in
self
.
graph
.
nodes
(),
\
assert
node_in
in
self
.
graph
.
nodes
()
and
node_out
in
self
.
graph
.
nodes
(),
\
'Th
two arguments
must be in the graph nodes.'
'Th
e two arguments(node_in&node_out)
must be in the graph nodes.'
node_in
.
outputs_append
(
node_out
)
node_in
.
outputs_append
(
node_out
)
node_out
.
inputs_append
(
node_in
)
node_out
.
inputs_append
(
node_in
)
def
safe_remove_nodes
(
self
,
remove_nodes
):
def
safe_remove_nodes
(
self
,
remove_nodes
):
"""
Remove nodes safely since links connected to these removed nodes are
also removed.
Args:
remove_nodes(set): the nodes prepared to be removed.
"""
if
not
isinstance
(
remove_nodes
,
set
):
if
not
isinstance
(
remove_nodes
,
set
):
if
isinstance
(
remove_nodes
,
Iterable
):
remove_nodes
=
set
(
remove_nodes
)
remove_nodes
=
set
(
remove_nodes
)
else
:
remove_nodes
=
{
remove_nodes
}
core
.
graph_safe_remove_nodes
(
self
.
graph
,
remove_nodes
)
core
.
graph_safe_remove_nodes
(
self
.
graph
,
remove_nodes
)
def
draw
(
self
,
save_path
,
name
,
marked_nodes
=
None
):
def has_circle(self):
    """
    Check if the graph has a circle (i.e. a cycle).

    Delegates to `core.has_circle` on the wrapped graph.

    Returns:
        bool: True if the graph has a circle else False.
    """
    return core.has_circle(self.graph)
def graph_num(self):
    """
    Count the number of unconnected graphs in this graph.

    Delegates to `core.graph_num` on the wrapped graph.

    Returns:
        int: the number of unconnected graphs.
    """
    return core.graph_num(self.graph)
def topology_sort(self):
    """
    Perform the topology sort operation on the graph.

    Delegates to `core.topology_sort` on the wrapped graph.

    Notes: the `graph` cannot contain a circle.

    Returns:
        set(core.Node): nodes in topology order.
    """
    # NOTE(review): the return is documented as a set, yet an ordered result
    # suggests a sequence - confirm against the core.topology_sort binding.
    return core.topology_sort(self.graph)
def build_adjacency_list(self):
    """
    Build an adjacency list of operations for the `graph`.

    Delegates to `core.build_adjacency_list` on the wrapped graph.

    Returns:
        dict{core.Node: set(core.Node)}: the adjacency list.
    """
    return core.build_adjacency_list(self.graph)
def
draw
(
self
,
save_path
,
name
,
marked_nodes
=
None
,
remove_ctr_var
=
True
):
"""
Draw the graph. If `dot` command is installed, the drawn graph
will be saved as pdf file type, otherwise dot file type is used.
Args:
save_path(str): the save path of drawn graph.
name(str): the name of drawn graph.
marked_nodes(set(core.Node)): nodes that are needed to be marked.
Default value is None.
remove_ctr_var(bool): If it is set True, all control variable nodes
in the graph will be removed. Default value is True.
"""
def
_convert_to_pdf
(
dot_file_path
):
def
_convert_to_pdf
(
dot_file_path
):
pdf_save_path
=
os
.
path
.
splitext
(
dot_file_path
)[
0
]
+
'.pdf'
pdf_save_path
=
os
.
path
.
splitext
(
dot_file_path
)[
0
]
+
'.pdf'
exited_code
=
subprocess
.
call
(
'dot -Tpdf '
+
dot_file_path
\
exited_code
=
subprocess
.
call
(
'dot -Tpdf '
+
dot_file_path
\
...
@@ -1629,15 +1815,17 @@ class IrGraph(object):
...
@@ -1629,15 +1815,17 @@ class IrGraph(object):
print
(
'The {} is saved as the dot filetype.'
.
format
(
print
(
'The {} is saved as the dot filetype.'
.
format
(
dot_file_path
))
dot_file_path
))
if
remove_ctr_var
:
remove_ctr_vars
=
set
()
remove_ctr_vars
=
set
()
ops_num
=
0
for
node
in
self
.
graph
.
nodes
():
for
node
in
self
.
graph
.
nodes
():
if
node
.
is_ctrl_var
():
if
node
.
is_ctrl_var
():
remove_ctr_vars
.
add
(
node
)
remove_ctr_vars
.
add
(
node
)
elif
node
.
is_op
():
self
.
safe_remove_nodes
(
remove_ctr_vars
)
ops_num
=
0
for
node
in
self
.
graph
.
nodes
():
if
node
.
is_op
():
ops_num
+=
1
ops_num
+=
1
print
(
'Total ops num = {}.'
.
format
(
ops_num
))
print
(
'Total ops num = {}.'
.
format
(
ops_num
))
self
.
safe_remove_nodes
(
remove_ctr_vars
)
if
marked_nodes
is
not
None
:
if
marked_nodes
is
not
None
:
if
not
isinstance
(
marked_nodes
,
set
):
if
not
isinstance
(
marked_nodes
,
set
):
marked_nodes
=
set
(
marked_nodes
)
marked_nodes
=
set
(
marked_nodes
)
...
@@ -1652,10 +1840,20 @@ class IrGraph(object):
...
@@ -1652,10 +1840,20 @@ class IrGraph(object):
_convert_to_pdf
(
viz_dot_path
)
_convert_to_pdf
(
viz_dot_path
)
def
to_program
(
self
):
def
to_program
(
self
):
"""
Convert the graph into a Program.
Notes: When the graph includes backward operator nodes, the
conversion process may fail. Usually, this function is
only used to convert a test graph.
Returns:
Program: a program converted from the graph.
"""
convert_pass
=
core
.
get_pass
(
'graph_to_program_pass'
)
convert_pass
=
core
.
get_pass
(
'graph_to_program_pass'
)
convert_pass
.
set
(
'program'
,
Program
().
desc
)
desc
=
core
.
ProgramDesc
()
convert_pass
.
set_not_owned
(
'program'
,
desc
)
convert_pass
.
apply
(
self
.
graph
)
convert_pass
.
apply
(
self
.
graph
)
desc
=
convert_pass
.
get_program
(
'program'
)
program
=
Program
.
_construct_from_desc
(
desc
)
program
=
Program
.
_construct_from_desc
(
desc
)
return
program
return
program
...
...
python/paddle/fluid/imperative/layers.py
浏览文件 @
d79d2f68
...
@@ -12,6 +12,7 @@
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
collections
import
contextlib
import
contextlib
import
sys
import
sys
import
numpy
as
np
import
numpy
as
np
...
@@ -30,31 +31,45 @@ class Layer(core.Layer):
...
@@ -30,31 +31,45 @@ class Layer(core.Layer):
def
__init__
(
self
,
dtype
=
core
.
VarDesc
.
VarType
.
FP32
,
name
=
None
):
def
__init__
(
self
,
dtype
=
core
.
VarDesc
.
VarType
.
FP32
,
name
=
None
):
self
.
_built
=
False
self
.
_built
=
False
self
.
_dtype
=
dtype
self
.
_dtype
=
dtype
self
.
_parameters
=
collections
.
OrderedDict
()
self
.
_sub_layers
=
collections
.
OrderedDict
()
def
parameters
(
self
,
include_sublayers
=
True
):
"""Returns a list of Parameters from current and sub-layers.
Args:
include_sublayers: If true, also include the parameters from
sublayers.
Returns a list of Parameters.
"""
ret
=
[
p
for
p
in
self
.
_parameters
.
values
()]
if
include_sublayers
:
for
l
in
self
.
_sub_layers
.
values
():
for
p
in
l
.
parameters
(
include_sublayers
):
ret
.
append
(
p
)
return
ret
def
parameters
(
self
):
def
sublayers
(
self
,
include_sublayers
=
True
):
params
=
[]
"""Returns a list of sub layers.
for
key
in
self
.
__dict__
.
keys
():
value
=
self
.
__dict__
[
key
]
Args:
if
isinstance
(
value
,
framework
.
Parameter
):
include_sublayers: If true, also include the layers from sublayers.
params
.
append
(
value
)
elif
isinstance
(
value
,
core
.
Layer
):
Returns a list of sub layers.
params
.
extend
(
value
.
parameters
())
"""
elif
isinstance
(
value
,
collections
.
Container
):
ret
=
[
l
for
l
in
self
.
_sub_layers
.
values
()]
if
len
(
value
)
==
0
:
if
include_sublayers
:
continue
for
l
in
self
.
_sub_layers
.
values
():
if
isinstance
(
value
[
0
],
framework
.
Parameter
):
for
sub_l
in
l
.
sublayers
(
include_sublayers
):
params
.
extend
(
value
)
ret
.
append
(
sub_l
)
elif
isinstance
(
value
[
0
],
core
.
Layer
):
return
ret
for
v
in
value
:
params
.
extend
(
v
.
parameters
())
return
params
def
clear_gradients
(
self
):
def
clear_gradients
(
self
):
for
p
in
self
.
parameters
():
for
p
in
self
.
parameters
():
p
.
_clear_gradient
()
p
.
_clear_gradient
()
def
_build_once
(
self
,
input
s
):
def
_build_once
(
self
,
*
arg
s
):
pass
pass
def
__call__
(
self
,
*
inputs
):
def
__call__
(
self
,
*
inputs
):
...
@@ -71,6 +86,66 @@ class Layer(core.Layer):
...
@@ -71,6 +86,66 @@ class Layer(core.Layer):
def
backward
(
self
,
*
inputs
):
def
backward
(
self
,
*
inputs
):
raise
ValueError
(
"Layer shouldn't implement backward"
)
raise
ValueError
(
"Layer shouldn't implement backward"
)
def
add_sublayer
(
self
,
name
,
sublayer
):
"""Adds a sub Layer instance.
The added sublayer can be accessed as self.name.
Args:
name: name of this sublayer.
sublayer: an instance of Layer.
Returns:
the sublayer passed in.
"""
assert
isinstance
(
sublayer
,
core
.
Layer
)
self
.
_sub_layers
[
name
]
=
sublayer
return
sublayer
def
add_parameter
(
self
,
name
,
parameter
):
"""Adds a Parameter instance.
The added parameter can be accessed as self.name.
Args:
name: name of this parameter.
parameter: an instance of Parameter.
Returns:
the parameter passed in.
"""
assert
isinstance
(
parameter
,
framework
.
Parameter
)
self
.
_parameters
[
name
]
=
parameter
return
parameter
def
__getattr__
(
self
,
name
):
if
name
in
self
.
_parameters
:
return
self
.
_parameters
[
name
]
elif
name
in
self
.
_sub_layers
:
return
self
.
_sub_layers
[
name
]
def
__setattr__
(
self
,
name
,
value
):
if
isinstance
(
value
,
framework
.
Parameter
):
params
=
self
.
__dict__
.
get
(
'_parameters'
,
None
)
if
params
is
None
:
raise
ValueError
(
"super(YourLayer, self).__init__() should be called first"
)
params
[
name
]
=
value
elif
isinstance
(
value
,
core
.
Layer
):
layers
=
self
.
__dict__
.
get
(
'_sub_layers'
,
None
)
if
layers
is
None
:
raise
ValueError
(
"super(YourLayer, self).__init__() should be called first"
)
layers
[
name
]
=
value
else
:
object
.
__setattr__
(
self
,
name
,
value
)
def
__delattr__
(
self
,
name
):
if
name
in
self
.
_parameters
:
del
self
.
_parameters
[
name
]
elif
name
in
self
.
_sub_layers
:
del
self
.
_sub_layers
[
name
]
else
:
object
.
__delattr__
(
self
,
name
)
class
PyLayer
(
core
.
PyLayer
):
class
PyLayer
(
core
.
PyLayer
):
"""Layers composed of user-defined python codes."""
"""Layers composed of user-defined python codes."""
...
...
python/paddle/fluid/imperative/nn.py
浏览文件 @
d79d2f68
...
@@ -225,9 +225,6 @@ class FC(layers.Layer):
...
@@ -225,9 +225,6 @@ class FC(layers.Layer):
act
=
act
,
act
=
act
,
name
=
name
)
name
=
name
)
def
parameters
(
self
):
return
[
self
.
_w
,
self
.
_b
]
def
_build_once
(
self
,
input
):
def
_build_once
(
self
,
input
):
input_shape
=
input
.
shape
input_shape
=
input
.
shape
param_shape
=
[
param_shape
=
[
...
@@ -478,9 +475,6 @@ class Embedding(layers.Layer):
...
@@ -478,9 +475,6 @@ class Embedding(layers.Layer):
dtype
=
self
.
_dtype
,
dtype
=
self
.
_dtype
,
is_bias
=
False
)
is_bias
=
False
)
def
parameters
(
self
):
return
[
self
.
_w
]
def
forward
(
self
,
input
):
def
forward
(
self
,
input
):
out
=
self
.
_helper
.
create_variable_for_type_inference
(
self
.
_dtype
)
out
=
self
.
_helper
.
create_variable_for_type_inference
(
self
.
_dtype
)
self
.
_helper
.
append_op
(
self
.
_helper
.
append_op
(
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
d79d2f68
...
@@ -3236,7 +3236,7 @@ def group_norm(input,
...
@@ -3236,7 +3236,7 @@ def group_norm(input,
# create output
# create output
mean_out
=
helper
.
create_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
mean_out
=
helper
.
create_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
variance_out
=
helper
.
create_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
variance_out
=
helper
.
create_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
group_norm_out
=
helper
.
create_variable
(
dtype
)
group_norm_out
=
helper
.
create_variable
(
dtype
=
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"group_norm"
,
type
=
"group_norm"
,
...
@@ -5936,13 +5936,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
...
@@ -5936,13 +5936,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
than :attr:`shape`.
than :attr:`shape`.
act (str): The non-linear activation to be applied to the reshaped tensor
act (str): The non-linear activation to be applied to the reshaped tensor
variable.
variable.
inplace(bool): Must use :attr:`False` if :attr:`x` is used in multiple
inplace(bool): If ``inplace`` is `True`, the input and output of ``layers.reshape``
operators. If this flag is set :attr:`True`, reuse input
are the same variable, otherwise, the input and output of
:attr:`x` to reshape, which will change the shape of
``layers.reshape`` are different variables. Note that if :attr:`x`
tensor variable :attr:`x` and might cause errors when
is more than one layer's input, ``inplace`` must be :attr:`False`.
:attr:`x` is used in multiple operators. If :attr:`False`,
preserve the shape :attr:`x` and create a new output tensor
variable whose data is copied from input x but reshaped.
name (str): The name of this layer. It is optional.
name (str): The name of this layer. It is optional.
Returns:
Returns:
...
@@ -8335,6 +8332,46 @@ def stack(x, axis=0):
...
@@ -8335,6 +8332,46 @@ def stack(x, axis=0):
If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x[0])+1`.
If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x[0])+1`.
If :code:`axis` is None, it would be replaced with 0.
If :code:`axis` is None, it would be replaced with 0.
For Example:
.. code-block:: text
Case 1:
Input:
x[0].data = [ [1.0 , 2.0 ] ]
x[0].dims = [1, 2]
x[1].data = [ [3.0 , 4.0 ] ]
x[1].dims = [1, 2]
x[2].data = [ [5.0 , 6.0 ] ]
x[2].dims = [1, 2]
Attrs:
axis = 0
Output:
Out.data =[ [ [1.0, 2.0] ],
[ [3.0, 4.0] ],
[ [5.0, 6.0] ] ]
Out.dims = [3, 1, 2]
Case 2:
Given
x[0].data = [ [1.0 , 2.0 ] ]
x[0].dims = [1, 2]
x[1].data = [ [3.0 , 4.0 ] ]
x[1].dims = [1, 2]
x[2].data = [ [5.0 , 6.0 ] ]
x[2].dims = [1, 2]
Attrs:
axis = 1 or axis = -2
Output:
Out.data =[ [ [1.0, 2.0]
[3.0, 4.0]
[5.0, 6.0] ] ]
Out.dims = [1, 3, 2]
Args:
Args:
x (Variable|list(Variable)|tuple(Variable)): Input variables.
x (Variable|list(Variable)|tuple(Variable)): Input variables.
axis (int|None): The axis along which all inputs are stacked.
axis (int|None): The axis along which all inputs are stacked.
...
...
python/paddle/fluid/layers/tensor.py
浏览文件 @
d79d2f68
...
@@ -567,7 +567,7 @@ def ones(shape, dtype, force_cpu=False):
...
@@ -567,7 +567,7 @@ def ones(shape, dtype, force_cpu=False):
It also sets *stop_gradient* to True.
It also sets *stop_gradient* to True.
Args:
Args:
shape(tuple|list
|None
): Shape of output tensor
shape(tuple|list): Shape of output tensor
dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor
dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor
Returns:
Returns:
...
@@ -578,6 +578,10 @@ def ones(shape, dtype, force_cpu=False):
...
@@ -578,6 +578,10 @@ def ones(shape, dtype, force_cpu=False):
data = fluid.layers.ones(shape=[1], dtype='int64')
data = fluid.layers.ones(shape=[1], dtype='int64')
"""
"""
assert
isinstance
(
shape
,
list
)
or
isinstance
(
shape
,
tuple
),
"The shape's type should be list or tuple."
assert
reduce
(
lambda
x
,
y
:
x
*
y
,
shape
)
>
0
,
"The shape is invalid: %s."
%
(
str
(
shape
))
return
fill_constant
(
value
=
1.0
,
**
locals
())
return
fill_constant
(
value
=
1.0
,
**
locals
())
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
d79d2f68
...
@@ -148,6 +148,8 @@ class ParallelExecutor(object):
...
@@ -148,6 +148,8 @@ class ParallelExecutor(object):
else
framework
.
default_main_program
()
else
framework
.
default_main_program
()
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass.
# if turn on python memory optimize, turn off the inplace_pass.
if
build_strategy
.
memory_optimize
is
None
:
build_strategy
.
memory_optimize
=
False
if
main
.
_is_mem_optimized
else
True
if
build_strategy
.
enable_inplace
is
None
:
if
build_strategy
.
enable_inplace
is
None
:
build_strategy
.
enable_inplace
=
False
if
main
.
_is_mem_optimized
else
True
build_strategy
.
enable_inplace
=
False
if
main
.
_is_mem_optimized
else
True
scope
=
scope
if
scope
is
not
None
else
executor
.
global_scope
()
scope
=
scope
if
scope
is
not
None
else
executor
.
global_scope
()
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
d79d2f68
...
@@ -77,6 +77,7 @@ list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op)
...
@@ -77,6 +77,7 @@ list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op)
list
(
REMOVE_ITEM TEST_OPS test_nearest_interp_op
)
list
(
REMOVE_ITEM TEST_OPS test_nearest_interp_op
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_resnet
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_resnet
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_optimizer
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_optimizer
)
list
(
REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
endforeach
(
TEST_OP
)
endforeach
(
TEST_OP
)
...
@@ -107,6 +108,9 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE
...
@@ -107,6 +108,9 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
set_tests_properties
(
test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450
)
set_tests_properties
(
test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450
)
py_test_modules
(
test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL
)
py_test_modules
(
test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL
)
if
(
NOT WIN32
)
py_test_modules
(
test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL
)
endif
()
if
(
NOT APPLE
)
if
(
NOT APPLE
)
py_test_modules
(
test_image_classification_resnet MODULES test_image_classification_resnet SERIAL
)
py_test_modules
(
test_image_classification_resnet MODULES test_image_classification_resnet SERIAL
)
if
(
CMAKE_BUILD_TYPE STREQUAL
"Debug"
)
if
(
CMAKE_BUILD_TYPE STREQUAL
"Debug"
)
...
...
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
浏览文件 @
d79d2f68
...
@@ -79,7 +79,7 @@ class TestParallelExecutorBase(unittest.TestCase):
...
@@ -79,7 +79,7 @@ class TestParallelExecutorBase(unittest.TestCase):
if
use_reduce
else
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
if
use_reduce
else
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
build_strategy
.
fuse_elewise_add_act_ops
=
fuse_elewise_add_act_ops
build_strategy
.
fuse_elewise_add_act_ops
=
fuse_elewise_add_act_ops
build_strategy
.
fuse_relu_depthwise_conv
=
fuse_relu_depthwise_conv
build_strategy
.
fuse_relu_depthwise_conv
=
fuse_relu_depthwise_conv
build_strategy
.
memory_optimize
=
use_ir_memory_optimize
build_strategy
.
memory_optimize
=
False
if
memory_opt
else
use_ir_memory_optimize
# Python memory optimization conflicts with the inplace pass.
# Python memory optimization conflicts with the inplace pass.
# Using IR graph memory optimization after the inplace pass is the correct way.
# Using IR graph memory optimization after the inplace pass is the correct way.
build_strategy
.
enable_inplace
=
False
if
memory_opt
else
enable_inplace
build_strategy
.
enable_inplace
=
False
if
memory_opt
else
enable_inplace
...
...
python/paddle/fluid/tests/unittests/test_base_layer.py
0 → 100644
浏览文件 @
d79d2f68
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
import
paddle.fluid
as
fluid
from
paddle.fluid.layer_helper
import
LayerHelper
class
L1
(
fluid
.
imperative
.
Layer
):
def
__init__
(
self
):
super
(
L1
,
self
).
__init__
()
self
.
_helper
=
LayerHelper
(
'MyLayer'
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.1
)))
self
.
w1
=
self
.
_helper
.
create_parameter
(
attr
=
self
.
_helper
.
param_attr
,
shape
=
[
2
,
2
],
dtype
=
'float32'
,
is_bias
=
False
)
self
.
w2
=
self
.
_helper
.
create_parameter
(
attr
=
self
.
_helper
.
param_attr
,
shape
=
[
2
,
2
],
dtype
=
'float32'
,
is_bias
=
False
)
def
forward
(
self
):
return
self
.
w1
+
self
.
w2
class
L2
(
fluid
.
imperative
.
Layer
):
def
__init__
(
self
):
super
(
L2
,
self
).
__init__
()
self
.
layer1
=
L1
()
self
.
layer2
=
L1
()
def
forward
(
self
):
return
self
.
layer1
()
+
self
.
layer2
()
class
L3
(
fluid
.
imperative
.
Layer
):
def
__init__
(
self
):
super
(
L3
,
self
).
__init__
()
self
.
layer1
=
L2
()
self
.
layer2
=
L2
()
def
forward
(
self
):
return
self
.
layer1
()
+
self
.
layer2
()
class
TestBaseLayer
(
unittest
.
TestCase
):
def
test_one_level
(
self
):
with
fluid
.
imperative
.
guard
():
l
=
L1
()
ret
=
l
()
self
.
assertEqual
(
l
.
w1
.
name
,
"MyLayer_0.w_0"
)
self
.
assertEqual
(
l
.
w2
.
name
,
"MyLayer_0.w_1"
)
self
.
assertTrue
(
np
.
allclose
(
ret
.
_numpy
(),
0.2
*
np
.
ones
([
2
,
2
])))
def
test_three_level
(
self
):
with
fluid
.
imperative
.
guard
():
l
=
L3
()
ret
=
l
()
self
.
assertTrue
(
np
.
allclose
(
ret
.
_numpy
(),
0.8
*
np
.
ones
([
2
,
2
])))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
浏览文件 @
d79d2f68
...
@@ -22,6 +22,9 @@ import six
...
@@ -22,6 +22,9 @@ import six
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
import
gc
gc
.
set_debug
(
gc
.
DEBUG_COLLECTABLE
)
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
...
@@ -99,6 +102,12 @@ class TranspilerTest(unittest.TestCase):
...
@@ -99,6 +102,12 @@ class TranspilerTest(unittest.TestCase):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
unique_name
.
guard
():
with
fluid
.
program_guard
(
main
,
startup
):
with
fluid
.
program_guard
(
main
,
startup
):
self
.
transpiler_test_impl
()
self
.
transpiler_test_impl
()
# NOTE: run gc.collect to eliminate pybind side objects to
# prevent random double-deallocate when inherited in python.
del
self
.
transpiler
del
main
del
startup
gc
.
collect
()
class
TestBasicModel
(
TranspilerTest
):
class
TestBasicModel
(
TranspilerTest
):
...
@@ -797,6 +806,7 @@ class TestNCCL2Transpile(TranspilerTest):
...
@@ -797,6 +806,7 @@ class TestNCCL2Transpile(TranspilerTest):
print
([
op
.
type
for
op
in
startup
.
global_block
().
ops
])
print
([
op
.
type
for
op
in
startup
.
global_block
().
ops
])
self
.
assertEqual
(
startup
.
global_block
().
ops
[
-
1
].
type
,
"gen_nccl_id"
)
self
.
assertEqual
(
startup
.
global_block
().
ops
[
-
1
].
type
,
"gen_nccl_id"
)
self
.
assertIsNotNone
(
startup
.
global_block
().
vars
.
get
(
"NCCLID"
))
self
.
assertIsNotNone
(
startup
.
global_block
().
vars
.
get
(
"NCCLID"
))
gc
.
collect
()
else
:
else
:
pass
pass
...
...
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
浏览文件 @
d79d2f68
...
@@ -121,6 +121,8 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -121,6 +121,8 @@ class TestMNIST(TestParallelExecutorBase):
regularization
=
fluid
.
regularizer
.
L2Decay
(
1e-6
))
regularization
=
fluid
.
regularizer
.
L2Decay
(
1e-6
))
return
optimizer
return
optimizer
# NOTE(dzh):
# need to make it compatible with elewise fuse act
not_fuse_op_first_loss
,
not_fuse_op_last_loss
=
self
.
check_network_convergence
(
not_fuse_op_first_loss
,
not_fuse_op_last_loss
=
self
.
check_network_convergence
(
model
,
model
,
feed_dict
=
{
"image"
:
img
,
feed_dict
=
{
"image"
:
img
,
...
@@ -128,6 +130,7 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -128,6 +130,7 @@ class TestMNIST(TestParallelExecutorBase):
use_cuda
=
use_cuda
,
use_cuda
=
use_cuda
,
fuse_elewise_add_act_ops
=
False
,
fuse_elewise_add_act_ops
=
False
,
memory_opt
=
False
,
memory_opt
=
False
,
use_ir_memory_optimize
=
False
,
optimizer
=
_optimizer
)
optimizer
=
_optimizer
)
fuse_op_first_loss
,
fuse_op_last_loss
=
self
.
check_network_convergence
(
fuse_op_first_loss
,
fuse_op_last_loss
=
self
.
check_network_convergence
(
model
,
model
,
...
@@ -136,6 +139,7 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -136,6 +139,7 @@ class TestMNIST(TestParallelExecutorBase):
use_cuda
=
use_cuda
,
use_cuda
=
use_cuda
,
fuse_elewise_add_act_ops
=
True
,
fuse_elewise_add_act_ops
=
True
,
memory_opt
=
False
,
memory_opt
=
False
,
use_ir_memory_optimize
=
False
,
optimizer
=
_optimizer
)
optimizer
=
_optimizer
)
for
loss
in
zip
(
not_fuse_op_first_loss
,
fuse_op_first_loss
):
for
loss
in
zip
(
not_fuse_op_first_loss
,
fuse_op_first_loss
):
...
...
python/paddle/fluid/tests/unittests/test_imperative.py
浏览文件 @
d79d2f68
...
@@ -333,6 +333,18 @@ class TestImperative(unittest.TestCase):
...
@@ -333,6 +333,18 @@ class TestImperative(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
dy_out
,
static_out
))
self
.
assertTrue
(
np
.
allclose
(
dy_out
,
static_out
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad
,
static_grad
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad
,
static_grad
))
params
=
mlp
.
parameters
(
True
)
self
.
assertEqual
(
"FC_0.w_0"
,
params
[
0
].
name
)
self
.
assertEqual
(
"FC_0.b_0"
,
params
[
1
].
name
)
self
.
assertEqual
(
"FC_1.w_0"
,
params
[
2
].
name
)
self
.
assertEqual
(
"FC_1.b_0"
,
params
[
3
].
name
)
self
.
assertEqual
(
len
(
params
),
4
)
sublayers
=
mlp
.
sublayers
(
True
)
self
.
assertEqual
(
mlp
.
_fc1
,
sublayers
[
0
])
self
.
assertEqual
(
mlp
.
_fc2
,
sublayers
[
1
])
self
.
assertEqual
(
len
(
sublayers
),
2
)
def
test_rnn
(
self
):
def
test_rnn
(
self
):
np_inp
=
np
.
array
([[
1.0
,
2.0
,
3.0
],
[
4.0
,
5.0
,
6.0
],
[
7.0
,
8.0
,
9.0
],
np_inp
=
np
.
array
([[
1.0
,
2.0
,
3.0
],
[
4.0
,
5.0
,
6.0
],
[
7.0
,
8.0
,
9.0
],
[
10.0
,
11.0
,
12.0
]])
[
10.0
,
11.0
,
12.0
]])
...
...
python/paddle/fluid/tests/unittests/test_imperative_gan.py
浏览文件 @
d79d2f68
...
@@ -33,9 +33,6 @@ class Discriminator(fluid.imperative.Layer):
...
@@ -33,9 +33,6 @@ class Discriminator(fluid.imperative.Layer):
self
.
_fc1
=
FC
(
size
=
32
,
act
=
'elu'
,
name
=
"d_fc1"
)
self
.
_fc1
=
FC
(
size
=
32
,
act
=
'elu'
,
name
=
"d_fc1"
)
self
.
_fc2
=
FC
(
size
=
1
,
name
=
"d_fc2"
)
self
.
_fc2
=
FC
(
size
=
1
,
name
=
"d_fc2"
)
def
parameters
(
self
):
return
self
.
_fc1
.
parameters
()
+
self
.
_fc2
.
parameters
()
def
forward
(
self
,
inputs
):
def
forward
(
self
,
inputs
):
x
=
self
.
_fc1
(
inputs
)
x
=
self
.
_fc1
(
inputs
)
return
self
.
_fc2
(
x
)
return
self
.
_fc2
(
x
)
...
@@ -48,10 +45,6 @@ class Generator(fluid.imperative.Layer):
...
@@ -48,10 +45,6 @@ class Generator(fluid.imperative.Layer):
self
.
_fc2
=
FC
(
size
=
64
,
act
=
'elu'
,
name
=
"g_fc2"
)
self
.
_fc2
=
FC
(
size
=
64
,
act
=
'elu'
,
name
=
"g_fc2"
)
self
.
_fc3
=
FC
(
size
=
1
,
name
=
"g_fc3"
)
self
.
_fc3
=
FC
(
size
=
1
,
name
=
"g_fc3"
)
def
parameters
(
self
):
return
self
.
_fc1
.
parameters
()
+
self
.
_fc2
.
parameters
(
)
+
self
.
_fc3
.
parameters
()
def
forward
(
self
,
inputs
):
def
forward
(
self
,
inputs
):
x
=
self
.
_fc1
(
inputs
)
x
=
self
.
_fc1
(
inputs
)
x
=
self
.
_fc2
(
x
)
x
=
self
.
_fc2
(
x
)
...
...
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
浏览文件 @
d79d2f68
...
@@ -75,16 +75,6 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
...
@@ -75,16 +75,6 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
self
.
hidden_array
.
append
(
pre_hidden
)
self
.
hidden_array
.
append
(
pre_hidden
)
self
.
cell_array
.
append
(
pre_cell
)
self
.
cell_array
.
append
(
pre_cell
)
def
parameters
(
self
):
parameters
=
list
()
for
param
in
self
.
weight_1_arr
:
parameters
.
append
(
param
)
for
param
in
self
.
weight_2_arr
:
parameters
.
append
(
param
)
for
bias
in
self
.
bias_arr
:
parameters
.
append
(
bias
)
return
parameters
def
forward
(
self
,
input_embedding
,
init_hidden
=
None
,
init_cell
=
None
):
def
forward
(
self
,
input_embedding
,
init_hidden
=
None
,
init_cell
=
None
):
res
=
[]
res
=
[]
for
index
in
range
(
self
.
_num_steps
):
for
index
in
range
(
self
.
_num_steps
):
...
@@ -177,12 +167,6 @@ class PtbModel(fluid.imperative.Layer):
...
@@ -177,12 +167,6 @@ class PtbModel(fluid.imperative.Layer):
def
_build_once
(
self
,
input
,
label
,
init_hidden
,
init_cell
):
def
_build_once
(
self
,
input
,
label
,
init_hidden
,
init_cell
):
pass
pass
def
parameters
(
self
):
parameters
=
self
.
simple_lstm_rnn
.
parameters
()
+
[
self
.
softmax_weight
,
self
.
softmax_bias
]
+
self
.
embedding
.
parameters
()
return
parameters
def
forward
(
self
,
input
,
label
,
init_hidden
,
init_cell
):
def
forward
(
self
,
input
,
label
,
init_hidden
,
init_cell
):
init_h
=
fluid
.
layers
.
reshape
(
init_h
=
fluid
.
layers
.
reshape
(
...
...
python/paddle/fluid/tests/unittests/test_imperative_resnet.py
浏览文件 @
d79d2f68
...
@@ -21,7 +21,6 @@ import paddle
...
@@ -21,7 +21,6 @@ import paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid
import
core
from
paddle.fluid.layer_helper
import
LayerHelper
from
paddle.fluid.layer_helper
import
LayerHelper
from
paddle.fluid.optimizer
import
SGDOptimizer
from
paddle.fluid.imperative.nn
import
Conv2D
,
Pool2D
,
BatchNorm
,
FC
from
paddle.fluid.imperative.nn
import
Conv2D
,
Pool2D
,
BatchNorm
,
FC
from
paddle.fluid.imperative.base
import
to_variable
from
paddle.fluid.imperative.base
import
to_variable
from
test_imperative_base
import
new_program_scope
from
test_imperative_base
import
new_program_scope
...
@@ -173,11 +172,13 @@ class ResNet(fluid.imperative.Layer):
...
@@ -173,11 +172,13 @@ class ResNet(fluid.imperative.Layer):
for
block
in
range
(
len
(
depth
)):
for
block
in
range
(
len
(
depth
)):
shortcut
=
False
shortcut
=
False
for
i
in
range
(
depth
[
block
]):
for
i
in
range
(
depth
[
block
]):
bottleneck_block
=
BottleneckBlock
(
bottleneck_block
=
self
.
add_sublayer
(
'bb_%d_%d'
%
(
block
,
i
),
BottleneckBlock
(
num_channels
=
num_channels
,
num_channels
=
num_channels
,
num_filters
=
num_filters
[
block
],
num_filters
=
num_filters
[
block
],
stride
=
2
if
i
==
0
and
block
!=
0
else
1
,
stride
=
2
if
i
==
0
and
block
!=
0
else
1
,
shortcut
=
shortcut
)
shortcut
=
shortcut
)
)
num_channels
=
bottleneck_block
.
_num_channels_out
num_channels
=
bottleneck_block
.
_num_channels_out
self
.
bottleneck_block_list
.
append
(
bottleneck_block
)
self
.
bottleneck_block_list
.
append
(
bottleneck_block
)
shortcut
=
True
shortcut
=
True
...
@@ -223,8 +224,7 @@ class TestImperativeResnet(unittest.TestCase):
...
@@ -223,8 +224,7 @@ class TestImperativeResnet(unittest.TestCase):
batch_size
=
batch_size
)
batch_size
=
batch_size
)
dy_param_init_value
=
{}
dy_param_init_value
=
{}
for
param
in
fluid
.
default_main_program
().
global_block
(
for
param
in
resnet
.
parameters
():
).
all_parameters
():
dy_param_init_value
[
param
.
name
]
=
param
.
_numpy
()
dy_param_init_value
[
param
.
name
]
=
param
.
_numpy
()
for
batch_id
,
data
in
enumerate
(
train_reader
()):
for
batch_id
,
data
in
enumerate
(
train_reader
()):
...
@@ -247,16 +247,14 @@ class TestImperativeResnet(unittest.TestCase):
...
@@ -247,16 +247,14 @@ class TestImperativeResnet(unittest.TestCase):
dy_out
=
avg_loss
.
_numpy
()
dy_out
=
avg_loss
.
_numpy
()
if
batch_id
==
0
:
if
batch_id
==
0
:
for
param
in
fluid
.
default_main_program
().
global_block
(
for
param
in
resnet
.
parameters
():
).
all_parameters
():
if
param
.
name
not
in
dy_param_init_value
:
if
param
.
name
not
in
dy_param_init_value
:
dy_param_init_value
[
param
.
name
]
=
param
.
_numpy
()
dy_param_init_value
[
param
.
name
]
=
param
.
_numpy
()
avg_loss
.
_backward
()
avg_loss
.
_backward
()
dy_grad_value
=
{}
dy_grad_value
=
{}
for
param
in
fluid
.
default_main_program
().
global_block
(
for
param
in
resnet
.
parameters
():
).
all_parameters
():
if
not
param
.
stop_gradient
:
if
not
param
.
stop_gradient
:
np_array
=
np
.
array
(
param
.
_ivar
.
_grad_ivar
().
value
()
np_array
=
np
.
array
(
param
.
_ivar
.
_grad_ivar
().
value
()
.
get_tensor
())
.
get_tensor
())
...
@@ -267,8 +265,7 @@ class TestImperativeResnet(unittest.TestCase):
...
@@ -267,8 +265,7 @@ class TestImperativeResnet(unittest.TestCase):
resnet
.
clear_gradients
()
resnet
.
clear_gradients
()
dy_param_value
=
{}
dy_param_value
=
{}
for
param
in
fluid
.
default_main_program
().
global_block
(
for
param
in
resnet
.
parameters
():
).
all_parameters
():
dy_param_value
[
param
.
name
]
=
param
.
_numpy
()
dy_param_value
[
param
.
name
]
=
param
.
_numpy
()
with
new_program_scope
():
with
new_program_scope
():
...
@@ -349,6 +346,7 @@ class TestImperativeResnet(unittest.TestCase):
...
@@ -349,6 +346,7 @@ class TestImperativeResnet(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
static_out
,
dy_out
))
self
.
assertTrue
(
np
.
allclose
(
static_out
,
dy_out
))
self
.
assertEqual
(
len
(
dy_param_init_value
),
len
(
static_param_init_value
))
self
.
assertEqual
(
len
(
dy_param_init_value
),
len
(
static_param_init_value
))
for
key
,
value
in
six
.
iteritems
(
static_param_init_value
):
for
key
,
value
in
six
.
iteritems
(
static_param_init_value
):
self
.
assertTrue
(
np
.
allclose
(
value
,
dy_param_init_value
[
key
]))
self
.
assertTrue
(
np
.
allclose
(
value
,
dy_param_init_value
[
key
]))
self
.
assertTrue
(
np
.
isfinite
(
value
.
all
()))
self
.
assertTrue
(
np
.
isfinite
(
value
.
all
()))
...
...
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
0 → 100644
浏览文件 @
d79d2f68
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
# Configure the process environment for this test module in one step.
# These assignments sit ahead of the test_parallel_executor_transformer import;
# NOTE(review): presumably that module (or the framework) reads them when it is
# loaded, so keep this block before that import -- confirm.
os.environ.update({
    # A threshold of 0.0 turns on the eager tensor deletion strategy.
    'FLAGS_eager_delete_tensor_gb': "0.0",
    # Dedicated recordio path for this test's dataset file.
    'RECORDIO_FILENAME':
    '/tmp/ir_memory_optimize_transformer.wmt16.recordio',
})
from
test_parallel_executor_transformer
import
TestTransformer
from
test_parallel_executor_transformer
import
transformer
# NOTE(dzhwinter): test different strategy collisions.
# open the eager delete tensor strategy by default.
class TestTransformerWithIR(TestTransformer):
    """Transformer convergence test exercising both memory-optimization modes.

    Reuses ``check_network_convergence`` inherited from ``TestTransformer`` and
    runs it once per memory-optimization configuration.
    """

    def test_main(self):
        """Check convergence with the Python transpiler, then with the IR pass.

        Does nothing when Paddle was not compiled with CUDA.
        """
        if not core.is_compiled_with_cuda():
            return

        # Each pair is (memory_opt, use_ir_memory_optimize):
        #   (True, False) -> check python transpiler
        #   (False, True) -> check IR memory optimize
        configurations = ((True, False), (False, True))
        for transpiler_opt, ir_opt in configurations:
            self.check_network_convergence(
                transformer,
                use_cuda=True,
                memory_opt=transpiler_opt,
                use_ir_memory_optimize=ir_opt)
if __name__ == '__main__':
    # Executed as a script: hand control to the unittest runner.
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录