Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
d79d2f68
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d79d2f68
编写于
2月 19, 2019
作者:
C
chengduozh
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into fix_shape_api_doc
test=develop
上级
3ce12b1b
28609b34
变更
55
展开全部
隐藏空白更改
内联
并排
Showing
55 changed file
with
1952 addition
and
403 deletion
+1952
-403
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+6
-1
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+2
-0
paddle/fluid/framework/details/inplace_op_pass.cc
paddle/fluid/framework/details/inplace_op_pass.cc
+1
-1
paddle/fluid/framework/details/memory_optimize_helper.cc
paddle/fluid/framework/details/memory_optimize_helper.cc
+66
-6
paddle/fluid/framework/details/memory_optimize_helper.h
paddle/fluid/framework/details/memory_optimize_helper.h
+2
-0
paddle/fluid/framework/details/memory_optimize_helper_test.cc
...le/fluid/framework/details/memory_optimize_helper_test.cc
+46
-0
paddle/fluid/framework/details/memory_optimize_pass.cc
paddle/fluid/framework/details/memory_optimize_pass.cc
+57
-51
paddle/fluid/framework/inplace_op_inference_test.cc
paddle/fluid/framework/inplace_op_inference_test.cc
+16
-16
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
+7
-3
paddle/fluid/imperative/layer.cc
paddle/fluid/imperative/layer.cc
+1
-1
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
...e/fluid/inference/analysis/ir_passes/subgraph_detector.cc
+0
-71
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h
...le/fluid/inference/analysis/ir_passes/subgraph_detector.h
+1
-26
paddle/fluid/operators/detection/density_prior_box_op.h
paddle/fluid/operators/detection/density_prior_box_op.h
+6
-7
paddle/fluid/operators/detection/prior_box_op.h
paddle/fluid/operators/detection/prior_box_op.h
+30
-39
paddle/fluid/operators/group_norm_op.cc
paddle/fluid/operators/group_norm_op.cc
+37
-2
paddle/fluid/operators/jit/benchmark.cc
paddle/fluid/operators/jit/benchmark.cc
+75
-0
paddle/fluid/operators/jit/test.cc
paddle/fluid/operators/jit/test.cc
+132
-1
paddle/fluid/operators/load_combine_op.cc
paddle/fluid/operators/load_combine_op.cc
+5
-1
paddle/fluid/operators/lstm_op.h
paddle/fluid/operators/lstm_op.h
+4
-0
paddle/fluid/operators/lstmp_op.h
paddle/fluid/operators/lstmp_op.h
+5
-0
paddle/fluid/operators/row_conv_op.cc
paddle/fluid/operators/row_conv_op.cc
+5
-5
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+17
-13
paddle/fluid/pybind/ir.cc
paddle/fluid/pybind/ir.cc
+32
-26
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+4
-7
python/CMakeLists.txt
python/CMakeLists.txt
+1
-0
python/paddle/fluid/compiler.py
python/paddle/fluid/compiler.py
+4
-1
python/paddle/fluid/contrib/int8_inference/README.md
python/paddle/fluid/contrib/int8_inference/README.md
+2
-2
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
...ddle/fluid/contrib/slim/quantization/quantization_pass.py
+377
-17
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+6
-0
python/paddle/fluid/contrib/slim/tests/__init__.py
python/paddle/fluid/contrib/slim/tests/__init__.py
+0
-0
python/paddle/fluid/contrib/slim/tests/configs/config.yaml
python/paddle/fluid/contrib/slim/tests/configs/config.yaml
+1
-1
python/paddle/fluid/contrib/slim/tests/configs/pruners.yaml
python/paddle/fluid/contrib/slim/tests/configs/pruners.yaml
+0
-0
python/paddle/fluid/contrib/slim/tests/configs/pruners_0.yaml
...on/paddle/fluid/contrib/slim/tests/configs/pruners_0.yaml
+0
-0
python/paddle/fluid/contrib/slim/tests/test_factory.py
python/paddle/fluid/contrib/slim/tests/test_factory.py
+1
-1
python/paddle/fluid/contrib/slim/tests/test_graph.py
python/paddle/fluid/contrib/slim/tests/test_graph.py
+80
-0
python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
...paddle/fluid/contrib/slim/tests/test_quantization_pass.py
+372
-0
python/paddle/fluid/contrib/tests/CMakeLists.txt
python/paddle/fluid/contrib/tests/CMakeLists.txt
+5
-1
python/paddle/fluid/contrib/tests/test_calibration.py
python/paddle/fluid/contrib/tests/test_calibration.py
+0
-4
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
...on/paddle/fluid/contrib/tests/test_quantize_transpiler.py
+5
-3
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+222
-24
python/paddle/fluid/imperative/layers.py
python/paddle/fluid/imperative/layers.py
+94
-19
python/paddle/fluid/imperative/nn.py
python/paddle/fluid/imperative/nn.py
+0
-6
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+45
-8
python/paddle/fluid/layers/tensor.py
python/paddle/fluid/layers/tensor.py
+5
-1
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+2
-0
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+4
-0
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
...ddle/fluid/tests/unittests/parallel_executor_test_base.py
+1
-1
python/paddle/fluid/tests/unittests/test_base_layer.py
python/paddle/fluid/tests/unittests/test_base_layer.py
+82
-0
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+10
-0
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
...e/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
+4
-0
python/paddle/fluid/tests/unittests/test_imperative.py
python/paddle/fluid/tests/unittests/test_imperative.py
+12
-0
python/paddle/fluid/tests/unittests/test_imperative_gan.py
python/paddle/fluid/tests/unittests/test_imperative_gan.py
+0
-7
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
...n/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
+0
-16
python/paddle/fluid/tests/unittests/test_imperative_resnet.py
...on/paddle/fluid/tests/unittests/test_imperative_resnet.py
+12
-14
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
...id/tests/unittests/test_ir_memory_optimize_transformer.py
+48
-0
未找到文件。
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
d79d2f68
...
...
@@ -50,7 +50,12 @@ cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_
cc_library
(
gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope
)
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper
)
if
(
WITH_GPU
)
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info
)
else
()
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper cpu_info
)
endif
()
cc_library
(
memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass
)
cc_library
(
inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info
)
cc_library
(
modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper
)
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
d79d2f68
...
...
@@ -240,7 +240,9 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
continue
;
}
}
VLOG
(
3
)
<<
"Start Apply Pass "
<<
pass
->
Type
();
graph
=
pass
->
Apply
(
std
::
move
(
graph
));
VLOG
(
3
)
<<
"Finish Apply Pass "
<<
pass
->
Type
();
}
return
graph
;
}
...
...
paddle/fluid/framework/details/inplace_op_pass.cc
浏览文件 @
d79d2f68
...
...
@@ -49,7 +49,7 @@ DEFINE_bool(
"If this option turns on, only these op in whitelist can be inplaced."
"If it turns off, all of the running op can be candidate of inplaced op."
"Such as scale, elementwise_add"
"By default, it's turned o
n
"
);
"By default, it's turned o
ff
"
);
DECLARE_string
(
memory_optimize_debug
);
...
...
paddle/fluid/framework/details/memory_optimize_helper.cc
浏览文件 @
d79d2f68
...
...
@@ -13,13 +13,19 @@
// limitations under the License.
#include "paddle/fluid/framework/details/memory_optimize_helper.h"
#include <algorithm>
#include <deque>
#include <functional>
#include <i
ostream
>
#include <i
terator
>
#include <numeric>
#include <sstream>
#include <string>
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/platform/cpu_info.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/gpu_info.h"
#endif // PADDLE_WITH_CUDA
namespace
paddle
{
namespace
framework
{
...
...
@@ -166,6 +172,11 @@ struct NodeComparator {
bool
operator
()(
ir
::
Node
*
lhs
,
ir
::
Node
*
rhs
)
const
{
auto
*
lhs_desc
=
FindVarDescInBlock
(
lhs
);
auto
*
rhs_desc
=
FindVarDescInBlock
(
rhs
);
// match data type
if
(
lhs_desc
->
GetDataType
()
!=
rhs_desc
->
GetDataType
())
{
return
false
;
}
// match shape
auto
lhs_shape
=
lhs_desc
->
GetShape
();
auto
rhs_shape
=
rhs_desc
->
GetShape
();
if
((
lhs_shape
[
0
]
==
-
1
&&
rhs_shape
[
0
]
==
-
1
)
||
...
...
@@ -230,6 +241,27 @@ ir::Node* OrderedSet::FindBestFitNode(ir::Node* var) const {
return
found_node
;
}
ir
::
Node
*
OrderedSet
::
FindNextBestFitNode
(
ir
::
Node
*
var
,
ir
::
Node
*
prev
)
const
{
ir
::
Node
*
found_node
=
nullptr
;
NodeComparator
functor
;
auto
it
=
std
::
find_if
(
nodes_
.
begin
(),
nodes_
.
end
(),
[
&
](
const
NodeVector
&
v
)
{
if
(
v
.
front
()
==
prev
)
return
true
;
else
return
false
;
});
PADDLE_ENFORCE
(
it
!=
nodes_
.
end
(),
"Not found previous in node list!"
);
for
(
it
=
std
::
next
(
it
);
it
!=
nodes_
.
end
();
++
it
)
{
auto
&
candidate
=
it
->
front
();
if
(
functor
(
var
,
candidate
))
{
found_node
=
candidate
;
break
;
}
}
return
found_node
;
}
bool
OrderedSet
::
Has
(
ir
::
Node
*
var
)
const
{
if
(
mark_table_
.
count
(
var
->
Name
()))
{
auto
&
node_in_samename
=
mark_table_
.
at
(
var
->
Name
());
...
...
@@ -241,10 +273,15 @@ bool OrderedSet::Has(ir::Node* var) const {
return
false
;
}
void
OrderedSet
::
Erase
(
const
std
::
string
&
var
)
{
PADDLE_ENFORCE
(
mark_table_
.
count
(
var
));
nodes_
.
erase
(
mark_table_
[
var
]);
mark_table_
.
erase
(
var
);
}
void
OrderedSet
::
Erase
(
ir
::
Node
*
var
)
{
PADDLE_ENFORCE
(
mark_table_
.
count
(
var
->
Name
()));
nodes_
.
erase
(
mark_table_
[
var
->
Name
()]);
mark_table_
.
erase
(
var
->
Name
());
PADDLE_ENFORCE
(
var
!=
nullptr
);
Erase
(
var
->
Name
());
}
std
::
string
OrderedSet
::
ToString
()
const
{
...
...
@@ -274,14 +311,35 @@ bool NodeCanReused(ir::Node* node) {
return
flag
;
}
int
MinChunkSize
()
{
int
size
{
0
};
#ifdef PADDLE_WITH_CUDA
size
=
platform
::
GpuMinChunkSize
();
#else
size
=
platform
::
CpuMinChunkSize
();
#endif // PADDLE_WITH_CUDA
return
size
;
}
bool
NodeCanReused
(
const
VarDesc
&
node
)
{
auto
type
=
node
.
GetType
();
// only these types holds bulk of gpu memory
if
(
!
(
type
==
proto
::
VarType
::
LOD_TENSOR
||
type
==
proto
::
VarType
::
SELECTED_ROWS
||
type
==
proto
::
VarType
::
LOD_TENSOR_ARRAY
))
{
return
false
;
}
if
(
node
.
Persistable
()
||
node
.
GetShape
().
empty
())
{
// persistable variable is parameter
if
(
node
.
Persistable
())
{
return
false
;
}
// shape < min_chunk_size is meaningless.
// further more, fetched loss always has size = 1
// which should not be reused.
auto
shape
=
node
.
GetShape
();
int
size
=
std
::
abs
(
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
int
>
()));
if
(
shape
.
empty
()
||
size
<
MinChunkSize
())
{
return
false
;
}
// vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
...
...
@@ -461,7 +519,9 @@ ir::Node* ControlFlowGraph::GetNodeByName(const std::string& name,
for
(
auto
*
node
:
ops_
)
{
if
(
node
==
op
)
break
;
for
(
auto
&
output
:
node
->
outputs
)
{
if
(
output
->
Name
()
==
name
)
{
PADDLE_ENFORCE
((
output
!=
nullptr
&&
output
->
IsVar
()),
"Output is empty!"
);
if
(
output
->
Var
()
&&
output
->
Name
()
==
name
)
{
found_node
=
output
;
}
}
...
...
paddle/fluid/framework/details/memory_optimize_helper.h
浏览文件 @
d79d2f68
...
...
@@ -55,6 +55,7 @@ class OrderedSet {
void
Insert
(
ir
::
Node
*
var
);
void
Erase
(
ir
::
Node
*
var
);
void
Erase
(
const
std
::
string
&
var
);
bool
Has
(
ir
::
Node
*
var
)
const
;
void
Clear
()
{
mark_table_
.
clear
();
...
...
@@ -62,6 +63,7 @@ class OrderedSet {
}
// find the bestfit shape node block with var.
ir
::
Node
*
FindBestFitNode
(
ir
::
Node
*
var
)
const
;
ir
::
Node
*
FindNextBestFitNode
(
ir
::
Node
*
var
,
ir
::
Node
*
prev
)
const
;
// map store non-const iterator, can not promise const
int
GetNodeIndexInPool
(
ir
::
Node
*
var
);
// pool all node to string
...
...
paddle/fluid/framework/details/memory_optimize_helper_test.cc
浏览文件 @
d79d2f68
...
...
@@ -107,6 +107,52 @@ TEST(OrderedSet, Normal) {
ASSERT_EQ
(
pool
.
GetNodeIndexInPool
(
cache
),
5
);
// match 4:[5,2]
}
}
TEST
(
OrderedSet
,
FindBestFitNode
)
{
OrderedSet
pool
;
std
::
vector
<
std
::
unique_ptr
<
ir
::
Node
>>
nodes
;
ProgramDesc
prog
;
BlockDesc
*
block_desc
=
prog
.
MutableBlock
(
0
);
auto
*
op_desc
=
block_desc
->
AppendOp
();
op_desc
->
SetType
(
"dummy"
);
std
::
unique_ptr
<
ir
::
Node
>
op
=
ir
::
CreateNodeForTest
(
op_desc
);
{
auto
desc
=
block_desc
->
Var
(
"a"
);
desc
->
SetShape
({
128
,
128
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
{
auto
desc
=
block_desc
->
Var
(
"b"
);
desc
->
SetShape
({
128
,
129
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
{
auto
desc
=
block_desc
->
Var
(
"c"
);
desc
->
SetShape
({
128
,
128
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
for
(
auto
&
node
:
nodes
)
{
pool
.
Insert
(
node
.
get
());
}
// FindNextBestFitNode
auto
*
n
=
nodes
[
0
].
get
();
auto
*
cache
=
pool
.
FindBestFitNode
(
n
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"a"
);
cache
=
pool
.
FindNextBestFitNode
(
n
,
cache
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"c"
);
cache
=
pool
.
FindNextBestFitNode
(
n
,
cache
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"b"
);
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
...
...
paddle/fluid/framework/details/memory_optimize_pass.cc
浏览文件 @
d79d2f68
...
...
@@ -69,55 +69,59 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
}
for
(
auto
&
var
:
op
->
outputs
)
{
if
(
!
NodeCanReused
(
var
)
||
cfg_
->
Use
(
op
).
count
(
var
->
Name
())
==
0
||
skip_set_
.
count
(
var
->
Name
()))
if
(
var
->
IsVar
()
&&
!
var
->
IsCtrlVar
()
&&
skip_set_
.
count
(
var
->
Name
()))
{
VLOG
(
3
)
<<
"Skip set contains variable of "
<<
var
->
Name
()
<<
"disable reuse on it. skipped"
;
continue
;
ir
::
Node
*
cache
=
pool_
.
FindBestFitNode
(
var
);
if
(
var
->
Name
()
==
FLAGS_memory_optimize_debug
)
{
VLOG
(
3
)
<<
"start match var "
<<
DebugString
(
var
)
<<
" of op "
<<
op
->
Name
();
VLOG
(
3
)
<<
pool_
.
ToString
();
VLOG
(
3
)
<<
"matched in pool : "
<<
((
cache
==
nullptr
)
?
"False"
:
"True"
);
}
if
(
NodeCanReused
(
var
)
&&
cfg_
->
Use
(
op
).
count
(
var
->
Name
())
==
0
)
{
ir
::
Node
*
cache
=
pool_
.
FindBestFitNode
(
var
);
while
(
cache
!=
nullptr
&&
var
->
Name
()
==
cache
->
Name
())
{
VLOG
(
3
)
<<
"The same cache variable is cascade reused. "
<<
cache
->
Name
()
<<
" is re-filled to the pool after "
<<
"the reused op is finished. Current op can not "
<<
"replace it again. Skip this candidate."
;
cache
=
pool_
.
FindNextBestFitNode
(
var
,
cache
);
}
if
(
var
->
Name
()
==
FLAGS_memory_optimize_debug
)
{
VLOG
(
3
)
<<
"start match var "
<<
DebugString
(
var
)
<<
" of op "
<<
op
->
Name
();
VLOG
(
3
)
<<
pool_
.
ToString
();
VLOG
(
3
)
<<
"matched in pool : "
<<
((
cache
==
nullptr
)
?
"False"
:
"True"
);
}
if
(
cache
==
nullptr
)
continue
;
if
(
var
->
Name
()
==
cache
->
Name
())
{
VLOG
(
3
)
<<
"The same cache variable is cascade reused."
<<
var
->
Name
()
<<
" is re-filled to the pool after"
<<
"the reused op is finished. Current op can not "
<<
"replace it again. Skip this candidate."
;
continue
;
int
node_idx_in_pool
=
pool_
.
GetNodeIndexInPool
(
cache
);
VLOG
(
3
)
<<
string
::
Sprintf
(
"!!! %s, %s => %s, cache idx %d, pool size %d"
,
std
::
to_string
(
reuse_id
++
),
DebugString
(
var
),
DebugString
(
cache
),
node_idx_in_pool
,
static_cast
<
int
>
(
pool_
.
size
()));
// update CFG Graph on the fly.
// reused var maybe re-fill into the pool
cfg_
->
RenameVarInCFGGraph
(
var
->
Name
(),
cache
->
Name
(),
idx
);
// NOTE(dzhwinter): we need to both update the ProgramDesc
// and IR Graph. because op_desc/var_desc is used in CreateOp,
// CreateVar when running happens. But IR Graph
// define the dependence relationship between nodes.
RenameVarInGraphDesc
(
var
->
Name
(),
cache
->
Name
(),
idx
);
RenameVarInGraphNode
(
var
->
Name
(),
cache
->
Name
(),
idx
,
graph
.
get
());
pool_
.
Erase
(
cache
);
}
if
(
cache
!=
nullptr
)
{
int
node_idx_in_pool
=
pool_
.
GetNodeIndexInPool
(
cache
);
VLOG
(
3
)
<<
string
::
Sprintf
(
"!!! %s, %s => %s, cache idx %d, pool size %d"
,
std
::
to_string
(
reuse_id
++
),
DebugString
(
var
),
DebugString
(
cache
),
node_idx_in_pool
,
static_cast
<
int
>
(
pool_
.
size
()));
// NOTE(dzhwinter): update the ProgramDesc/IR Graph
// and the CFG Graph on the fly.
//
// IR Graph define the dependence relationship between nodes.
//
// ProgramDesc defines the input/output vars. Its used in
// CreateOp, CreateVar when running happens.
//
// CFG Graph store the liveness information, when reuse happens
// we also need to update the variable liveness.
const
std
::
string
var_name
=
var
->
Name
();
const
std
::
string
cache_name
=
cache
->
Name
();
// fill the pool
std
::
unordered_set
<
std
::
string
>
unlived_vars
;
for
(
auto
var
:
cfg_
->
LiveIn
(
op
))
{
if
(
cfg_
->
LiveOut
(
op
).
count
(
var
)
==
0
)
{
unlived_vars
.
emplace
(
var
);
cfg_
->
RenameVarInCFGGraph
(
var_name
,
cache_name
,
idx
);
RenameVarInGraphDesc
(
var_name
,
cache_name
,
idx
);
RenameVarInGraphNode
(
var_name
,
cache_name
,
idx
,
graph
.
get
());
pool_
.
Erase
(
cache_name
);
}
}
for
(
auto
var
:
unlived_vars
)
{
}
// fill the pool
for
(
auto
var
:
cfg_
->
LiveIn
(
op
))
{
if
(
cfg_
->
LiveOut
(
op
).
count
(
var
)
==
0
)
{
ir
::
Node
*
var_node
=
cfg_
->
GetNodeByName
(
var
,
op
);
if
(
var_node
==
nullptr
||
var_node
->
IsCtrlVar
())
continue
;
if
(
NodeCanReused
(
var_node
)
&&
!
pool_
.
Has
(
var_node
))
{
pool_
.
Insert
(
var_node
);
}
...
...
@@ -273,8 +277,7 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
// redirect the input to the latest version of cache_var
for
(
auto
*
node
:
op
->
inputs
)
{
if
(
node
->
Name
()
==
var
)
{
ir
::
Node
*
cache_node
=
graph
->
CreateVarNode
(
var_desc
.
get
());
var_nodes_
[
cache_var
].
emplace_back
(
cache_node
);
ir
::
Node
*
cache_node
=
var_nodes_
[
cache_var
].
back
();
// swap node to cache_node
cache_node
->
outputs
.
insert
(
cache_node
->
outputs
.
end
(),
...
...
@@ -283,11 +286,15 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
auto
*
prev_op
=
node
->
inputs
[
0
];
std
::
replace
(
prev_op
->
outputs
.
begin
(),
prev_op
->
outputs
.
end
(),
node
,
cache_node
);
cache_node
->
inputs
.
emplace_back
(
prev_op
);
for
(
auto
*
next_op
:
node
->
outputs
)
{
std
::
replace
(
next_op
->
inputs
.
begin
(),
next_op
->
inputs
.
end
(),
node
,
cache_node
);
}
// erase unused node
auto
&
nodes
=
var_nodes_
.
at
(
var
);
nodes
.
erase
(
std
::
remove
(
nodes
.
begin
(),
nodes
.
end
(),
node
),
nodes
.
end
());
graph
->
RemoveNode
(
node
);
}
}
...
...
@@ -307,15 +314,14 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
std
::
replace
(
next_op
->
inputs
.
begin
(),
next_op
->
inputs
.
end
(),
node
,
cache_node
);
}
// erase unused node
auto
&
nodes
=
var_nodes_
.
at
(
var
);
nodes
.
erase
(
std
::
remove
(
nodes
.
begin
(),
nodes
.
end
(),
node
),
nodes
.
end
());
graph
->
RemoveNode
(
node
);
}
}
}
// release node of unused var in graph
for
(
auto
*
node
:
var_nodes_
[
var
])
{
graph
->
RemoveNode
(
node
);
}
var_nodes_
.
at
(
var
).
clear
();
}
}
// namespace details
...
...
paddle/fluid/framework/inplace_op_inference_test.cc
浏览文件 @
d79d2f68
...
...
@@ -179,11 +179,11 @@ TEST(InferInplace, SingleOpInplaceInToOut) {
op
->
SetOutput
(
"Out"
,
{
"test2_out"
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
64
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
64
,
128
,
128
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
,
128
,
128
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
@@ -201,11 +201,11 @@ TEST(InferInplace, SingleGradOpInplaceInToOut) {
op
->
SetOutput
(
GradVarName
(
"X"
),
{
"test2_a"
,
"test2_b"
,
"test2_c"
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
@@ -233,12 +233,12 @@ TEST(InferInplace, MultiOutInplaceInToOut) {
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
@@ -267,12 +267,12 @@ TEST(InferInplace, MultiGradInplaceInToOut) {
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
浏览文件 @
d79d2f68
...
...
@@ -38,9 +38,13 @@ std::unique_ptr<ir::Graph> IdentityScaleOpCleanPass::ApplyImpl(
->
assert_is_op
(
"scale"
)
->
assert_op_attr
<
float
>
(
"scale"
,
1.
)
->
assert_op_attr
<
float
>
(
"bias"
,
0.
);
auto
scale_out
=
detector
.
mutable_pattern
()
->
NewNode
(
"scale_out"
)
->
assert_is_op_output
(
"scale"
);
auto
scale_out
=
detector
.
mutable_pattern
()
->
NewNode
(
"scale_out"
)
->
assert_is_op_output
(
"scale"
)
// scale's output var should has only one consumer, or it can't be
// removed.
->
assert_more
([](
Node
*
x
)
{
return
x
->
outputs
.
size
()
==
1UL
;
});
pre_op
->
LinksTo
({
scale_in
});
scale_op
->
LinksFrom
({
scale_in
}).
LinksTo
({
scale_out
});
...
...
paddle/fluid/imperative/layer.cc
浏览文件 @
d79d2f68
...
...
@@ -207,7 +207,7 @@ framework::LoDTensor& VarBase::GradValue() {
std
::
map
<
std
::
string
,
std
::
vector
<
VarBase
*>>
OpBase
::
ApplyGrad
()
{
if
(
grad_op_descs_
.
empty
()
&&
backward_id_
<=
0
)
{
LOG
(
WARNING
)
<<
"op with no grad: "
<<
op_desc_
->
Type
();
VLOG
(
3
)
<<
"op with no grad: "
<<
op_desc_
->
Type
();
return
{};
}
...
...
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
浏览文件 @
d79d2f68
...
...
@@ -460,77 +460,6 @@ inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) {
return
node
.
inputs
.
size
()
==
n
;
}
NodesTSIterator
::
NodesTSIterator
(
const
std
::
vector
<
Node
*>
&
source
)
{
PADDLE_ENFORCE
(
!
source
.
empty
(),
"Start points of topological sorting should not be empty!"
);
// CHECK all the inputs' in-degree is 0
for
(
auto
*
node
:
source
)
{
PADDLE_ENFORCE
(
CheckNodeIndegreeEquals
(
*
node
,
0
));
}
std
::
unordered_set
<
Node
*>
visited
;
std
::
unordered_set
<
Node
*>
to_visit
{
source
.
begin
(),
source
.
end
()};
std
::
vector
<
Node
*>
inlink_visited
;
while
(
!
to_visit
.
empty
())
{
std
::
vector
<
Node
*>
queue
(
to_visit
.
begin
(),
to_visit
.
end
());
for
(
auto
*
p
:
queue
)
{
if
(
Agent
(
p
).
deleted
())
{
visited
.
insert
(
p
);
to_visit
.
erase
(
p
);
}
inlink_visited
.
clear
();
std
::
copy_if
(
p
->
inputs
.
begin
(),
p
->
inputs
.
end
(),
std
::
back_inserter
(
inlink_visited
),
[
&
](
Node
*
x
)
->
bool
{
return
visited
.
count
(
x
)
!=
0
;
});
if
(
inlink_visited
.
size
()
==
p
->
inputs
.
size
())
{
sorted_
.
push_back
(
p
);
for
(
auto
*
_
:
p
->
outputs
)
{
if
(
!
visited
.
count
(
_
))
{
to_visit
.
insert
(
_
);
}
}
to_visit
.
erase
(
p
);
visited
.
insert
(
p
);
}
}
}
}
NodesTSIterator
::
NodesTSIterator
(
const
NodesTSIterator
&
other
)
:
sorted_
(
other
.
sorted_
),
cursor_
(
other
.
cursor_
)
{}
Node
&
NodesTSIterator
::
operator
*
()
{
PADDLE_ENFORCE_LT
(
cursor_
,
sorted_
.
size
());
return
*
sorted_
[
cursor_
];
}
NodesTSIterator
&
NodesTSIterator
::
operator
++
()
{
if
(
++
cursor_
>=
sorted_
.
size
())
{
sorted_
.
clear
();
cursor_
=
0
;
}
return
*
this
;
}
NodesTSIterator
&
NodesTSIterator
::
operator
=
(
const
NodesTSIterator
&
other
)
{
cursor_
=
other
.
cursor_
;
sorted_
=
other
.
sorted_
;
return
*
this
;
}
bool
NodesTSIterator
::
operator
==
(
const
NodesTSIterator
&
other
)
{
return
sorted_
==
other
.
sorted_
&&
cursor_
==
other
.
cursor_
;
}
Node
*
NodesTSIterator
::
operator
->
()
{
PADDLE_ENFORCE_LT
(
cursor_
,
sorted_
.
size
());
return
sorted_
[
cursor_
];
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h
浏览文件 @
d79d2f68
...
...
@@ -30,6 +30,7 @@ namespace inference {
namespace
analysis
{
using
framework
::
ir
::
Graph
;
using
framework
::
ir
::
NodesTSIterator
;
const
char
kIsFunctionNode
[]
=
"__is_function_node__"
;
const
char
kFunctionNodeSubGraph
[]
=
"__function_node_sub_graph__"
;
...
...
@@ -132,32 +133,6 @@ struct Agent {
framework
::
ir
::
Node
*
x_
;
};
// Topological sorting iterator on nodes.
struct
NodesTSIterator
:
public
std
::
iterator
<
std
::
forward_iterator_tag
,
framework
::
ir
::
Node
*>
{
NodesTSIterator
()
=
default
;
explicit
NodesTSIterator
(
const
std
::
vector
<
framework
::
ir
::
Node
*>
&
source
);
NodesTSIterator
(
NodesTSIterator
&&
other
)
:
sorted_
(
std
::
move
(
other
.
sorted_
)),
cursor_
(
other
.
cursor_
)
{
other
.
cursor_
=
0
;
}
NodesTSIterator
(
const
NodesTSIterator
&
other
);
framework
::
ir
::
Node
&
operator
*
();
NodesTSIterator
&
operator
++
();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesTSIterator
&
operator
=
(
const
NodesTSIterator
&
other
);
bool
operator
==
(
const
NodesTSIterator
&
other
);
bool
operator
!=
(
const
NodesTSIterator
&
other
)
{
return
!
(
*
this
==
other
);
}
framework
::
ir
::
Node
*
operator
->
();
private:
std
::
vector
<
framework
::
ir
::
Node
*>
sorted_
;
size_t
cursor_
{
0
};
};
// The nodes those have no input will be treated as start points.
static
std
::
vector
<
framework
::
ir
::
Node
*>
ExtractStartPoints
(
const
Graph
&
g
)
{
std
::
vector
<
framework
::
ir
::
Node
*>
result
;
...
...
paddle/fluid/operators/detection/density_prior_box_op.h
浏览文件 @
d79d2f68
...
...
@@ -72,7 +72,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for
#endif
for
(
in
t
i
=
0
;
i
<
fixed_ratios
.
size
();
i
++
)
{
for
(
size_
t
i
=
0
;
i
<
fixed_ratios
.
size
();
i
++
)
{
sqrt_fixed_ratios
.
push_back
(
sqrt
(
fixed_ratios
[
i
]));
}
...
...
@@ -115,11 +115,10 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
}
}
if
(
clip
)
{
platform
::
Transform
<
platform
::
CPUDeviceContext
>
trans
;
ClipFunctor
<
T
>
clip_func
;
trans
(
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>(),
boxes
->
data
<
T
>
(),
boxes
->
data
<
T
>
()
+
boxes
->
numel
(),
boxes
->
data
<
T
>
(),
clip_func
);
T
*
dt
=
boxes
->
data
<
T
>
();
std
::
transform
(
dt
,
dt
+
boxes
->
numel
(),
dt
,
[](
T
v
)
->
T
{
return
std
::
min
<
T
>
(
std
::
max
<
T
>
(
v
,
0.
),
1.
);
});
}
framework
::
Tensor
var_t
;
var_t
.
mutable_data
<
T
>
(
...
...
@@ -141,7 +140,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
#pragma omp parallel for collapse(2)
#endif
for
(
int
i
=
0
;
i
<
box_num
;
++
i
)
{
for
(
in
t
j
=
0
;
j
<
variances
.
size
();
++
j
)
{
for
(
size_
t
j
=
0
;
j
<
variances
.
size
();
++
j
)
{
e_vars
(
i
,
j
)
=
variances
[
j
];
}
}
...
...
paddle/fluid/operators/detection/prior_box_op.h
浏览文件 @
d79d2f68
...
...
@@ -46,13 +46,6 @@ inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
}
}
template
<
typename
T
>
struct
ClipFunctor
{
HOSTDEVICE
inline
T
operator
()(
T
in
)
const
{
return
std
::
min
<
T
>
(
std
::
max
<
T
>
(
in
,
0.
),
1.
);
}
};
template
<
typename
T
>
class
PriorBoxOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -101,31 +94,30 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
boxes
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
vars
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
e_boxes
=
framework
::
EigenTensor
<
T
,
4
>::
From
(
*
boxes
);
T
*
b_t
=
boxes
->
data
<
T
>
(
);
for
(
int
h
=
0
;
h
<
feature_height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
feature_width
;
++
w
)
{
T
center_x
=
(
w
+
offset
)
*
step_width
;
T
center_y
=
(
h
+
offset
)
*
step_height
;
T
box_width
,
box_height
;
int
idx
=
0
;
for
(
size_t
s
=
0
;
s
<
min_sizes
.
size
();
++
s
)
{
auto
min_size
=
min_sizes
[
s
];
if
(
min_max_aspect_ratios_order
)
{
box_width
=
box_height
=
min_size
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
b_t
+=
4
;
if
(
max_sizes
.
size
()
>
0
)
{
auto
max_size
=
max_sizes
[
s
];
// square prior with size sqrt(minSize * maxSize)
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
b_t
+=
4
;
}
// priors with different aspect ratios
for
(
size_t
r
=
0
;
r
<
aspect_ratios
.
size
();
++
r
)
{
...
...
@@ -135,11 +127,11 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
}
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
b_t
+=
4
;
}
}
else
{
// priors with different aspect ratios
...
...
@@ -147,21 +139,21 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
float
ar
=
aspect_ratios
[
r
];
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
b_t
+=
4
;
}
if
(
max_sizes
.
size
()
>
0
)
{
auto
max_size
=
max_sizes
[
s
];
// square prior with size sqrt(minSize * maxSize)
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
b_t
+=
4
;
}
}
}
...
...
@@ -169,11 +161,10 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
}
if
(
clip
)
{
platform
::
Transform
<
platform
::
CPUDeviceContext
>
trans
;
ClipFunctor
<
T
>
clip_func
;
trans
(
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>(),
boxes
->
data
<
T
>
(),
boxes
->
data
<
T
>
()
+
boxes
->
numel
(),
boxes
->
data
<
T
>
(),
clip_func
);
T
*
dt
=
boxes
->
data
<
T
>
();
std
::
transform
(
dt
,
dt
+
boxes
->
numel
(),
dt
,
[](
T
v
)
->
T
{
return
std
::
min
<
T
>
(
std
::
max
<
T
>
(
v
,
0.
),
1.
);
});
}
framework
::
Tensor
var_t
;
...
...
paddle/fluid/operators/group_norm_op.cc
浏览文件 @
d79d2f68
...
...
@@ -170,13 +170,48 @@ class GroupNormGradMaker : public framework::SingleGradOpDescMaker {
}
};
class
GroupNormInplaceInToOut
:
public
framework
::
InplaceInToOut
{
public:
using
InplaceInToOut
::
InplaceInToOut
;
protected:
std
::
unordered_map
<
std
::
string
,
std
::
string
>
Apply
(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{
return
{{
"X"
,
"Y"
}};
}
};
class
GroupNormGradInplaceInToOut
:
public
framework
::
InplaceInToOut
{
public:
using
InplaceInToOut
::
InplaceInToOut
;
protected:
std
::
unordered_map
<
std
::
string
,
std
::
string
>
Apply
(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{
return
{{
framework
::
GradVarName
(
"Y"
),
framework
::
GradVarName
(
"X"
)}};
}
};
class
GroupNormOpInferVarType
:
public
framework
::
PassInDtypeAndVarTypeToOutput
{
protected:
std
::
unordered_map
<
std
::
string
,
std
::
string
>
GetInputOutputWithSameType
()
const
override
{
return
{{
"X"
,
/*->*/
"Y"
}};
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
group_norm
,
ops
::
GroupNormOp
,
ops
::
GroupNormOpMaker
,
ops
::
GroupNormGradMaker
);
REGISTER_OPERATOR
(
group_norm_grad
,
ops
::
GroupNormGradOp
);
ops
::
GroupNormOpInferVarType
,
ops
::
GroupNormGradMaker
,
ops
::
GroupNormInplaceInToOut
);
REGISTER_OPERATOR
(
group_norm_grad
,
ops
::
GroupNormGradOp
,
ops
::
GroupNormGradInplaceInToOut
);
REGISTER_OP_CPU_KERNEL
(
group_norm
,
ops
::
GroupNormKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GroupNormKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
...
...
paddle/fluid/operators/jit/benchmark.cc
浏览文件 @
d79d2f68
...
...
@@ -339,6 +339,71 @@ void BenchSoftmaxKernel() {
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchLayerNormKernel
()
{
const
T
epsilon
=
9.99999975e-06
;
for
(
int
n
:
{
1
,
2
,
10
})
{
for
(
int
x_dim_0
:
{
1
,
9
,
17
,
50
})
{
int
left
=
n
*
x_dim_0
;
for
(
int
x_dim_1
:
TestSizes
())
{
int
right
=
x_dim_1
;
int
sz
=
left
*
right
;
Tensor
x
,
mean
,
var
,
scale
,
bias
,
out
;
x
.
Resize
({
n
,
x_dim_0
,
x_dim_1
});
out
.
Resize
({
n
,
x_dim_0
,
x_dim_1
});
mean
.
Resize
({
n
,
x_dim_0
});
var
.
Resize
({
n
,
x_dim_0
});
scale
.
Resize
({
x_dim_1
});
bias
.
Resize
({
x_dim_1
});
RandomVec
<
T
>
(
sz
,
x
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
left
,
mean
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
left
,
var
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
right
,
scale
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
right
,
bias
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
const
T
*
scale_data
=
scale
.
data
<
T
>
();
const
T
*
bias_data
=
bias
.
data
<
T
>
();
T
*
x_data
=
x
.
data
<
T
>
();
T
*
mean_data
=
mean
.
data
<
T
>
();
T
*
var_data
=
var
.
data
<
T
>
();
T
*
out_data
=
out
.
mutable_data
<
T
>
(
PlaceType
());
BenchAllImpls
<
KT
,
jit
::
LayerNormTuples
<
T
>
,
PlaceType
>
(
right
,
x_data
,
out_data
,
mean_data
,
var_data
,
scale_data
,
bias_data
,
left
,
epsilon
,
right
);
}
}
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchCRFDecodingKernel
()
{
constexpr
int
state_trans_base_idx
=
2
;
for
(
int
seq_len
:
{
1
,
11
,
17
,
50
})
{
for
(
int
tag_num
:
TestSizes
())
{
int
x_sz
=
seq_len
*
tag_num
;
int
w_sz
=
(
tag_num
+
state_trans_base_idx
)
*
tag_num
;
Tensor
x
,
w
,
alpha
,
track
;
x
.
Resize
({
seq_len
,
tag_num
});
w
.
Resize
({
tag_num
+
state_trans_base_idx
,
tag_num
});
alpha
.
Resize
({
seq_len
,
tag_num
});
track
.
Resize
({
seq_len
,
tag_num
});
RandomVec
<
T
>
(
x_sz
,
x
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
w_sz
,
w
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
const
T
*
x_data
=
x
.
data
<
T
>
();
const
T
*
w_data
=
w
.
data
<
T
>
();
T
*
alpha_data
=
alpha
.
mutable_data
<
T
>
(
PlaceType
());
int
*
track_data
=
track
.
mutable_data
<
int
>
(
PlaceType
());
BenchAllImpls
<
KT
,
jit
::
CRFDecodingTuples
<
T
>
,
PlaceType
>
(
tag_num
,
seq_len
,
x_data
,
w_data
,
alpha_data
,
track_data
,
tag_num
);
}
}
}
using
T
=
float
;
using
CPUPlace
=
paddle
::
platform
::
CPUPlace
;
...
...
@@ -382,6 +447,16 @@ BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel<jit::kMatMul, T, CPUPlace>(); }
// softmax
BENCH_FP32_CPU
(
kSoftmax
)
{
BenchSoftmaxKernel
<
jit
::
kSoftmax
,
T
,
CPUPlace
>
();
}
// layernorm
BENCH_FP32_CPU
(
kLayerNorm
)
{
BenchLayerNormKernel
<
jit
::
kLayerNorm
,
T
,
CPUPlace
>
();
}
// crfdecoding
BENCH_FP32_CPU
(
kCRFDecoding
)
{
BenchCRFDecodingKernel
<
jit
::
kCRFDecoding
,
T
,
CPUPlace
>
();
}
// Benchmark all jit kernels including jitcode, mkl and refer.
// To use this tool, run command: ./benchmark [options...]
// Options:
...
...
paddle/fluid/operators/jit/test.cc
浏览文件 @
d79d2f68
...
...
@@ -292,6 +292,63 @@ struct TestFuncWithRefer<jit::MatMulTuples<T>, std::vector<T>, std::vector<T>,
}
};
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
LayerNormTuples
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
int
,
float
,
int
>
{
void
operator
()(
const
typename
jit
::
LayerNormTuples
<
T
>::
func_type
tgt
,
std
::
vector
<
T
>&
x
,
std
::
vector
<
T
>&
outref
,
// NOLINT
std
::
vector
<
T
>&
mean
,
std
::
vector
<
T
>&
var
,
// NOLINT
const
std
::
vector
<
T
>&
scale
,
const
std
::
vector
<
T
>&
bias
,
int
left
,
const
float
epsilon
,
int
right
)
{
EXPECT_TRUE
(
tgt
!=
nullptr
);
EXPECT_EQ
(
x
.
size
(),
static_cast
<
size_t
>
(
left
*
right
));
EXPECT_EQ
(
outref
.
size
(),
static_cast
<
size_t
>
(
left
*
right
));
EXPECT_EQ
(
mean
.
size
(),
static_cast
<
size_t
>
(
left
));
EXPECT_EQ
(
var
.
size
(),
static_cast
<
size_t
>
(
left
));
EXPECT_EQ
(
scale
.
size
(),
static_cast
<
size_t
>
(
right
));
EXPECT_EQ
(
bias
.
size
(),
static_cast
<
size_t
>
(
right
));
std
::
vector
<
T
>
outtgt
(
outref
.
size
());
const
T
*
scale_data
=
scale
.
data
();
const
T
*
bias_data
=
bias
.
data
();
T
*
x_data
=
x
.
data
();
T
*
mean_data
=
mean
.
data
();
T
*
var_data
=
var
.
data
();
T
*
outref_data
=
outref
.
data
();
T
*
outtgt_data
=
outtgt
.
data
();
tgt
(
x_data
,
outtgt_data
,
mean_data
,
var_data
,
scale_data
,
bias_data
,
left
,
epsilon
,
right
);
ExpectEQ
<
T
>
(
outtgt_data
,
outref_data
,
left
*
right
);
}
};
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
CRFDecodingTuples
<
T
>
,
int
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
int
>
,
int
>
{
void
operator
()(
const
typename
jit
::
CRFDecodingTuples
<
T
>::
func_type
tgt
,
const
int
seq_len
,
const
std
::
vector
<
T
>&
x
,
const
std
::
vector
<
T
>&
w
,
std
::
vector
<
T
>&
alpharef
,
// NOLINT
std
::
vector
<
int
>&
trackref
,
int
tag_num
)
{
// NOLINT
constexpr
int
state_trans_base_idx
=
2
;
EXPECT_TRUE
(
tgt
!=
nullptr
);
EXPECT_EQ
(
x
.
size
(),
static_cast
<
size_t
>
(
seq_len
*
tag_num
));
EXPECT_EQ
(
w
.
size
(),
static_cast
<
size_t
>
((
tag_num
+
state_trans_base_idx
)
*
tag_num
));
EXPECT_EQ
(
alpharef
.
size
(),
static_cast
<
size_t
>
(
seq_len
*
tag_num
));
EXPECT_EQ
(
trackref
.
size
(),
static_cast
<
size_t
>
(
seq_len
*
tag_num
));
std
::
vector
<
T
>
alphatgt
(
alpharef
.
size
());
std
::
vector
<
int
>
tracktgt
(
trackref
.
size
());
memcpy
(
trackref
.
data
(),
tracktgt
.
data
(),
tag_num
*
sizeof
(
int
));
tgt
(
seq_len
,
(
const
T
*
)
x
.
data
(),
(
const
T
*
)
w
.
data
(),
alphatgt
.
data
(),
tracktgt
.
data
(),
tag_num
);
ExpectEQ
<
T
>
(
alpharef
.
data
(),
alphatgt
.
data
(),
seq_len
*
tag_num
);
ExpectEQ
<
int
>
(
trackref
.
data
(),
tracktgt
.
data
(),
seq_len
*
tag_num
);
}
};
template
<
jit
::
KernelType
KT
,
typename
KernelTuples
,
typename
PlaceType
,
typename
...
Args
>
void
TestAllImpls
(
const
typename
KernelTuples
::
attr_type
&
attr
,
Args
...
args
)
{
...
...
@@ -640,6 +697,71 @@ void TestNCHW16CMulNCKernel() {
}
}
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
TestLayerNormKernel
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
const
T
epsilon
=
9.99999975e-06
;
for
(
int
n
:
{
1
,
2
,
10
})
{
for
(
int
x_dim_0
:
{
1
,
9
,
17
,
50
})
{
int
left
=
n
*
x_dim_0
;
for
(
int
x_dim_1
:
TestSizes
())
{
int
right
=
x_dim_1
;
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
LayerNormTuples
<
T
>>
();
EXPECT_TRUE
(
ref
!=
nullptr
);
int
sz
=
left
*
right
;
std
::
vector
<
T
>
x
(
sz
),
mean
(
left
),
var
(
left
),
scale
(
right
),
bias
(
right
),
outref
(
sz
);
RandomVec
<
T
>
(
sz
,
x
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
left
,
mean
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
left
,
var
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
right
,
scale
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
right
,
bias
.
data
(),
-
2.
f
,
2.
f
);
const
T
*
scale_data
=
scale
.
data
();
const
T
*
bias_data
=
bias
.
data
();
T
*
x_data
=
x
.
data
();
T
*
mean_data
=
mean
.
data
();
T
*
var_data
=
var
.
data
();
T
*
outref_data
=
outref
.
data
();
ref
(
x_data
,
outref_data
,
mean_data
,
var_data
,
scale_data
,
bias_data
,
left
,
epsilon
,
right
);
TestAllImpls
<
KT
,
jit
::
LayerNormTuples
<
T
>
,
PlaceType
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
int
,
float
>
(
right
,
x
,
outref
,
mean
,
var
,
scale
,
bias
,
left
,
epsilon
,
right
);
}
}
}
}
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
TestCRFDecodingKernel
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
constexpr
int
state_trans_base_idx
=
2
;
for
(
int
seq_len
:
{
1
,
11
,
17
,
50
})
{
for
(
int
tag_num
:
TestSizes
())
{
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
CRFDecodingTuples
<
T
>>
();
EXPECT_TRUE
(
ref
!=
nullptr
);
int
x_sz
=
seq_len
*
tag_num
;
int
w_sz
=
(
tag_num
+
state_trans_base_idx
)
*
tag_num
;
std
::
vector
<
T
>
x
(
x_sz
),
w
(
w_sz
),
alpharef
(
x_sz
);
std
::
vector
<
int
>
trackref
(
x_sz
);
RandomVec
<
T
>
(
x_sz
,
x
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
w_sz
,
w
.
data
(),
-
2.
f
,
2.
f
);
ref
(
seq_len
,
(
const
T
*
)
x
.
data
(),
(
const
T
*
)
w
.
data
(),
alpharef
.
data
(),
trackref
.
data
(),
tag_num
);
TestAllImpls
<
KT
,
jit
::
CRFDecodingTuples
<
T
>
,
PlaceType
,
int
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
int
>
,
int
>
(
tag_num
,
seq_len
,
x
,
w
,
alpharef
,
trackref
,
tag_num
);
}
}
}
// XYZNTuple
TEST
(
JITKernel
,
kVMul
)
{
TestXYZNKernel
<
jit
::
kVMul
,
float
,
CPUPlace
>
();
...
...
@@ -761,7 +883,16 @@ TEST(JITKernel, kNCHW16CMulNC) {
TestNCHW16CMulNCKernel
<
jit
::
kNCHW16CMulNC
,
double
,
CPUPlace
>
();
}
// TODO(yihua/TJ): add crf decoding and layer norm unit tests
TEST
(
JITKernel
,
kLayerNorm
)
{
TestLayerNormKernel
<
jit
::
kLayerNorm
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestLayerNormKernel
<
jit
::
kLayerNorm
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
TEST
(
JITKernel
,
kCRFDecoding
)
{
TestCRFDecodingKernel
<
jit
::
kCRFDecoding
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestCRFDecodingKernel
<
jit
::
kCRFDecoding
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
TEST
(
JITKernel
,
pool
)
{
// TODO(TJ): add some test
...
...
paddle/fluid/operators/load_combine_op.cc
浏览文件 @
d79d2f68
...
...
@@ -64,7 +64,7 @@ class LoadCombineOp : public framework::OperatorBase {
auto
*
tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
// Error checking
PADDLE_ENFORCE
(
static_cast
<
bool
>
(
buffer
),
"Cannot read more"
);
PADDLE_ENFORCE
(
static_cast
<
bool
>
(
*
buffer
),
"Cannot read more"
);
// Get data from fin to tensor
DeserializeFromStream
(
*
buffer
,
tensor
,
dev_ctx
);
...
...
@@ -90,6 +90,10 @@ class LoadCombineOp : public framework::OperatorBase {
tensor
->
ShareDataWith
(
fp16_tensor
);
}
}
buffer
->
peek
();
PADDLE_ENFORCE
(
buffer
->
eof
(),
"You are not allowed to load partial data via "
"load_combine_op, use load_op instead."
);
}
};
...
...
paddle/fluid/operators/lstm_op.h
浏览文件 @
d79d2f68
...
...
@@ -311,6 +311,10 @@ class LSTMGradKernel : public framework::OpKernel<T> {
lstm_grad
.
prev_state_grad
=
c0_g
?
ordered_c0_g
.
data
<
T
>
()
:
nullptr
;
}
// lstm_value.output_value not used in bp, set to nullptr
// lstm_grad.state_active_grad not used in bp, set to nullptr
lstm_value
.
output_value
=
nullptr
;
lstm_grad
.
state_active_grad
=
nullptr
;
int
cur_batch_size
=
bend
-
bstart
;
math
::
LstmUnitGradFunctor
<
DeviceContext
,
T
>::
compute
(
device_ctx
,
lstm_value
,
lstm_grad
,
frame_size
,
cur_batch_size
,
...
...
paddle/fluid/operators/lstmp_op.h
浏览文件 @
d79d2f68
...
...
@@ -405,6 +405,11 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
}
int
cur_batch_size
=
bend
-
bstart
;
// lstmp_value.output_value not used in bp, set to null
// lstmp_grad.state_active_grad not used in bp, set to null
lstmp_value
.
output_value
=
nullptr
;
lstmp_grad
.
state_active_grad
=
nullptr
;
math
::
LstmUnitGradFunctor
<
DeviceContext
,
T
>::
compute
(
device_ctx
,
lstmp_value
,
lstmp_grad
,
frame_size
,
cur_batch_size
,
gate_act
,
cell_act
,
cand_act
);
...
...
paddle/fluid/operators/row_conv_op.cc
浏览文件 @
d79d2f68
...
...
@@ -109,23 +109,23 @@ from future subsequences in a computationally efficient manner to improve
unidirectional recurrent neural networks. The row convolution operator is
different from the 1D sequence convolution, and is computed as follows:
Given an input sequence $
in$ of length $t$ and input dimension $d
$,
and a filter ($W$) of size $context \times
d$,
Given an input sequence $
X$ of length $t$ and input dimension $D
$,
and a filter ($W$) of size $context \times
D$,
the output sequence is convolved as:
$$
out_{i
, :} = \\sum_{j=i}^{i + context} in_{j,:} \\cdot W_{i-j, :
}
out_{i
} = \\sum_{j=i}^{i + context - 1} X_{j} \\cdot W_{j-i
}
$$
In the above equation:
* $Out_{i}$: The i-th row of output variable with shape [1, D].
* $
\\tau
$: Future context size.
* $
context
$: Future context size.
* $X_{j}$: The j-th row of input variable with shape [1, D].
* $W_{
i-j}$: The (i-j
)-th row of parameters with shape [1, D].
* $W_{
j-i}$: The (j-i
)-th row of parameters with shape [1, D].
More details about row_conv please refer to
the design document
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
d79d2f68
...
...
@@ -233,9 +233,11 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) {
#endif // __APPLE__ and windows
#endif // PADDLE_WITH_CUDA
#define PADDLE_THROW(...) \
throw ::paddle::platform::EnforceNotMet( \
::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__)
#define PADDLE_THROW(...) \
do { \
throw ::paddle::platform::EnforceNotMet( \
::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__); \
} while (0)
#define PADDLE_ENFORCE(COND, ...) \
do { \
...
...
@@ -270,23 +272,25 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) {
* extra messages is also supported, for example:
* PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
*/
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...)
\
do {
\
if (UNLIKELY(nullptr == (__VAL))) {
\
PADDLE_THROW(#__VAL " should not be null\n%s",
\
paddle::string::Sprintf(""
__VA_ARGS__)); \
}
\
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
do { \
if (UNLIKELY(nullptr == (__VAL))) { \
PADDLE_THROW(#__VAL " should not be null\n%s", \
::paddle::string::Sprintf(
__VA_ARGS__)); \
} \
} while (0)
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \
if (UNLIKELY(!((__VAL0)__CMP(__VAL1)))) { \
auto __cond1__ = (__VAL0); \
auto __cond2__ = (__VAL1); \
if (UNLIKELY(!((__cond1__)__CMP(__cond2__)))) { \
PADDLE_THROW("Enforce failed. Expected %s " #__CMP \
" %s, but received %s:%s " #__INV_CMP " %s:%s.\n%s", \
#__VAL0, #__VAL1, #__VAL0, \
paddle::string::to_string(__VAL0), #__VAL1,
\
paddle::string::to_string(__VAL1),
\
paddle::string::Sprintf("" __VA_ARGS__));
\
::paddle::string::to_string(__cond1__), #__VAL1,
\
::paddle::string::to_string(__cond2__),
\
::paddle::string::Sprintf(__VA_ARGS__));
\
} \
} while (0)
...
...
paddle/fluid/pybind/ir.cc
浏览文件 @
d79d2f68
...
...
@@ -13,10 +13,12 @@
// limitations under the License.
#include "paddle/fluid/pybind/ir.h"
#include <algorithm>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/op_desc.h"
...
...
@@ -27,6 +29,10 @@ namespace py = pybind11;
using
paddle
::
framework
::
ir
::
Graph
;
using
paddle
::
framework
::
ir
::
Node
;
using
paddle
::
framework
::
ir
::
GraphSafeRemoveNodes
;
using
paddle
::
framework
::
ir
::
HasCircle
;
using
paddle
::
framework
::
ir
::
GraphNum
;
using
paddle
::
framework
::
ir
::
TopologySortOperations
;
using
paddle
::
framework
::
ir
::
BuildOperationAdjList
;
using
paddle
::
framework
::
OpDesc
;
using
paddle
::
framework
::
ProgramDesc
;
using
paddle
::
framework
::
VarDesc
;
...
...
@@ -36,6 +42,12 @@ namespace paddle {
namespace
pybind
{
void
BindGraph
(
py
::
module
*
m
)
{
m
->
def
(
"graph_safe_remove_nodes"
,
GraphSafeRemoveNodes
);
m
->
def
(
"has_circle"
,
HasCircle
);
m
->
def
(
"graph_num"
,
GraphNum
);
m
->
def
(
"topology_sort"
,
TopologySortOperations
,
return_value_policy
::
reference
);
m
->
def
(
"build_adjacency_list"
,
BuildOperationAdjList
,
return_value_policy
::
reference
);
py
::
class_
<
Graph
,
std
::
shared_ptr
<
Graph
>>
(
*
m
,
"Graph"
,
"The graph is a Directed Acyclic Single Static Assignment Graph, see "
...
...
@@ -46,7 +58,6 @@ void BindGraph(py::module *m) {
.
def
(
"get_float"
,
&
Graph
::
Get
<
float
>
)
.
def
(
"get_double"
,
&
Graph
::
Get
<
double
>
)
.
def
(
"get_string"
,
&
Graph
::
Get
<
std
::
string
>
)
.
def
(
"get_program"
,
&
Graph
::
Get
<
ProgramDesc
>
)
.
def
(
"get_marked_nodes"
,
&
Graph
::
Get
<
std
::
unordered_set
<
const
Node
*>>
)
.
def
(
"set"
,
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
int
attr
)
{
return
self
.
Set
(
attr_name
,
new
int
(
attr
));
})
...
...
@@ -63,11 +74,6 @@ void BindGraph(py::module *m) {
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
double
attr
)
{
return
self
.
Set
(
attr_name
,
new
double
(
attr
));
})
.
def
(
"set"
,
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
const
ProgramDesc
&
attr
)
{
return
self
.
Set
(
attr_name
,
new
ProgramDesc
(
attr
));
})
.
def
(
"set"
,
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
const
std
::
unordered_set
<
const
Node
*>
&
attr
)
{
...
...
@@ -108,42 +114,42 @@ void BindNode(py::module *m) {
.
def
(
"is_op"
,
&
Node
::
IsOp
)
.
def
(
"is_var"
,
&
Node
::
IsVar
)
.
def
(
"is_ctrl_var"
,
&
Node
::
IsCtrlVar
)
.
def
(
"clear_inputs"
,
[](
Node
&
self
)
{
self
.
inputs
.
clear
();
})
.
def
(
"inputs_remove"
,
[](
Node
&
self
,
int
node_id
)
{
for
(
auto
it
=
self
.
inputs
.
begin
();
it
!=
self
.
inputs
.
end
();
it
++
)
{
if
((
*
it
)
->
id
()
==
node_id
)
{
self
.
inputs
.
erase
(
it
);
}
auto
pos
=
std
::
find_if
(
self
.
inputs
.
begin
(),
self
.
inputs
.
end
(),
[
&
node_id
](
const
Node
*
n
)
{
return
n
->
id
()
==
node_id
;
});
if
(
pos
!=
self
.
inputs
.
end
())
{
self
.
inputs
.
erase
(
pos
);
}
})
.
def
(
"inputs_remove"
,
[](
Node
&
self
,
Node
&
node
)
{
for
(
auto
it
=
self
.
inputs
.
begin
();
it
!=
self
.
inputs
.
end
();
it
++
)
{
if
(
*
it
==
&
node
)
{
self
.
inputs
.
erase
(
it
);
}
auto
pos
=
std
::
find
(
self
.
inputs
.
begin
(),
self
.
inputs
.
end
(),
&
node
);
if
(
pos
!=
self
.
inputs
.
end
())
{
self
.
inputs
.
erase
(
pos
);
}
})
.
def
(
"inputs_append"
,
[](
Node
&
self
,
Node
&
node
)
{
self
.
inputs
.
push_back
(
&
node
);
})
.
def
(
"clear_outputs"
,
[](
Node
&
self
)
{
self
.
outputs
.
clear
();
})
.
def
(
"outputs_remove"
,
[](
Node
&
self
,
int
node_id
)
{
for
(
auto
it
=
self
.
outputs
.
begin
();
it
!=
self
.
outputs
.
end
();
it
++
)
{
if
((
*
it
)
->
id
()
==
node_id
)
{
self
.
outputs
.
erase
(
it
);
}
auto
pos
=
std
::
find_if
(
self
.
outputs
.
begin
(),
self
.
outputs
.
end
(),
[
&
node_id
](
const
Node
*
n
)
{
return
n
->
id
()
==
node_id
;
});
if
(
pos
!=
self
.
outputs
.
end
())
{
self
.
outputs
.
erase
(
pos
);
}
})
.
def
(
"outputs_remove"
,
[](
Node
&
self
,
Node
&
node
)
{
for
(
auto
it
=
self
.
outputs
.
begin
();
it
!=
self
.
outputs
.
end
();
it
++
)
{
if
(
*
it
==
&
node
)
{
self
.
outputs
.
erase
(
it
);
}
auto
pos
=
std
::
find
(
self
.
outputs
.
begin
(),
self
.
outputs
.
end
(),
&
node
);
if
(
pos
!=
self
.
outputs
.
end
())
{
self
.
outputs
.
erase
(
pos
);
}
})
.
def
(
"outputs_append"
,
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
d79d2f68
...
...
@@ -829,8 +829,7 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"disable_profiler"
,
platform
::
DisableProfiler
);
m
.
def
(
"is_profiler_enabled"
,
platform
::
IsProfileEnabled
);
m
.
def
(
"reset_profiler"
,
platform
::
ResetProfiler
);
m
.
def
(
"get_pass"
,
[](
const
py
::
bytes
&
binary_str
)
{
std
::
string
pass_type
(
binary_str
);
m
.
def
(
"get_pass"
,
[](
const
std
::
string
&
pass_type
)
{
auto
pass
=
framework
::
ir
::
PassRegistry
::
Instance
().
Get
(
pass_type
);
return
std
::
shared_ptr
<
framework
::
ir
::
Pass
>
(
std
::
move
(
pass
));
});
...
...
@@ -838,10 +837,9 @@ All parameter, weight, gradient are variables in Paddle.
py
::
class_
<
ir
::
Pass
,
std
::
shared_ptr
<
ir
::
Pass
>>
pass
(
m
,
"Pass"
);
pass
.
def
(
py
::
init
())
.
def
(
"has"
,
&
ir
::
Pass
::
Has
)
.
def
(
"set"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
attr_name
,
const
ProgramDesc
&
attr
)
{
return
self
.
Set
(
attr_name
,
new
ProgramDesc
(
attr
));
.
def
(
"set_not_owned"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
attr_name
,
ProgramDesc
&
attr
)
{
self
.
SetNotOwned
<
ProgramDesc
>
(
attr_name
,
&
attr
);
})
.
def
(
"set"
,
...
...
@@ -850,7 +848,6 @@ All parameter, weight, gradient are variables in Paddle.
})
.
def
(
"set"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
int
val
)
{
self
.
Set
<
const
int
>
(
name
,
new
int
(
val
));
})
.
def
(
"get_program"
,
&
ir
::
Pass
::
Get
<
ProgramDesc
>
)
.
def
(
"type"
,
&
ir
::
Pass
::
Type
)
.
def
(
"apply"
,
[](
ir
::
Pass
&
self
,
std
::
shared_ptr
<
ir
::
Graph
>
graph
)
{
std
::
unique_ptr
<
ir
::
Graph
>
origin_graph
(
graph
.
get
());
...
...
python/CMakeLists.txt
浏览文件 @
d79d2f68
...
...
@@ -64,6 +64,7 @@ if (WITH_TESTING)
add_subdirectory
(
paddle/dataset/tests
)
add_subdirectory
(
paddle/fluid/tests
)
add_subdirectory
(
paddle/fluid/contrib/tests
)
add_subdirectory
(
paddle/fluid/contrib/slim/tests
)
endif
()
install
(
DIRECTORY
${
PADDLE_PYTHON_PACKAGE_DIR
}
DESTINATION opt/paddle/share/wheels
...
...
python/paddle/fluid/compiler.py
浏览文件 @
d79d2f68
...
...
@@ -177,7 +177,10 @@ class CompiledProgram(object):
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass.
self
.
_build_strategy
.
enable_inplace
=
False
if
self
.
_program
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
memory_optimize
is
None
:
self
.
_build_strategy
.
memory_optimize
=
False
if
main
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
enable_inplace
is
None
:
self
.
_build_strategy
.
enable_inplace
=
False
if
main
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
num_trainers
>
1
and
trainers_endpoints
:
assert
self
.
_build_strategy
.
num_trainers
==
len
(
...
...
python/paddle/fluid/contrib/int8_inference/README.md
浏览文件 @
d79d2f68
...
...
@@ -63,10 +63,10 @@ Notes:
## 4. How to reproduce the results
*
Small dataset
```
bash
python python/paddle/fluid/contrib/tests/test_calibration.py
FLAGS_use_mkldnn
=
true
python python/paddle/fluid/contrib/tests/test_calibration.py
```
*
Full dataset
```
bash
DATASET
=
full python python/paddle/fluid/contrib/tests/test_calibration.py
FLAGS_use_mkldnn
=
true
DATASET
=
full python python/paddle/fluid/contrib/tests/test_calibration.py
```
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
浏览文件 @
d79d2f68
此差异已折叠。
点击以展开。
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
0 → 100644
浏览文件 @
d79d2f68
file
(
GLOB TEST_OPS RELATIVE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
"
"test_*.py"
)
string
(
REPLACE
".py"
""
TEST_OPS
"
${
TEST_OPS
}
"
)
foreach
(
src
${
TEST_OPS
}
)
py_test
(
${
src
}
SRCS
${
src
}
.py
)
endforeach
()
python/paddle/fluid/contrib/slim/
unitest
/__init__.py
→
python/paddle/fluid/contrib/slim/
tests
/__init__.py
浏览文件 @
d79d2f68
文件已移动
python/paddle/fluid/contrib/slim/
unitest
/configs/config.yaml
→
python/paddle/fluid/contrib/slim/
tests
/configs/config.yaml
浏览文件 @
d79d2f68
version
:
1.0
include
:
[
"
./
unitest/configs/pruners.yaml"
,
"
./unitest
/configs/pruners_0.yaml"
]
include
:
[
"
./
configs/pruners.yaml"
,
"
.
/configs/pruners_0.yaml"
]
pruners
:
pruner_1
:
class
:
'
RatioPruner'
...
...
python/paddle/fluid/contrib/slim/
unitest
/configs/pruners.yaml
→
python/paddle/fluid/contrib/slim/
tests
/configs/pruners.yaml
浏览文件 @
d79d2f68
文件已移动
python/paddle/fluid/contrib/slim/
unitest
/configs/pruners_0.yaml
→
python/paddle/fluid/contrib/slim/
tests
/configs/pruners_0.yaml
浏览文件 @
d79d2f68
文件已移动
python/paddle/fluid/contrib/slim/
unitest
/test_factory.py
→
python/paddle/fluid/contrib/slim/
tests
/test_factory.py
浏览文件 @
d79d2f68
...
...
@@ -18,7 +18,7 @@ import unittest
class
TestFactory
(
unittest
.
TestCase
):
def
test_parse
(
self
):
factory
=
ConfigFactory
(
'./
unitest/
configs/config.yaml'
)
factory
=
ConfigFactory
(
'./configs/config.yaml'
)
pruner
=
factory
.
instance
(
'pruner_1'
)
self
.
assertEquals
(
pruner
.
ratios
[
'conv1_1.w'
],
0.3
)
...
...
python/paddle/fluid/contrib/slim/tests/test_graph.py
0 → 100644
浏览文件 @
d79d2f68
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
from
__future__
import
print_function
import
unittest
import
paddle.fluid
as
fluid
import
six
from
paddle.fluid.framework
import
IrGraph
from
paddle.fluid
import
core
def
residual_block
(
num
):
def
conv_bn_layer
(
input
,
ch_out
,
filter_size
,
stride
,
padding
,
act
=
'relu'
,
bias_attr
=
False
):
tmp
=
fluid
.
layers
.
conv2d
(
input
=
input
,
filter_size
=
filter_size
,
num_filters
=
ch_out
,
stride
=
stride
,
padding
=
padding
,
act
=
None
,
bias_attr
=
bias_attr
)
return
fluid
.
layers
.
batch_norm
(
input
=
tmp
,
act
=
act
)
data
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
1
,
32
,
32
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
hidden
=
data
for
_
in
six
.
moves
.
xrange
(
num
):
conv
=
conv_bn_layer
(
hidden
,
16
,
3
,
1
,
1
,
act
=
None
,
bias_attr
=
True
)
short
=
conv_bn_layer
(
hidden
,
16
,
1
,
1
,
0
,
act
=
None
)
hidden
=
fluid
.
layers
.
elementwise_add
(
x
=
conv
,
y
=
short
,
act
=
'relu'
)
fc
=
fluid
.
layers
.
fc
(
input
=
hidden
,
size
=
10
)
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
fc
,
label
=
label
)
loss
=
fluid
.
layers
.
mean
(
loss
)
return
loss
class
TestGraph
(
unittest
.
TestCase
):
def
test_graph_functions
(
self
):
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
loss
=
residual_block
(
2
)
opt
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
0.001
)
opt
.
minimize
(
loss
)
graph
=
IrGraph
(
core
.
Graph
(
main
.
desc
),
for_test
=
False
)
marked_nodes
=
set
()
for
op
in
graph
.
all_ops
():
if
op
.
name
().
find
(
'conv2d'
)
>
-
1
:
marked_nodes
.
add
(
op
)
graph
.
draw
(
'.'
,
'residual'
,
marked_nodes
)
self
.
assertFalse
(
graph
.
has_circle
())
self
.
assertEqual
(
graph
.
graph_num
(),
1
)
nodes
=
graph
.
topology_sort
()
self
.
assertEqual
(
len
(
nodes
),
len
(
graph
.
all_ops
()))
nodes_map
=
graph
.
build_adjacency_list
()
self
.
assertEqual
(
len
(
nodes_map
),
len
(
graph
.
all_ops
()))
nodes_num
=
len
(
graph
.
all_nodes
())
graph
.
safe_remove_nodes
(
marked_nodes
)
self
.
assertEqual
(
len
(
graph
.
all_nodes
()),
nodes_num
-
len
(
marked_nodes
))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/contrib/slim/
unitest
/test_quantization_pass.py
→
python/paddle/fluid/contrib/slim/
tests
/test_quantization_pass.py
浏览文件 @
d79d2f68
...
...
@@ -17,9 +17,12 @@ import random
import
numpy
as
np
import
paddle.fluid
as
fluid
import
six
from
paddle.fluid.framework
import
Program
import
paddle
from
paddle.fluid.framework
import
IrGraph
from
paddle.fluid.contrib.slim.quantization
import
QuantizationTransformPass
from
paddle.fluid.contrib.slim.quantization
import
QuantizationFreezePass
from
paddle.fluid.contrib.slim.quantization
import
ConvertToInt8Pass
from
paddle.fluid.contrib.slim.quantization
import
TransformForMobilePass
from
paddle.fluid
import
core
...
...
@@ -65,6 +68,28 @@ def residual_block(num):
return
loss
def
conv_net
(
img
,
label
):
conv_pool_1
=
fluid
.
nets
.
simple_img_conv_pool
(
input
=
img
,
filter_size
=
5
,
num_filters
=
20
,
pool_size
=
2
,
pool_stride
=
2
,
act
=
"relu"
)
conv_pool_1
=
fluid
.
layers
.
batch_norm
(
conv_pool_1
)
conv_pool_2
=
fluid
.
nets
.
simple_img_conv_pool
(
input
=
conv_pool_1
,
filter_size
=
5
,
num_filters
=
50
,
pool_size
=
2
,
pool_stride
=
2
,
act
=
"relu"
)
prediction
=
fluid
.
layers
.
fc
(
input
=
conv_pool_2
,
size
=
10
,
act
=
'softmax'
)
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_loss
=
fluid
.
layers
.
mean
(
loss
)
return
avg_loss
class
TestQuantizationTransformPass
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
quantizable_op_and_inputs
=
{
...
...
@@ -171,5 +196,177 @@ class TestQuantizationTransformPass(unittest.TestCase):
self
.
residual_block_quant
(
'range_abs_max'
)
class
TestQuantizationFreezePass
(
unittest
.
TestCase
):
def
freeze_graph
(
self
,
use_cuda
,
seed
,
quant_type
):
def
build_program
(
main
,
startup
,
is_test
):
main
.
random_seed
=
seed
startup
.
random_seed
=
seed
with
fluid
.
unique_name
.
guard
():
with
fluid
.
program_guard
(
main
,
startup
):
img
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
1
,
28
,
28
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
loss
=
conv_net
(
img
,
label
)
if
not
is_test
:
opt
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
0.001
)
opt
.
minimize
(
loss
)
return
[
img
,
label
],
loss
random
.
seed
(
0
)
np
.
random
.
seed
(
0
)
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
test_program
=
fluid
.
Program
()
feeds
,
loss
=
build_program
(
main
,
startup
,
False
)
build_program
(
test_program
,
startup
,
True
)
test_program
=
test_program
.
clone
(
for_test
=
True
)
main_graph
=
IrGraph
(
core
.
Graph
(
main
.
desc
),
for_test
=
False
)
test_graph
=
IrGraph
(
core
.
Graph
(
test_program
.
desc
),
for_test
=
True
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
scope
=
fluid
.
Scope
()
with
fluid
.
scope_guard
(
scope
):
exe
.
run
(
startup
)
transform_pass
=
QuantizationTransformPass
(
scope
=
scope
,
program_exe
=
exe
,
activation_quantize_type
=
quant_type
)
transform_pass
.
apply
(
main_graph
)
transform_pass
.
apply
(
test_graph
)
dev_name
=
'_gpu_'
if
use_cuda
else
'_cpu_'
marked_nodes
=
set
()
for
op
in
main_graph
.
all_ops
():
if
op
.
name
().
find
(
'quantize'
)
>
-
1
:
marked_nodes
.
add
(
op
)
main_graph
.
draw
(
'.'
,
'main'
+
dev_name
+
quant_type
,
marked_nodes
)
marked_nodes
=
set
()
for
op
in
test_graph
.
all_ops
():
if
op
.
name
().
find
(
'quantize'
)
>
-
1
:
marked_nodes
.
add
(
op
)
test_graph
.
draw
(
'.'
,
'test'
+
dev_name
+
quant_type
,
marked_nodes
)
quantized_main_program
=
main_graph
.
to_program
()
quantized_test_program
=
test_graph
.
to_program
()
iters
=
5
batch_size
=
8
#train_exe = fluid.ParallelExecutor(
# main_program=quantized_main_program,
# use_cuda=bool(use_cuda),
# loss_name=loss.name,
# scope=scope)
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
mnist
.
train
(),
buf_size
=
500
),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
batch_size
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
feeds
,
place
=
place
)
with
fluid
.
scope_guard
(
scope
):
for
_
in
range
(
iters
):
data
=
next
(
train_reader
())
loss_v
=
exe
.
run
(
program
=
quantized_main_program
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
loss
])
#loss_v = train_exe.run(feed=feeder.feed(data),
# fetch_list=[loss.name])
#print('{}: {}'.format('loss' + dev_name + quant_type, loss_v))
test_data
=
next
(
test_reader
())
with
fluid
.
program_guard
(
quantized_test_program
):
w_var
=
fluid
.
framework
.
_get_var
(
'conv2d_1.w_0.quantized'
,
quantized_test_program
)
# Testing
with
fluid
.
scope_guard
(
scope
):
test_loss1
,
w_quant
=
exe
.
run
(
program
=
quantized_test_program
,
feed
=
feeder
.
feed
(
test_data
),
fetch_list
=
[
loss
,
w_var
])
# Freeze graph for inference, but the weight of fc/conv is still float type.
freeze_pass
=
QuantizationFreezePass
(
scope
=
scope
,
place
=
place
)
freeze_pass
.
apply
(
test_graph
)
marked_nodes
=
set
()
for
op
in
test_graph
.
all_ops
():
if
op
.
name
().
find
(
'quantize'
)
>
-
1
:
marked_nodes
.
add
(
op
)
test_graph
.
draw
(
'.'
,
'test_freeze'
+
dev_name
+
quant_type
,
marked_nodes
)
server_program
=
test_graph
.
to_program
()
with
fluid
.
scope_guard
(
scope
):
test_loss2
,
=
exe
.
run
(
program
=
server_program
,
feed
=
feeder
.
feed
(
test_data
),
fetch_list
=
[
loss
])
self
.
assertAlmostEqual
(
test_loss1
,
test_loss2
,
delta
=
5e-3
)
#print('{}: {}'.format('test_loss1' + dev_name + quant_type, test_loss1))
#print('{}: {}'.format('test_loss2' + dev_name + quant_type, test_loss2))
w_freeze
=
np
.
array
(
scope
.
find_var
(
'conv2d_1.w_0'
).
get_tensor
())
# Maybe failed, this is due to the calculation precision
# self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
#print('{}: {}'.format('w_freeze' + dev_name + quant_type,
# np.sum(w_freeze)))
#print('{}: {}'.format('w_quant' + dev_name + quant_type,
# np.sum(w_quant)))
# Convert parameter to 8-bit.
convert_int8_pass
=
ConvertToInt8Pass
(
scope
=
scope
,
place
=
place
)
convert_int8_pass
.
apply
(
test_graph
)
marked_nodes
=
set
()
for
op
in
test_graph
.
all_ops
():
if
op
.
name
().
find
(
'quantize'
)
>
-
1
:
marked_nodes
.
add
(
op
)
test_graph
.
draw
(
'.'
,
'test_int8'
+
dev_name
+
quant_type
,
marked_nodes
)
server_program_int8
=
test_graph
.
to_program
()
# Save the 8-bit parameter and model file.
with
fluid
.
scope_guard
(
scope
):
fluid
.
io
.
save_inference_model
(
'server_int8'
+
dev_name
+
quant_type
,
[
'image'
,
'label'
],
[
loss
],
exe
,
server_program_int8
)
# Test whether the 8-bit parameter and model file can be loaded successfully.
[
infer
,
feed
,
fetch
]
=
fluid
.
io
.
load_inference_model
(
'server_int8'
+
dev_name
+
quant_type
,
exe
)
# Check the loaded 8-bit weight.
w_8bit
=
np
.
array
(
scope
.
find_var
(
'conv2d_1.w_0.int8'
).
get_tensor
())
self
.
assertEqual
(
w_8bit
.
dtype
,
np
.
int8
)
self
.
assertEqual
(
np
.
sum
(
w_8bit
),
np
.
sum
(
w_freeze
))
#print('{}: {}'.format('w_8bit' + dev_name + quant_type, np.sum(w_8bit)))
#print('{}: {}'.format('w_freeze' + dev_name + quant_type,
# np.sum(w_freeze)))
mobile_pass
=
TransformForMobilePass
()
mobile_pass
.
apply
(
test_graph
)
marked_nodes
=
set
()
for
op
in
test_graph
.
all_ops
():
if
op
.
name
().
find
(
'quantize'
)
>
-
1
:
marked_nodes
.
add
(
op
)
test_graph
.
draw
(
'.'
,
'test_mobile'
+
dev_name
+
quant_type
,
marked_nodes
)
mobile_program
=
test_graph
.
to_program
()
with
fluid
.
scope_guard
(
scope
):
fluid
.
io
.
save_inference_model
(
'mobile_int8'
+
dev_name
+
quant_type
,
[
'image'
,
'label'
],
[
loss
],
exe
,
mobile_program
)
def
test_freeze_graph_cuda_dynamic
(
self
):
if
fluid
.
core
.
is_compiled_with_cuda
():
with
fluid
.
unique_name
.
guard
():
self
.
freeze_graph
(
True
,
seed
=
1
,
quant_type
=
'abs_max'
)
def
test_freeze_graph_cpu_dynamic
(
self
):
with
fluid
.
unique_name
.
guard
():
self
.
freeze_graph
(
False
,
seed
=
2
,
quant_type
=
'abs_max'
)
def
test_freeze_graph_cuda_static
(
self
):
if
fluid
.
core
.
is_compiled_with_cuda
():
with
fluid
.
unique_name
.
guard
():
self
.
freeze_graph
(
True
,
seed
=
1
,
quant_type
=
'range_abs_max'
)
def
test_freeze_graph_cpu_static
(
self
):
with
fluid
.
unique_name
.
guard
():
self
.
freeze_graph
(
False
,
seed
=
2
,
quant_type
=
'range_abs_max'
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/contrib/tests/CMakeLists.txt
浏览文件 @
d79d2f68
...
...
@@ -6,5 +6,9 @@ if(APPLE OR WIN32 OR NOT WITH_MKL)
endif
()
foreach
(
src
${
TEST_OPS
}
)
py_test
(
${
src
}
SRCS
${
src
}
.py
)
if
(
src MATCHES
"test_calibration"
)
py_test
(
${
src
}
SRCS
${
src
}
.py ENVS FLAGS_use_mkldnn=true
)
else
()
py_test
(
${
src
}
SRCS
${
src
}
.py
)
endif
()
endforeach
()
python/paddle/fluid/contrib/tests/test_calibration.py
浏览文件 @
d79d2f68
...
...
@@ -199,7 +199,6 @@ class TestCalibrationForResnet50(unittest.TestCase):
def
run_program
(
self
,
model_path
,
generate_int8
=
False
,
algo
=
'direct'
):
image_shape
=
[
3
,
224
,
224
]
os
.
environ
[
'FLAGS_use_mkldnn'
]
=
'True'
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
...
...
@@ -241,9 +240,6 @@ class TestCalibrationForResnet50(unittest.TestCase):
label
=
label
.
reshape
([
-
1
,
1
])
running_program
=
calibrator
.
sampling_program
.
clone
(
)
if
generate_int8
else
infer_program
.
clone
()
for
op
in
running_program
.
current_block
().
ops
:
if
op
.
has_attr
(
"use_mkldnn"
):
op
.
_set_attr
(
"use_mkldnn"
,
True
)
t1
=
time
.
time
()
_
,
acc1
,
_
=
exe
.
run
(
...
...
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
浏览文件 @
d79d2f68
...
...
@@ -204,9 +204,11 @@ class TestQuantizeTranspiler(unittest.TestCase):
build_program
(
test_program
,
startup
,
True
)
test_program
=
test_program
.
clone
(
for_test
=
True
)
quant_transpiler
=
QuantizeTranspiler
()
quant_transpiler
.
training_transpile
(
main
)
quant_transpiler
.
training_transpile
(
test_program
)
quant_type
=
'range_abs_max'
# 'range_abs_max' or 'abs_max'
quant_transpiler
=
QuantizeTranspiler
(
activation_quantize_type
=
quant_type
)
quant_transpiler
.
training_transpile
(
main
,
startup
)
quant_transpiler
.
training_transpile
(
test_program
,
startup
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
...
...
python/paddle/fluid/framework.py
浏览文件 @
d79d2f68
...
...
@@ -16,6 +16,8 @@ from __future__ import print_function
import
collections
from
collections
import
defaultdict
from
collections
import
Iterable
import
contextlib
from
.wrapped_decorator
import
signature_safe_contextmanager
import
os
import
re
...
...
@@ -1529,12 +1531,16 @@ class Block(object):
class
IrGraph
(
object
):
"""
IrGraph uses core.Graph as the delegation to accomplish the manipulation.
Python IrGraph. Beneath it is a core.Graph, which is used for
create a c++ Ir Pass Graph. An IrGraph is just a graph view of
a Program. In an IrGraph, both Variables and Operators are graph
nodes.
"""
def
__init__
(
self
,
graph
,
for_test
=
False
):
"""
Construct the IrGraph using core.Graph.
Construct an IrGraph using core.Graph.
Args:
graph(core.Graph): C++ Graph.
for_test(bool): True for the test graph and false for the train graph.
...
...
@@ -1545,23 +1551,81 @@ class IrGraph(object):
self
.
_for_test
=
for_test
def
is_test
(
self
):
"""
If the graph is used for testing, the function returns true. Otherwise, returns false.
"""
return
self
.
_for_test
def
all_parameters
(
self
):
param_nodes
=
set
()
for
node
in
self
.
graph
.
nodes
():
if
node
.
is_var
()
and
node
.
var
()
is
not
None
and
node
.
var
(
).
persistable
():
param_nodes
.
add
(
node
)
return
param_nodes
def
all_nodes
(
self
):
"""
Return all nodes included in the graph as a set.
"""
return
{
node
for
node
in
self
.
graph
.
nodes
()}
def
all_vars
(
self
):
"""
Return all variable nodes included in the graph as a set.
"""
return
{
node
for
node
in
self
.
graph
.
nodes
()
if
node
.
is_var
()}
def
all_persistable_vars
(
self
):
"""
Return all persistable variable nodes included in the graph as a set.
"""
persistable_nodes
=
set
()
for
node
in
self
.
graph
.
nodes
():
if
node
.
is_var
()
and
node
.
var
()
is
not
None
and
node
.
var
(
).
persistable
():
persistable_nodes
.
add
(
node
)
return
persistable_nodes
def
all_ops
(
self
):
"""
Return all operator nodes included in the graph as a set.
"""
return
{
node
for
node
in
self
.
graph
.
nodes
()
if
node
.
is_op
()}
def
var_node
(
self
,
name
):
"""
Get a variable node by name from the graph.
Args:
name(str): the name of the variable node.
Raises:
ValueError: The If input's type is not str, or this graph
doesn't have a variable with the giving name.
Returns:
core.Node: the variable node with the giving name.
"""
if
not
isinstance
(
name
,
six
.
string_types
):
raise
TypeError
(
"var require string as parameter, but get %s instead."
%
(
type
(
name
)))
target_var_node
=
None
var_nodes
=
self
.
all_vars
()
for
var_node
in
var_nodes
:
if
var_node
.
name
()
==
name
:
target_var_node
=
var_node
if
target_var_node
is
None
:
raise
ValueError
(
"var_node %s not in this graph"
%
name
)
return
target_var_node
def
create_param_node
(
self
,
name
,
var_type
,
shape
,
var_dtype
):
"""
Create a persistable variable node in the graph. In IrGraph,
it can not distinguish between persistable variables and parameters.
Args:
name(str): the name of the persistable variable node.
vart_type(core.VarDesc.VarType): the type of the persistable variable node.
shape(list): the shape of the persistable variable node.
var_dtype(core.VarDesc.VarType): the data type of the persistable variable node.
Returns:
core.Node: the created persistable variable node.
"""
var_desc
=
core
.
VarDesc
(
name
)
var_desc
.
set_type
(
var_type
)
var_desc
.
set_shape
(
shape
)
...
...
@@ -1570,6 +1634,20 @@ class IrGraph(object):
return
self
.
graph
.
create_var_node
(
var_desc
)
def
create_var_node
(
self
,
name
,
var_type
,
shape
,
var_dtype
):
"""
Create a variable node in the graph. The created variable node is
not persistable.
Args:
name(str): the name of the variable node.
vart_type(core.VarDesc.VarType): the type of the variable node.
shape(list): the shape of the variable node.
var_dtype(core.VarDesc.VarType): the data type of the variable node.
Returns:
core.Node: the created variable node.
"""
var_desc
=
core
.
VarDesc
(
name
)
var_desc
.
set_type
(
var_type
)
var_desc
.
set_shape
(
shape
)
...
...
@@ -1577,19 +1655,41 @@ class IrGraph(object):
return
self
.
graph
.
create_var_node
(
var_desc
)
def
create_var_node_from_desc
(
self
,
var_desc
):
"""
Create a variable node by using an existing VarDesc in the graph.
Depend on the giving VarDesc, the created variable node may be persistable.
Args:
var_desc(core.VarDesc): the giving variable description.
Returns:
core.Node: the created variable node.
"""
return
self
.
graph
.
create_var_node
(
var_desc
)
def
create_op_node
(
self
,
op_type
,
attrs
,
inputs
,
outputs
):
"""
Create a operator node in the graph.
Args:
op_type(str): the type of the operator node.
attrs(dict): the attributes of the operator node.
inputs(dict): the inputs of the operator node.
outputs(dict): the outpus of the operator node.
Returns:
core.Node: the created operator node.
"""
op_desc
=
core
.
OpDesc
()
op_desc
.
set_type
(
op_type
)
for
attr
,
value
in
attrs
.
iteritems
(
):
for
attr
,
value
in
six
.
iteritems
(
attrs
):
self
.
_update_desc_attr
(
op_desc
,
attr
,
value
)
for
input_name
,
var_nodes
in
inputs
.
iteritems
(
):
for
input_name
,
var_nodes
in
six
.
iteritems
(
inputs
):
if
not
isinstance
(
var_nodes
,
list
):
var_nodes
=
[
var_nodes
]
op_desc
.
set_input
(
input_name
,
[
var_node
.
name
()
for
var_node
in
var_nodes
])
for
output_name
,
var_nodes
in
outputs
.
iteritems
(
):
for
output_name
,
var_nodes
in
six
.
iteritems
(
outputs
):
if
not
isinstance
(
var_nodes
,
list
):
var_nodes
=
[
var_nodes
]
op_desc
.
set_output
(
output_name
,
...
...
@@ -1597,11 +1697,29 @@ class IrGraph(object):
return
self
.
graph
.
create_op_node
(
op_desc
)
def
create_op_node_from_desc
(
self
,
op_desc
):
"""
Create a operator node by using an existing OpDesc in the graph.
Args:
op_desc(core.VarDesc): the giving operator description.
Returns:
core.Node: the created operator node.
"""
return
self
.
graph
.
create_op_node
(
op_desc
)
def
update_input_link
(
self
,
old_input_node
,
new_input_node
,
op_node
):
assert
old_input_node
in
self
.
graph
.
nodes
()
and
new_input_node
in
self
.
graph
.
nodes
()
and
\
op_node
in
self
.
graph
.
nodes
(),
'Th three arguments must be in the graph nodes.'
"""
Update the input's link of a operator node.
Args:
old_input_node(core.Node): the old input node of the giving op_node.
new_input_node(core.Node): the new input node of the giving op_node.
op_node(core.Node): the operator node that is needed to update input's link.
"""
assert
old_input_node
in
self
.
graph
.
nodes
()
and
new_input_node
in
\
self
.
graph
.
nodes
()
and
op_node
in
self
.
graph
.
nodes
(),
\
'The three arguments(old_input_node&new_input_node&op_node) must be in the graph nodes.'
old_input_node
.
outputs_remove
(
op_node
)
op_node
.
inputs_remove
(
old_input_node
)
new_input_node
.
outputs_append
(
op_node
)
...
...
@@ -1609,17 +1727,85 @@ class IrGraph(object):
op_node
.
op
().
_rename_input
(
old_input_node
.
name
(),
new_input_node
.
name
())
def
link_to
(
self
,
node_in
,
node_out
):
"""
Connect two nodes.
Args:
node_in(core.Node): the input node.
node_out(core.Node): the output node.
"""
assert
node_in
in
self
.
graph
.
nodes
()
and
node_out
in
self
.
graph
.
nodes
(),
\
'Th
two arguments
must be in the graph nodes.'
'Th
e two arguments(node_in&node_out)
must be in the graph nodes.'
node_in
.
outputs_append
(
node_out
)
node_out
.
inputs_append
(
node_in
)
def
safe_remove_nodes
(
self
,
remove_nodes
):
"""
Remove nodes safely since links connected to these removed nodes are
also removed.
Args:
remove_nodes(set): the nodes prepared to be removed.
"""
if
not
isinstance
(
remove_nodes
,
set
):
remove_nodes
=
set
(
remove_nodes
)
if
isinstance
(
remove_nodes
,
Iterable
):
remove_nodes
=
set
(
remove_nodes
)
else
:
remove_nodes
=
{
remove_nodes
}
core
.
graph_safe_remove_nodes
(
self
.
graph
,
remove_nodes
)
def
draw
(
self
,
save_path
,
name
,
marked_nodes
=
None
):
def
has_circle
(
self
):
"""
Check if the graph has a circle.
Returns:
bool: True if the graph has a circle else False.
"""
return
core
.
has_circle
(
self
.
graph
)
def
graph_num
(
self
):
"""
Count the number of unconnected graphs in this graph.
Returns:
int: the number of unconnected graphs.
"""
return
core
.
graph_num
(
self
.
graph
)
def
topology_sort
(
self
):
"""
Perform the topology sort operation on the graph.
Notes: the `graph` cannot contain a circle.
Returns:
set(core.Node): nodes in topology order.
"""
return
core
.
topology_sort
(
self
.
graph
)
def
build_adjacency_list
(
self
):
"""
Build an adjacency list of operations for the `graph`.
Returns:
dict{core.Node: set(core.Node)}: the adjacency list.
"""
return
core
.
build_adjacency_list
(
self
.
graph
)
def
draw
(
self
,
save_path
,
name
,
marked_nodes
=
None
,
remove_ctr_var
=
True
):
"""
Draw the graph. If `dot` command is installed, the drawn graph
will be saved as pdf file type, otherwise dot file type is used.
Args:
save_path(str): the save path of drawn graph.
name(str): the name of drawn graph.
marked_nodes(set(core.Node)): nodes that are needed to be marked.
Default value is None.
remove_ctr_var(bool): If it is set True, all control variable nodes
in the graph will be removed. Default value is True.
"""
def
_convert_to_pdf
(
dot_file_path
):
pdf_save_path
=
os
.
path
.
splitext
(
dot_file_path
)[
0
]
+
'.pdf'
exited_code
=
subprocess
.
call
(
'dot -Tpdf '
+
dot_file_path
\
...
...
@@ -1629,15 +1815,17 @@ class IrGraph(object):
print
(
'The {} is saved as the dot filetype.'
.
format
(
dot_file_path
))
remove_ctr_vars
=
set
()
if
remove_ctr_var
:
remove_ctr_vars
=
set
()
for
node
in
self
.
graph
.
nodes
():
if
node
.
is_ctrl_var
():
remove_ctr_vars
.
add
(
node
)
self
.
safe_remove_nodes
(
remove_ctr_vars
)
ops_num
=
0
for
node
in
self
.
graph
.
nodes
():
if
node
.
is_ctrl_var
():
remove_ctr_vars
.
add
(
node
)
elif
node
.
is_op
():
if
node
.
is_op
():
ops_num
+=
1
print
(
'Total ops num = {}.'
.
format
(
ops_num
))
self
.
safe_remove_nodes
(
remove_ctr_vars
)
if
marked_nodes
is
not
None
:
if
not
isinstance
(
marked_nodes
,
set
):
marked_nodes
=
set
(
marked_nodes
)
...
...
@@ -1652,10 +1840,20 @@ class IrGraph(object):
_convert_to_pdf
(
viz_dot_path
)
def
to_program
(
self
):
"""
Convert the graph into a Program.
Notes: When the graph includes backward operator nodes, the
conversion process may be failed. Usually, this function is
only used to convert a test graph.
Returns:
Program: a program converted from the graph.
"""
convert_pass
=
core
.
get_pass
(
'graph_to_program_pass'
)
convert_pass
.
set
(
'program'
,
Program
().
desc
)
desc
=
core
.
ProgramDesc
()
convert_pass
.
set_not_owned
(
'program'
,
desc
)
convert_pass
.
apply
(
self
.
graph
)
desc
=
convert_pass
.
get_program
(
'program'
)
program
=
Program
.
_construct_from_desc
(
desc
)
return
program
...
...
python/paddle/fluid/imperative/layers.py
浏览文件 @
d79d2f68
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
collections
import
contextlib
import
sys
import
numpy
as
np
...
...
@@ -30,31 +31,45 @@ class Layer(core.Layer):
def
__init__
(
self
,
dtype
=
core
.
VarDesc
.
VarType
.
FP32
,
name
=
None
):
self
.
_built
=
False
self
.
_dtype
=
dtype
self
.
_parameters
=
collections
.
OrderedDict
()
self
.
_sub_layers
=
collections
.
OrderedDict
()
def
parameters
(
self
,
include_sublayers
=
True
):
"""Returns a list of Parameters from current and sub-layers.
Args:
include_sublayers: If true, also include the parameters from
sublayers.
Returns a list of Parameters.
"""
ret
=
[
p
for
p
in
self
.
_parameters
.
values
()]
if
include_sublayers
:
for
l
in
self
.
_sub_layers
.
values
():
for
p
in
l
.
parameters
(
include_sublayers
):
ret
.
append
(
p
)
return
ret
def
sublayers
(
self
,
include_sublayers
=
True
):
"""Returns a list of sub layers.
def
parameters
(
self
):
params
=
[]
for
key
in
self
.
__dict__
.
keys
():
value
=
self
.
__dict__
[
key
]
if
isinstance
(
value
,
framework
.
Parameter
):
params
.
append
(
value
)
elif
isinstance
(
value
,
core
.
Layer
):
params
.
extend
(
value
.
parameters
())
elif
isinstance
(
value
,
collections
.
Container
):
if
len
(
value
)
==
0
:
continue
if
isinstance
(
value
[
0
],
framework
.
Parameter
):
params
.
extend
(
value
)
elif
isinstance
(
value
[
0
],
core
.
Layer
):
for
v
in
value
:
params
.
extend
(
v
.
parameters
())
return
params
Args:
include_sublayers: If true, also include the layers from sublayers.
Returns a list of sub layers.
"""
ret
=
[
l
for
l
in
self
.
_sub_layers
.
values
()]
if
include_sublayers
:
for
l
in
self
.
_sub_layers
.
values
():
for
sub_l
in
l
.
sublayers
(
include_sublayers
):
ret
.
append
(
sub_l
)
return
ret
def
clear_gradients
(
self
):
for
p
in
self
.
parameters
():
p
.
_clear_gradient
()
def
_build_once
(
self
,
input
s
):
def
_build_once
(
self
,
*
arg
s
):
pass
def
__call__
(
self
,
*
inputs
):
...
...
@@ -71,6 +86,66 @@ class Layer(core.Layer):
def
backward
(
self
,
*
inputs
):
raise
ValueError
(
"Layer shouldn't implement backward"
)
def
add_sublayer
(
self
,
name
,
sublayer
):
"""Adds a sub Layer instance.
Added sublayer can be access like self.name.
Args:
name: name of this sublayer.
sublayer: an instance of Layer.
Returns:
the sublayer passed in.
"""
assert
isinstance
(
sublayer
,
core
.
Layer
)
self
.
_sub_layers
[
name
]
=
sublayer
return
sublayer
def
add_parameter
(
self
,
name
,
parameter
):
"""Adds a Parameter instance.
Added parameter can be access like self.name.
Args:
name: name of this sublayer.
parameter: an instance of Parameter.
Returns:
the parameter passed in.
"""
assert
isinstance
(
parameter
,
framework
.
Parameter
)
self
.
_parameters
[
name
]
=
parameter
return
parameter
def
__getattr__
(
self
,
name
):
if
name
in
self
.
_parameters
:
return
self
.
_parameters
[
name
]
elif
name
in
self
.
_sub_layers
:
return
self
.
_sub_layers
[
name
]
def
__setattr__
(
self
,
name
,
value
):
if
isinstance
(
value
,
framework
.
Parameter
):
params
=
self
.
__dict__
.
get
(
'_parameters'
,
None
)
if
params
is
None
:
raise
ValueError
(
"super(YourLayer, self).__init__() should be called first"
)
params
[
name
]
=
value
elif
isinstance
(
value
,
core
.
Layer
):
layers
=
self
.
__dict__
.
get
(
'_sub_layers'
,
None
)
if
layers
is
None
:
raise
ValueError
(
"super(YourLayer, self).__init__() should be called first"
)
layers
[
name
]
=
value
else
:
object
.
__setattr__
(
self
,
name
,
value
)
def
__delattr__
(
self
,
name
):
if
name
in
self
.
_parameters
:
del
self
.
_parameters
[
name
]
elif
name
in
self
.
_sub_layers
:
del
self
.
_sub_layers
[
name
]
else
:
object
.
__delattr__
(
self
,
name
)
class
PyLayer
(
core
.
PyLayer
):
"""Layers composed of user-defined python codes."""
...
...
python/paddle/fluid/imperative/nn.py
浏览文件 @
d79d2f68
...
...
@@ -225,9 +225,6 @@ class FC(layers.Layer):
act
=
act
,
name
=
name
)
def
parameters
(
self
):
return
[
self
.
_w
,
self
.
_b
]
def
_build_once
(
self
,
input
):
input_shape
=
input
.
shape
param_shape
=
[
...
...
@@ -478,9 +475,6 @@ class Embedding(layers.Layer):
dtype
=
self
.
_dtype
,
is_bias
=
False
)
def
parameters
(
self
):
return
[
self
.
_w
]
def
forward
(
self
,
input
):
out
=
self
.
_helper
.
create_variable_for_type_inference
(
self
.
_dtype
)
self
.
_helper
.
append_op
(
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
d79d2f68
...
...
@@ -3236,7 +3236,7 @@ def group_norm(input,
# create output
mean_out
=
helper
.
create_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
variance_out
=
helper
.
create_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
group_norm_out
=
helper
.
create_variable
(
dtype
)
group_norm_out
=
helper
.
create_variable
(
dtype
=
dtype
)
helper
.
append_op
(
type
=
"group_norm"
,
...
...
@@ -5936,13 +5936,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
than :attr:`shape`.
act (str): The non-linear activation to be applied to the reshaped tensor
variable.
inplace(bool): Must use :attr:`False` if :attr:`x` is used in multiple
operators. If this flag is set :attr:`True`, reuse input
:attr:`x` to reshape, which will change the shape of
tensor variable :attr:`x` and might cause errors when
:attr:`x` is used in multiple operators. If :attr:`False`,
preserve the shape :attr:`x` and create a new output tensor
variable whose data is copied from input x but reshaped.
inplace(bool): If ``inplace`` is `True`, the input and output of ``layers.reshape``
are the same variable, otherwise, the input and output of
``layers.reshape`` are different variables. Note that if :attr:`x`
is more than one layer's input, ``inplace`` must be :attr:`False`.
name (str): The name of this layer. It is optional.
Returns:
...
...
@@ -8335,6 +8332,46 @@ def stack(x, axis=0):
If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x[0])+1`.
If :code:`axis` is None, it would be replaced with 0.
For Example:
.. code-block:: text
Case 1:
Input:
x[0].data = [ [1.0 , 2.0 ] ]
x[0].dims = [1, 2]
x[1].data = [ [3.0 , 4.0 ] ]
x[1].dims = [1, 2]
x[2].data = [ [5.0 , 6.0 ] ]
x[2].dims = [1, 2]
Attrs:
axis = 0
Output:
Out.data =[ [ [1.0, 2.0] ],
[ [3.0, 4.0] ],
[ [5.0, 6.0] ] ]
Out.dims = [3, 1, 2]
Case 2:
Given
x[0].data = [ [1.0 , 2.0 ] ]
x[0].dims = [1, 2]
x[1].data = [ [3.0 , 4.0 ] ]
x[1].dims = [1, 2]
x[2].data = [ [5.0 , 6.0 ] ]
x[2].dims = [1, 2]
Attrs:
axis = 1 or axis = -2
Output:
Out.data =[ [ [1.0, 2.0]
[3.0, 4.0]
[5.0, 6.0] ] ]
Out.dims = [1, 3, 2]
Args:
x (Variable|list(Variable)|tuple(Variable)): Input variables.
axis (int|None): The axis along which all inputs are stacked.
...
...
python/paddle/fluid/layers/tensor.py
浏览文件 @
d79d2f68
...
...
@@ -567,7 +567,7 @@ def ones(shape, dtype, force_cpu=False):
It also sets *stop_gradient* to True.
Args:
shape(tuple|list
|None
): Shape of output tensor
shape(tuple|list): Shape of output tensor
dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor
Returns:
...
...
@@ -578,6 +578,10 @@ def ones(shape, dtype, force_cpu=False):
data = fluid.layers.ones(shape=[1], dtype='int64')
"""
assert
isinstance
(
shape
,
list
)
or
isinstance
(
shape
,
tuple
),
"The shape's type should be list or tuple."
assert
reduce
(
lambda
x
,
y
:
x
*
y
,
shape
)
>
0
,
"The shape is invalid: %s."
%
(
str
(
shape
))
return
fill_constant
(
value
=
1.0
,
**
locals
())
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
d79d2f68
...
...
@@ -148,6 +148,8 @@ class ParallelExecutor(object):
else
framework
.
default_main_program
()
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass.
if
build_strategy
.
memory_optimize
is
None
:
build_strategy
.
memory_optimize
=
False
if
main
.
_is_mem_optimized
else
True
if
build_strategy
.
enable_inplace
is
None
:
build_strategy
.
enable_inplace
=
False
if
main
.
_is_mem_optimized
else
True
scope
=
scope
if
scope
is
not
None
else
executor
.
global_scope
()
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
d79d2f68
...
...
@@ -77,6 +77,7 @@ list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op)
list
(
REMOVE_ITEM TEST_OPS test_nearest_interp_op
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_resnet
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_optimizer
)
list
(
REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
endforeach
(
TEST_OP
)
...
...
@@ -107,6 +108,9 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
set_tests_properties
(
test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450
)
py_test_modules
(
test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL
)
if
(
NOT WIN32
)
py_test_modules
(
test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL
)
endif
()
if
(
NOT APPLE
)
py_test_modules
(
test_image_classification_resnet MODULES test_image_classification_resnet SERIAL
)
if
(
CMAKE_BUILD_TYPE STREQUAL
"Debug"
)
...
...
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
浏览文件 @
d79d2f68
...
...
@@ -79,7 +79,7 @@ class TestParallelExecutorBase(unittest.TestCase):
if
use_reduce
else
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
build_strategy
.
fuse_elewise_add_act_ops
=
fuse_elewise_add_act_ops
build_strategy
.
fuse_relu_depthwise_conv
=
fuse_relu_depthwise_conv
build_strategy
.
memory_optimize
=
use_ir_memory_optimize
build_strategy
.
memory_optimize
=
False
if
memory_opt
else
use_ir_memory_optimize
# python memory optimization is conflict with inplace pass.
# Use ir graph memory optimization after inplace pass is the correct way.
build_strategy
.
enable_inplace
=
False
if
memory_opt
else
enable_inplace
...
...
python/paddle/fluid/tests/unittests/test_base_layer.py
0 → 100644
浏览文件 @
d79d2f68
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
import
paddle.fluid
as
fluid
from
paddle.fluid.layer_helper
import
LayerHelper
class
L1
(
fluid
.
imperative
.
Layer
):
def
__init__
(
self
):
super
(
L1
,
self
).
__init__
()
self
.
_helper
=
LayerHelper
(
'MyLayer'
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.1
)))
self
.
w1
=
self
.
_helper
.
create_parameter
(
attr
=
self
.
_helper
.
param_attr
,
shape
=
[
2
,
2
],
dtype
=
'float32'
,
is_bias
=
False
)
self
.
w2
=
self
.
_helper
.
create_parameter
(
attr
=
self
.
_helper
.
param_attr
,
shape
=
[
2
,
2
],
dtype
=
'float32'
,
is_bias
=
False
)
def
forward
(
self
):
return
self
.
w1
+
self
.
w2
class
L2
(
fluid
.
imperative
.
Layer
):
def
__init__
(
self
):
super
(
L2
,
self
).
__init__
()
self
.
layer1
=
L1
()
self
.
layer2
=
L1
()
def
forward
(
self
):
return
self
.
layer1
()
+
self
.
layer2
()
class
L3
(
fluid
.
imperative
.
Layer
):
def
__init__
(
self
):
super
(
L3
,
self
).
__init__
()
self
.
layer1
=
L2
()
self
.
layer2
=
L2
()
def
forward
(
self
):
return
self
.
layer1
()
+
self
.
layer2
()
class
TestBaseLayer
(
unittest
.
TestCase
):
def
test_one_level
(
self
):
with
fluid
.
imperative
.
guard
():
l
=
L1
()
ret
=
l
()
self
.
assertEqual
(
l
.
w1
.
name
,
"MyLayer_0.w_0"
)
self
.
assertEqual
(
l
.
w2
.
name
,
"MyLayer_0.w_1"
)
self
.
assertTrue
(
np
.
allclose
(
ret
.
_numpy
(),
0.2
*
np
.
ones
([
2
,
2
])))
def
test_three_level
(
self
):
with
fluid
.
imperative
.
guard
():
l
=
L3
()
ret
=
l
()
self
.
assertTrue
(
np
.
allclose
(
ret
.
_numpy
(),
0.8
*
np
.
ones
([
2
,
2
])))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
浏览文件 @
d79d2f68
...
...
@@ -22,6 +22,9 @@ import six
import
unittest
import
numpy
as
np
import
gc
gc
.
set_debug
(
gc
.
DEBUG_COLLECTABLE
)
import
paddle.fluid
as
fluid
...
...
@@ -99,6 +102,12 @@ class TranspilerTest(unittest.TestCase):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
program_guard
(
main
,
startup
):
self
.
transpiler_test_impl
()
# NOTE: run gc.collect to eliminate pybind side objects to
# prevent random double-deallocate when inherited in python.
del
self
.
transpiler
del
main
del
startup
gc
.
collect
()
class
TestBasicModel
(
TranspilerTest
):
...
...
@@ -797,6 +806,7 @@ class TestNCCL2Transpile(TranspilerTest):
print
([
op
.
type
for
op
in
startup
.
global_block
().
ops
])
self
.
assertEqual
(
startup
.
global_block
().
ops
[
-
1
].
type
,
"gen_nccl_id"
)
self
.
assertIsNotNone
(
startup
.
global_block
().
vars
.
get
(
"NCCLID"
))
gc
.
collect
()
else
:
pass
...
...
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
浏览文件 @
d79d2f68
...
...
@@ -121,6 +121,8 @@ class TestMNIST(TestParallelExecutorBase):
regularization
=
fluid
.
regularizer
.
L2Decay
(
1e-6
))
return
optimizer
# NOTE(dzh):
# need to make it compatible with elewise fuse act
not_fuse_op_first_loss
,
not_fuse_op_last_loss
=
self
.
check_network_convergence
(
model
,
feed_dict
=
{
"image"
:
img
,
...
...
@@ -128,6 +130,7 @@ class TestMNIST(TestParallelExecutorBase):
use_cuda
=
use_cuda
,
fuse_elewise_add_act_ops
=
False
,
memory_opt
=
False
,
use_ir_memory_optimize
=
False
,
optimizer
=
_optimizer
)
fuse_op_first_loss
,
fuse_op_last_loss
=
self
.
check_network_convergence
(
model
,
...
...
@@ -136,6 +139,7 @@ class TestMNIST(TestParallelExecutorBase):
use_cuda
=
use_cuda
,
fuse_elewise_add_act_ops
=
True
,
memory_opt
=
False
,
use_ir_memory_optimize
=
False
,
optimizer
=
_optimizer
)
for
loss
in
zip
(
not_fuse_op_first_loss
,
fuse_op_first_loss
):
...
...
python/paddle/fluid/tests/unittests/test_imperative.py
浏览文件 @
d79d2f68
...
...
@@ -333,6 +333,18 @@ class TestImperative(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
dy_out
,
static_out
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad
,
static_grad
))
params
=
mlp
.
parameters
(
True
)
self
.
assertEqual
(
"FC_0.w_0"
,
params
[
0
].
name
)
self
.
assertEqual
(
"FC_0.b_0"
,
params
[
1
].
name
)
self
.
assertEqual
(
"FC_1.w_0"
,
params
[
2
].
name
)
self
.
assertEqual
(
"FC_1.b_0"
,
params
[
3
].
name
)
self
.
assertEqual
(
len
(
params
),
4
)
sublayers
=
mlp
.
sublayers
(
True
)
self
.
assertEqual
(
mlp
.
_fc1
,
sublayers
[
0
])
self
.
assertEqual
(
mlp
.
_fc2
,
sublayers
[
1
])
self
.
assertEqual
(
len
(
sublayers
),
2
)
def
test_rnn
(
self
):
np_inp
=
np
.
array
([[
1.0
,
2.0
,
3.0
],
[
4.0
,
5.0
,
6.0
],
[
7.0
,
8.0
,
9.0
],
[
10.0
,
11.0
,
12.0
]])
...
...
python/paddle/fluid/tests/unittests/test_imperative_gan.py
浏览文件 @
d79d2f68
...
...
@@ -33,9 +33,6 @@ class Discriminator(fluid.imperative.Layer):
self
.
_fc1
=
FC
(
size
=
32
,
act
=
'elu'
,
name
=
"d_fc1"
)
self
.
_fc2
=
FC
(
size
=
1
,
name
=
"d_fc2"
)
def
parameters
(
self
):
return
self
.
_fc1
.
parameters
()
+
self
.
_fc2
.
parameters
()
def
forward
(
self
,
inputs
):
x
=
self
.
_fc1
(
inputs
)
return
self
.
_fc2
(
x
)
...
...
@@ -48,10 +45,6 @@ class Generator(fluid.imperative.Layer):
self
.
_fc2
=
FC
(
size
=
64
,
act
=
'elu'
,
name
=
"g_fc2"
)
self
.
_fc3
=
FC
(
size
=
1
,
name
=
"g_fc3"
)
def
parameters
(
self
):
return
self
.
_fc1
.
parameters
()
+
self
.
_fc2
.
parameters
(
)
+
self
.
_fc3
.
parameters
()
def
forward
(
self
,
inputs
):
x
=
self
.
_fc1
(
inputs
)
x
=
self
.
_fc2
(
x
)
...
...
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
浏览文件 @
d79d2f68
...
...
@@ -75,16 +75,6 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
self
.
hidden_array
.
append
(
pre_hidden
)
self
.
cell_array
.
append
(
pre_cell
)
def
parameters
(
self
):
parameters
=
list
()
for
param
in
self
.
weight_1_arr
:
parameters
.
append
(
param
)
for
param
in
self
.
weight_2_arr
:
parameters
.
append
(
param
)
for
bias
in
self
.
bias_arr
:
parameters
.
append
(
bias
)
return
parameters
def
forward
(
self
,
input_embedding
,
init_hidden
=
None
,
init_cell
=
None
):
res
=
[]
for
index
in
range
(
self
.
_num_steps
):
...
...
@@ -177,12 +167,6 @@ class PtbModel(fluid.imperative.Layer):
def
_build_once
(
self
,
input
,
label
,
init_hidden
,
init_cell
):
pass
def
parameters
(
self
):
parameters
=
self
.
simple_lstm_rnn
.
parameters
()
+
[
self
.
softmax_weight
,
self
.
softmax_bias
]
+
self
.
embedding
.
parameters
()
return
parameters
def
forward
(
self
,
input
,
label
,
init_hidden
,
init_cell
):
init_h
=
fluid
.
layers
.
reshape
(
...
...
python/paddle/fluid/tests/unittests/test_imperative_resnet.py
浏览文件 @
d79d2f68
...
...
@@ -21,7 +21,6 @@ import paddle
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid.layer_helper
import
LayerHelper
from
paddle.fluid.optimizer
import
SGDOptimizer
from
paddle.fluid.imperative.nn
import
Conv2D
,
Pool2D
,
BatchNorm
,
FC
from
paddle.fluid.imperative.base
import
to_variable
from
test_imperative_base
import
new_program_scope
...
...
@@ -173,11 +172,13 @@ class ResNet(fluid.imperative.Layer):
for
block
in
range
(
len
(
depth
)):
shortcut
=
False
for
i
in
range
(
depth
[
block
]):
bottleneck_block
=
BottleneckBlock
(
num_channels
=
num_channels
,
num_filters
=
num_filters
[
block
],
stride
=
2
if
i
==
0
and
block
!=
0
else
1
,
shortcut
=
shortcut
)
bottleneck_block
=
self
.
add_sublayer
(
'bb_%d_%d'
%
(
block
,
i
),
BottleneckBlock
(
num_channels
=
num_channels
,
num_filters
=
num_filters
[
block
],
stride
=
2
if
i
==
0
and
block
!=
0
else
1
,
shortcut
=
shortcut
))
num_channels
=
bottleneck_block
.
_num_channels_out
self
.
bottleneck_block_list
.
append
(
bottleneck_block
)
shortcut
=
True
...
...
@@ -223,8 +224,7 @@ class TestImperativeResnet(unittest.TestCase):
batch_size
=
batch_size
)
dy_param_init_value
=
{}
for
param
in
fluid
.
default_main_program
().
global_block
(
).
all_parameters
():
for
param
in
resnet
.
parameters
():
dy_param_init_value
[
param
.
name
]
=
param
.
_numpy
()
for
batch_id
,
data
in
enumerate
(
train_reader
()):
...
...
@@ -247,16 +247,14 @@ class TestImperativeResnet(unittest.TestCase):
dy_out
=
avg_loss
.
_numpy
()
if
batch_id
==
0
:
for
param
in
fluid
.
default_main_program
().
global_block
(
).
all_parameters
():
for
param
in
resnet
.
parameters
():
if
param
.
name
not
in
dy_param_init_value
:
dy_param_init_value
[
param
.
name
]
=
param
.
_numpy
()
avg_loss
.
_backward
()
dy_grad_value
=
{}
for
param
in
fluid
.
default_main_program
().
global_block
(
).
all_parameters
():
for
param
in
resnet
.
parameters
():
if
not
param
.
stop_gradient
:
np_array
=
np
.
array
(
param
.
_ivar
.
_grad_ivar
().
value
()
.
get_tensor
())
...
...
@@ -267,8 +265,7 @@ class TestImperativeResnet(unittest.TestCase):
resnet
.
clear_gradients
()
dy_param_value
=
{}
for
param
in
fluid
.
default_main_program
().
global_block
(
).
all_parameters
():
for
param
in
resnet
.
parameters
():
dy_param_value
[
param
.
name
]
=
param
.
_numpy
()
with
new_program_scope
():
...
...
@@ -349,6 +346,7 @@ class TestImperativeResnet(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
static_out
,
dy_out
))
self
.
assertEqual
(
len
(
dy_param_init_value
),
len
(
static_param_init_value
))
for
key
,
value
in
six
.
iteritems
(
static_param_init_value
):
self
.
assertTrue
(
np
.
allclose
(
value
,
dy_param_init_value
[
key
]))
self
.
assertTrue
(
np
.
isfinite
(
value
.
all
()))
...
...
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
0 → 100644
浏览文件 @
d79d2f68
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
os
.
environ
[
'FLAGS_eager_delete_tensor_gb'
]
=
"0.0"
os
.
environ
[
'RECORDIO_FILENAME'
]
=
'/tmp/ir_memory_optimize_transformer.wmt16.recordio'
from
test_parallel_executor_transformer
import
TestTransformer
from
test_parallel_executor_transformer
import
transformer
# NOTE(dzhwinter): test diferent strategy colisions.
# open the eager delete tensor strategy by default.
class
TestTransformerWithIR
(
TestTransformer
):
def
test_main
(
self
):
if
core
.
is_compiled_with_cuda
():
# check python transpiler
self
.
check_network_convergence
(
transformer
,
use_cuda
=
True
,
memory_opt
=
True
,
use_ir_memory_optimize
=
False
)
# check IR memory optimize
self
.
check_network_convergence
(
transformer
,
use_cuda
=
True
,
memory_opt
=
False
,
use_ir_memory_optimize
=
True
)
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录