Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
c2feab7f
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c2feab7f
编写于
10月 23, 2017
作者:
L
Luo Tao
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into sparse_vector
上级
36ebf00f
abce9eb7
变更
61
隐藏空白更改
内联
并排
Showing
61 changed file
with
1846 addition
and
1079 deletion
+1846
-1079
Dockerfile
Dockerfile
+1
-1
doc/design/block.md
doc/design/block.md
+1
-1
paddle/framework/CMakeLists.txt
paddle/framework/CMakeLists.txt
+3
-3
paddle/framework/backward.cc
paddle/framework/backward.cc
+14
-2
paddle/framework/block_desc.cc
paddle/framework/block_desc.cc
+14
-1
paddle/framework/block_desc.h
paddle/framework/block_desc.h
+5
-0
paddle/framework/framework.proto
paddle/framework/framework.proto
+1
-0
paddle/framework/lod_tensor.cc
paddle/framework/lod_tensor.cc
+39
-30
paddle/framework/lod_tensor.h
paddle/framework/lod_tensor.h
+19
-6
paddle/framework/lod_tensor_test.cc
paddle/framework/lod_tensor_test.cc
+15
-16
paddle/framework/op_proto_maker.h
paddle/framework/op_proto_maker.h
+5
-0
paddle/framework/operator.cc
paddle/framework/operator.cc
+15
-0
paddle/framework/operator.h
paddle/framework/operator.h
+36
-10
paddle/framework/program_desc.cc
paddle/framework/program_desc.cc
+2
-2
paddle/framework/program_desc.h
paddle/framework/program_desc.h
+1
-0
paddle/framework/proto_desc.h
paddle/framework/proto_desc.h
+26
-0
paddle/gserver/activations/MKLDNNActivation.cpp
paddle/gserver/activations/MKLDNNActivation.cpp
+3
-3
paddle/gserver/layers/MKLDNNBase.h
paddle/gserver/layers/MKLDNNBase.h
+2
-2
paddle/gserver/layers/MKLDNNConvLayer.cpp
paddle/gserver/layers/MKLDNNConvLayer.cpp
+34
-202
paddle/gserver/layers/MKLDNNConvLayer.h
paddle/gserver/layers/MKLDNNConvLayer.h
+0
-66
paddle/gserver/layers/MKLDNNFcLayer.cpp
paddle/gserver/layers/MKLDNNFcLayer.cpp
+25
-81
paddle/gserver/layers/MKLDNNFcLayer.h
paddle/gserver/layers/MKLDNNFcLayer.h
+0
-8
paddle/gserver/layers/MKLDNNLayer.cpp
paddle/gserver/layers/MKLDNNLayer.cpp
+333
-0
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+142
-199
paddle/gserver/layers/MKLDNNPoolLayer.cpp
paddle/gserver/layers/MKLDNNPoolLayer.cpp
+13
-90
paddle/gserver/layers/MKLDNNPoolLayer.h
paddle/gserver/layers/MKLDNNPoolLayer.h
+0
-13
paddle/gserver/tests/MKLDNNTester.cpp
paddle/gserver/tests/MKLDNNTester.cpp
+24
-20
paddle/gserver/tests/MKLDNNTester.h
paddle/gserver/tests/MKLDNNTester.h
+2
-6
paddle/gserver/tests/mkldnn_branches_fc.conf
paddle/gserver/tests/mkldnn_branches_fc.conf
+58
-0
paddle/gserver/tests/mkldnn_branches_pool.conf
paddle/gserver/tests/mkldnn_branches_pool.conf
+60
-0
paddle/gserver/tests/test_MKLDNN.cpp
paddle/gserver/tests/test_MKLDNN.cpp
+1
-1
paddle/math/MKLDNNMatrix.cpp
paddle/math/MKLDNNMatrix.cpp
+4
-4
paddle/math/MKLDNNMatrix.h
paddle/math/MKLDNNMatrix.h
+16
-3
paddle/operators/clip_op.cc
paddle/operators/clip_op.cc
+2
-2
paddle/operators/dynamic_recurrent_op.cc
paddle/operators/dynamic_recurrent_op.cc
+194
-115
paddle/operators/dynamic_recurrent_op.h
paddle/operators/dynamic_recurrent_op.h
+108
-57
paddle/operators/dynamic_recurrent_op_test.cc
paddle/operators/dynamic_recurrent_op_test.cc
+22
-26
paddle/operators/gaussian_random_op.cc
paddle/operators/gaussian_random_op.cc
+1
-1
paddle/operators/momentum_op.cc
paddle/operators/momentum_op.cc
+7
-2
paddle/operators/momentum_op.h
paddle/operators/momentum_op.h
+8
-1
paddle/operators/recurrent_op.cc
paddle/operators/recurrent_op.cc
+13
-13
paddle/operators/reduce_op.cc
paddle/operators/reduce_op.cc
+63
-0
paddle/operators/rnn/recurrent_op_utils.cc
paddle/operators/rnn/recurrent_op_utils.cc
+11
-11
paddle/operators/rnn/recurrent_op_utils.h
paddle/operators/rnn/recurrent_op_utils.h
+6
-6
paddle/operators/uniform_random_op.cc
paddle/operators/uniform_random_op.cc
+1
-1
paddle/pybind/pybind.cc
paddle/pybind/pybind.cc
+7
-5
paddle/scripts/docker/build.sh
paddle/scripts/docker/build.sh
+7
-0
paddle/trainer/tests/sample_trainer_config_branch_net.conf
paddle/trainer/tests/sample_trainer_config_branch_net.conf
+13
-13
paddle/trainer/tests/sample_trainer_config_simple_net.conf
paddle/trainer/tests/sample_trainer_config_simple_net.conf
+1
-1
python/paddle/v2/framework/nets.py
python/paddle/v2/framework/nets.py
+6
-3
python/paddle/v2/framework/optimizer.py
python/paddle/v2/framework/optimizer.py
+58
-1
python/paddle/v2/framework/tests/test_cross_entropy_op.py
python/paddle/v2/framework/tests/test_cross_entropy_op.py
+1
-1
python/paddle/v2/framework/tests/test_dynamic_recurrent_op.py
...on/paddle/v2/framework/tests/test_dynamic_recurrent_op.py
+94
-37
python/paddle/v2/framework/tests/test_momentum_op.py
python/paddle/v2/framework/tests/test_momentum_op.py
+43
-2
python/paddle/v2/framework/tests/test_op_support_gpu.py
python/paddle/v2/framework/tests/test_op_support_gpu.py
+11
-0
python/paddle/v2/framework/tests/test_optimizer.py
python/paddle/v2/framework/tests/test_optimizer.py
+41
-0
python/paddle/v2/framework/tests/test_recognize_digits_conv.py
...n/paddle/v2/framework/tests/test_recognize_digits_conv.py
+92
-0
python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
...on/paddle/v2/framework/tests/test_recognize_digits_mlp.py
+83
-0
python/paddle/v2/framework/tests/test_recurrent_op.py
python/paddle/v2/framework/tests/test_recurrent_op.py
+10
-10
python/paddle/v2/framework/tests/test_reduce_op.py
python/paddle/v2/framework/tests/test_reduce_op.py
+28
-0
python/paddle/v2/framework/tests/test_rmsprop_op.py
python/paddle/v2/framework/tests/test_rmsprop_op.py
+1
-1
未找到文件。
Dockerfile
浏览文件 @
c2feab7f
...
...
@@ -22,7 +22,7 @@ COPY ./paddle/scripts/docker/root/ /root/
RUN
apt-get update
&&
\
apt-get
install
-y
\
git python-pip python-dev openssh-server bison
\
git python-pip python-dev openssh-server bison
libnccl-dev
\
wget unzip unrar
tar
xz-utils bzip2
gzip
coreutils ntp
\
curl
sed grep
graphviz libjpeg-dev zlib1g-dev
\
python-matplotlib gcc-4.8 g++-4.8
\
...
...
doc/design/block.md
浏览文件 @
c2feab7f
...
...
@@ -189,7 +189,7 @@ OpDesc {
inputs = {0} // the index of x in vars of BlockDesc above
outputs = {5, 3} // indices of act and hidden_out in vars of BlockDesc above
attrs {
"
memori
es" : {1} // the index of h
"
stat
es" : {1} // the index of h
"step_net" : <above step net>
}
};
...
...
paddle/framework/CMakeLists.txt
浏览文件 @
c2feab7f
...
...
@@ -19,15 +19,15 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope)
proto_library
(
framework_proto SRCS framework.proto
)
cc_library
(
attribute SRCS attribute.cc DEPS framework_proto
)
cc_library
(
proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info
)
cc_test
(
program_desc_test SRCS program_desc_test.cc DEPS proto_desc
)
cc_library
(
op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute
)
cc_test
(
op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker
)
cc_library
(
op_info SRCS op_info.cc DEPS attribute framework_proto
)
cc_library
(
operator SRCS operator.cc DEPS op_info device_context tensor scope
proto_desc
glog
)
cc_library
(
operator SRCS operator.cc DEPS op_info device_context tensor scope glog
)
cc_test
(
operator_test SRCS operator_test.cc DEPS operator op_registry
)
cc_library
(
proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info operator
)
cc_library
(
op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog
)
cc_library
(
op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog
proto_desc
)
cc_test
(
op_registry_test SRCS op_registry_test.cc DEPS op_registry
)
py_proto_compile
(
framework_py_proto SRCS framework.proto
)
...
...
paddle/framework/backward.cc
浏览文件 @
c2feab7f
...
...
@@ -21,6 +21,7 @@
#include "paddle/framework/block_desc.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/dynamic_recurrent_op.h"
#include "paddle/operators/net_op.h"
#include "paddle/operators/recurrent_op.h"
...
...
@@ -220,8 +221,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
// process recurrent gradient op as a special operator.
if
(
forwardOp
.
Type
()
==
"recurrent"
)
{
// NOTE clean up cycle call somewhere (RNN's stepnet constains itself),
// or
// this will result in infinite loop.
// or this will result in infinite loop.
const
auto
&
rnnop
=
*
static_cast
<
const
operators
::
RecurrentOp
*>
(
&
forwardOp
);
auto
rnn_grad_op
=
...
...
@@ -231,6 +231,18 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
// create stepnet's gradient op
rnn_grad_op
->
set_stepnet
(
BackwardRecursive
(
stepnet_op
,
no_grad_names
,
grad_to_var
,
uniq_id
));
}
else
if
(
forwardOp
.
Type
()
==
"dynamic_recurrent"
)
{
// NOTE clean up cycle call somewhere (RNN's stepnet constains itself),
// or this will result in infinite loop.
const
auto
&
rnnop
=
*
static_cast
<
const
operators
::
DynamicRecurrentOp
*>
(
&
forwardOp
);
auto
rnn_grad_op
=
static_cast
<
operators
::
DynamicRecurrentGradientOp
*>
(
grad_op
.
get
());
const
auto
&
stepnet_op
=
*
static_cast
<
const
OperatorBase
*>
(
&
rnnop
.
rnn
.
GetStepUnit
());
// create stepnet's gradient op
rnn_grad_op
->
rnn
.
SetStepUnit
(
BackwardRecursive
(
stepnet_op
,
no_grad_names
,
grad_to_var
,
uniq_id
));
}
if
(
net
->
ops_
.
empty
())
{
// Current no aux op is added to network
...
...
paddle/framework/block_desc.cc
浏览文件 @
c2feab7f
...
...
@@ -41,6 +41,19 @@ bool BlockDescBind::HasVar(const std::string &name) const {
return
vars_
.
find
(
name
)
!=
vars_
.
end
();
}
VarDescBind
*
BlockDescBind
::
FindVarRecursive
(
const
std
::
string
&
name
)
const
{
auto
it
=
vars_
.
find
(
name
);
if
(
it
==
vars_
.
end
())
{
return
Parent
()
==
kNoneBlockIndex
?
nullptr
:
ParentBlock
()
->
FindVarRecursive
(
name
);
}
return
it
->
second
.
get
();
}
bool
BlockDescBind
::
HasVarRecursive
(
const
std
::
string
&
name
)
const
{
return
FindVarRecursive
(
name
)
!=
nullptr
;
}
std
::
vector
<
VarDescBind
*>
BlockDescBind
::
AllVars
()
const
{
std
::
vector
<
VarDescBind
*>
res
;
for
(
const
auto
&
p
:
vars_
)
{
...
...
@@ -97,7 +110,7 @@ void BlockDescBind::Flush() {
}
BlockDescBind
*
BlockDescBind
::
ParentBlock
()
const
{
if
(
this
->
desc_
->
parent_idx
()
==
-
1
)
{
if
(
this
->
desc_
->
parent_idx
()
==
kNoneBlockIndex
)
{
return
nullptr
;
}
return
prog_
->
Block
(
static_cast
<
size_t
>
(
this
->
desc_
->
parent_idx
()));
...
...
paddle/framework/block_desc.h
浏览文件 @
c2feab7f
...
...
@@ -21,6 +21,7 @@ limitations under the License. */
#include <vector>
#include "paddle/framework/op_desc.h"
#include "paddle/framework/proto_desc.h"
#include "paddle/framework/var_desc.h"
#include "paddle/platform/macros.h"
...
...
@@ -56,6 +57,10 @@ class BlockDescBind {
bool
HasVar
(
const
std
::
string
&
var_name
)
const
;
VarDescBind
*
FindVarRecursive
(
const
std
::
string
&
name_bytes
)
const
;
bool
HasVarRecursive
(
const
std
::
string
&
var_name
)
const
;
std
::
set
<
std
::
string
>
LocalVarNames
()
const
{
std
::
set
<
std
::
string
>
var_names
;
for
(
auto
&
var
:
vars_
)
{
...
...
paddle/framework/framework.proto
浏览文件 @
c2feab7f
...
...
@@ -68,6 +68,7 @@ message OpProto {
optional
bool
duplicable
=
3
[
default
=
false
];
optional
bool
intermediate
=
4
[
default
=
false
];
optional
bool
dispensable
=
5
[
default
=
false
];
}
// AttrProto describes the C++ type Attribute.
...
...
paddle/framework/lod_tensor.cc
浏览文件 @
c2feab7f
...
...
@@ -25,31 +25,50 @@ LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) {
for
(
size_t
i
=
level_begin
;
i
<
level_end
;
i
++
)
{
new_lod
.
emplace_back
(
in
.
at
(
i
));
}
// transform the lowest level to absolute offset.
LoD
abs_offset_lod
=
ToAbsOffset
(
in
);
new_lod
.
back
()
=
abs_offset_lod
[
level_end
-
1
];
return
new_lod
;
}
LoD
SliceInLevel
(
const
LoD
&
in
,
size_t
level
,
size_t
elem_begin
,
size_t
elem_end
)
{
// slice the lod.
LoD
new_lod
;
new_lod
.
reserve
(
in
.
size
()
-
level
);
auto
start
=
in
.
at
(
level
)[
elem_begin
];
auto
end
=
in
.
at
(
level
)[
elem_end
];
for
(
auto
it
=
in
.
begin
()
+
level
;
it
!=
in
.
end
();
it
++
)
{
auto
it_begin
=
std
::
find
(
it
->
begin
(),
it
->
end
(),
start
);
auto
it_end
=
std
::
find
(
it_begin
,
it
->
end
(),
end
);
PADDLE_ENFORCE
(
it_begin
!=
it
->
end
(),
"error in parsing lod info"
);
PADDLE_ENFORCE
(
it_end
!=
it
->
end
(),
"error in parsing lod info"
);
new_lod
.
emplace_back
(
it_begin
,
it_end
+
1
);
// reset offset if tensor is copyed and sliced.
std
::
transform
(
new_lod
.
back
().
begin
(),
new_lod
.
back
().
end
(),
new_lod
.
back
().
begin
(),
[
start
](
int
v
)
{
return
v
-
start
;
});
PADDLE_ENFORCE_EQ
(
new_lod
.
back
().
front
(),
0
,
"error in slice LoD"
);
PADDLE_ENFORCE_LT
(
level
,
in
.
size
());
PADDLE_ENFORCE_LT
(
elem_end
,
in
[
level
].
size
());
LoD
res
;
res
.
resize
(
in
.
size
()
-
level
);
// copy the first level
res
[
0
].
assign
(
in
[
level
].
begin
()
+
elem_begin
,
in
[
level
].
begin
()
+
elem_end
+
1
);
for
(
size_t
lvl
=
1
;
lvl
<
res
.
size
();
lvl
++
)
{
const
auto
&
in_level
=
in
[
level
+
lvl
];
const
auto
&
above_level
=
res
[
lvl
-
1
];
auto
&
out_level
=
res
[
lvl
];
out_level
.
assign
(
in_level
.
begin
()
+
above_level
.
front
(),
in_level
.
begin
()
+
above_level
.
back
()
+
1
);
}
PADDLE_ENFORCE_LE
(
new_lod
.
size
(),
in
.
size
());
return
new_lod
;
for
(
size_t
lvl
=
0
;
lvl
<
res
.
size
();
lvl
++
)
{
// to make the first offset equals 0, all the elements minus the first
// element
size_t
front
=
res
[
lvl
].
front
();
for
(
auto
&
ele
:
res
[
lvl
])
{
ele
-=
front
;
}
}
return
res
;
}
LoD
ToAbsOffset
(
const
LoD
&
in
)
{
// the lowest level stores relative offsets
if
(
in
.
empty
()
||
in
.
size
()
==
1
)
return
in
;
LoD
result
=
in
;
for
(
int
level
=
result
.
size
()
-
2
;
level
>=
0
;
level
--
)
{
for
(
auto
&
ele
:
result
[
level
])
{
ele
=
result
[
level
+
1
][
ele
];
}
}
return
result
;
}
bool
operator
==
(
const
LoD
&
a
,
const
LoD
&
b
)
{
...
...
@@ -75,17 +94,7 @@ bool operator==(const LoD& a, const LoD& b) {
size_t
LoDTensor
::
NumElements
(
size_t
level
,
size_t
idx
)
const
{
PADDLE_ENFORCE_LT
(
level
,
NumLevels
());
PADDLE_ENFORCE_LT
(
idx
,
NumElements
(
level
));
// the last level of LoD, just return number of records in Tensor
if
(
level
==
NumLevels
()
-
1
)
{
return
lod_
[
level
][
idx
+
1
]
-
lod_
[
level
][
idx
];
}
// high level of LoD, and there is another lower level, return number of
// lower-level elements
auto
tmp
=
SliceInLevel
(
lod_
,
level
,
idx
,
idx
+
1
);
PADDLE_ENFORCE_GE
(
tmp
.
size
(),
2
);
// there is a 0 as a placeholder stored in LoD, so the number of elements
// equals lod.size() - 1
return
tmp
[
1
].
size
()
-
1
;
return
lod_
[
level
][
idx
+
1
]
-
lod_
[
level
][
idx
];
}
void
LoDTensor
::
ShrinkLevels
(
size_t
level_begin
,
size_t
level_end
)
{
...
...
paddle/framework/lod_tensor.h
浏览文件 @
c2feab7f
...
...
@@ -39,23 +39,36 @@ using Vector = thrust::host_vector<
#endif
/*
*
3-level LoD stores
*
LoD is short for Level of Details.
*
* 0 10 20
* 0 5 10 15 20
* 0 2 5 7 10 12 15 20
*
* - in a level, each element indicates offset in the underlying Tensor
* - in a level, each element indicates relative offset of the lower level
* - the first element should be 0 and that indicates that this sequence start
* from 0
* - each sequence's begin and end(no-inclusive) is level[id, id+1]
*
* For example:
* 3-level LoD stores
*
* 0 2 3
* 0 2 4 7
* 0 2 5 7 10 12 15 20
*/
using
LoD
=
std
::
vector
<
Vector
<
size_t
>>
;
/*
* Slice levels from a LoD.
* NOTE the lowest level should always be the absolute offsets of the underlying
* tensor instances. So if higher layers are sliced without the lowest level,
* the lower level of the sliced LoD will be transformed to the absolute offset.
*/
LoD
SliceLevels
(
const
LoD
&
in
,
size_t
level_begin
,
size_t
level_end
);
LoD
SliceInLevel
(
const
LoD
&
in
,
size_t
level
,
size_t
elem_begin
,
size_t
elem_end
);
/*
* Transform an LoD from relative offsets to absolute offsets.
*/
LoD
ToAbsOffset
(
const
LoD
&
in
);
bool
operator
==
(
const
LoD
&
a
,
const
LoD
&
b
);
...
...
paddle/framework/lod_tensor_test.cc
浏览文件 @
c2feab7f
...
...
@@ -30,8 +30,8 @@ class LoDTensorTester : public ::testing::Test {
// 0 5 10 15 20
// 0 2 5 7 10 12 15 20
LoD
lod
;
lod
.
push_back
(
std
::
vector
<
size_t
>
{
0
,
10
,
20
});
lod
.
push_back
(
std
::
vector
<
size_t
>
{
0
,
5
,
10
,
15
,
20
});
lod
.
push_back
(
std
::
vector
<
size_t
>
{
0
,
2
,
3
});
lod
.
push_back
(
std
::
vector
<
size_t
>
{
0
,
2
,
5
,
8
});
lod
.
push_back
(
std
::
vector
<
size_t
>
{
0
,
2
,
5
,
7
,
10
,
12
,
15
,
17
,
20
});
ASSERT_EQ
(
lod
.
size
(),
3UL
);
...
...
@@ -52,14 +52,14 @@ TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor_.NumLevels(), 3UL); }
TEST_F
(
LoDTensorTester
,
NumElements
)
{
ASSERT_EQ
(
lod_tensor_
.
NumElements
(
0
),
2UL
);
ASSERT_EQ
(
lod_tensor_
.
NumElements
(
1
),
4
UL
);
ASSERT_EQ
(
lod_tensor_
.
NumElements
(
1
),
3
UL
);
ASSERT_EQ
(
lod_tensor_
.
NumElements
(
2
),
8UL
);
}
TEST_F
(
LoDTensorTester
,
NumElements2
)
{
ASSERT_EQ
(
lod_tensor_
.
NumElements
(
0
,
0
),
2UL
);
ASSERT_EQ
(
lod_tensor_
.
NumElements
(
0
,
1
),
2
UL
);
ASSERT_EQ
(
lod_tensor_
.
NumElements
(
1
,
1
),
2
UL
);
ASSERT_EQ
(
lod_tensor_
.
NumElements
(
0
,
1
),
1
UL
);
ASSERT_EQ
(
lod_tensor_
.
NumElements
(
1
,
1
),
3
UL
);
}
TEST_F
(
LoDTensorTester
,
ShrinkLevels
)
{
...
...
@@ -68,17 +68,16 @@ TEST_F(LoDTensorTester, ShrinkLevels) {
LoDTensor
new_lod_tensor
=
lod_tensor_
;
new_lod_tensor
.
ShrinkLevels
(
level
,
level
+
1
);
ASSERT_EQ
(
new_lod_tensor
.
NumLevels
(),
1UL
);
ASSERT_EQ
(
new_lod_tensor
.
NumElements
(
0
),
lod_tensor_
.
NumElements
(
level
));
ASSERT_EQ
(
new_lod_tensor
.
data
<
float
>
(),
lod_tensor_
.
data
<
float
>
());
}
// shrink 2 level
for
(
size_t
level
=
0
;
level
<
2UL
;
++
level
)
{
LoDTensor
new_lod_tensor
=
lod_tensor_
;
new_lod_tensor
.
ShrinkLevels
(
level
,
level
+
2
);
// the lowest level's last element should be the tensor's batch_size.
ASSERT_EQ
(
new_lod_tensor
.
lod
().
back
().
back
(),
lod_tensor_
.
lod
().
back
().
back
());
ASSERT_EQ
(
new_lod_tensor
.
NumLevels
(),
2UL
);
ASSERT_EQ
(
new_lod_tensor
.
NumElements
(
0
),
lod_tensor_
.
NumElements
(
level
));
ASSERT_EQ
(
new_lod_tensor
.
NumElements
(
1
),
lod_tensor_
.
NumElements
(
level
+
1
));
ASSERT_EQ
(
new_lod_tensor
.
data
<
float
>
(),
lod_tensor_
.
data
<
float
>
());
}
}
...
...
@@ -86,19 +85,19 @@ TEST_F(LoDTensorTester, ShrinkLevels) {
TEST_F
(
LoDTensorTester
,
ShrinkInLevel
)
{
size_t
level
=
0
;
LoDTensor
new_lod_tensor
=
lod_tensor_
;
new_lod_tensor
.
ShrinkInLevel
(
level
,
0
,
2
);
new_lod_tensor
.
ShrinkInLevel
(
level
,
0
,
1
);
EXPECT_EQ
(
new_lod_tensor
.
NumLevels
(),
3UL
);
EXPECT_EQ
(
new_lod_tensor
.
NumElements
(
0
),
2
UL
);
EXPECT_EQ
(
new_lod_tensor
.
NumElements
(
1
),
4
UL
);
EXPECT_EQ
(
new_lod_tensor
.
NumElements
(
2
),
8
UL
);
EXPECT_EQ
(
new_lod_tensor
.
NumElements
(
0
),
1
UL
);
EXPECT_EQ
(
new_lod_tensor
.
NumElements
(
1
),
2
UL
);
EXPECT_EQ
(
new_lod_tensor
.
NumElements
(
2
),
5
UL
);
ASSERT_EQ
(
new_lod_tensor
.
data
<
float
>
(),
lod_tensor_
.
data
<
float
>
());
level
=
1
;
new_lod_tensor
=
lod_tensor_
;
new_lod_tensor
.
ShrinkInLevel
(
level
,
0
,
2
);
new_lod_tensor
.
ShrinkInLevel
(
level
,
1
,
2
);
ASSERT_EQ
(
new_lod_tensor
.
NumLevels
(),
2UL
);
ASSERT_EQ
(
new_lod_tensor
.
NumElements
(
0
),
2
UL
);
ASSERT_EQ
(
new_lod_tensor
.
NumElements
(
1
),
4
UL
);
ASSERT_EQ
(
new_lod_tensor
.
NumElements
(
0
),
1
UL
);
ASSERT_EQ
(
new_lod_tensor
.
NumElements
(
1
),
3
UL
);
ASSERT_EQ
(
new_lod_tensor
.
data
<
float
>
(),
lod_tensor_
.
data
<
float
>
());
}
...
...
paddle/framework/op_proto_maker.h
浏览文件 @
c2feab7f
...
...
@@ -44,6 +44,11 @@ class OpProtoAndCheckerMaker {
var_
->
set_intermediate
(
true
);
return
*
this
;
}
VariableBuilder
&
AsDispensable
()
{
var_
->
set_dispensable
(
true
);
return
*
this
;
}
};
VariableBuilder
AddInput
(
const
std
::
string
&
name
,
const
std
::
string
&
comment
);
...
...
paddle/framework/operator.cc
浏览文件 @
c2feab7f
...
...
@@ -252,5 +252,20 @@ std::ostream& operator<<(std::ostream& os,
return
os
;
}
bool
OpSupportGPU
(
const
std
::
string
&
op_type
)
{
auto
&
all_kernels
=
OperatorWithKernel
::
AllOpKernels
();
auto
it
=
all_kernels
.
find
(
op_type
);
if
(
it
==
all_kernels
.
end
())
{
// All control operator must support GPU
return
true
;
}
for
(
auto
&
kern_pair
:
it
->
second
)
{
if
(
platform
::
is_gpu_place
(
kern_pair
.
first
.
place_
))
{
return
true
;
}
}
return
false
;
}
}
// namespace framework
}
// namespace paddle
paddle/framework/operator.h
浏览文件 @
c2feab7f
...
...
@@ -327,37 +327,47 @@ class CompileTimeInferShapeContext : public InferShapeContext {
bool
HasInput
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
std
::
string
>&
input_names
=
op_
.
Input
(
name
);
auto
length
=
input_names
.
size
();
if
(
length
==
0
)
{
return
false
;
}
PADDLE_ENFORCE_EQ
(
length
,
1UL
,
"Input(%s) should have only one value, "
"but it have %d now"
,
name
,
length
);
return
block_
.
HasVar
(
input_names
[
0
]);
return
block_
.
HasVar
Recursive
(
input_names
[
0
]);
}
bool
HasOutput
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
std
::
string
>&
output_names
=
op_
.
Output
(
name
);
auto
length
=
output_names
.
size
();
if
(
length
==
0
)
{
return
false
;
}
PADDLE_ENFORCE_EQ
(
length
,
1UL
,
"Output(%s) should have only one value, "
"but it have %d now"
,
name
,
length
);
return
block_
.
HasVar
(
output_names
[
0
]);
return
block_
.
HasVar
Recursive
(
output_names
[
0
]);
}
bool
HasInputs
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
std
::
string
>&
input_names
=
op_
.
Input
(
name
);
PADDLE_ENFORCE
(
!
input_names
.
empty
(),
"Inputs(%s) length is 0"
,
name
);
if
(
input_names
.
empty
())
{
return
false
;
}
for
(
auto
&
input
:
input_names
)
{
if
(
!
block_
.
HasVar
(
input
))
return
false
;
if
(
!
block_
.
HasVar
Recursive
(
input
))
return
false
;
}
return
true
;
}
bool
HasOutputs
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
std
::
string
>&
output_names
=
op_
.
Output
(
name
);
PADDLE_ENFORCE
(
!
output_names
.
empty
(),
"Inputs(%s) length is 0"
,
name
);
if
(
output_names
.
empty
())
{
return
false
;
}
for
(
auto
&
output
:
output_names
)
{
if
(
!
block_
.
HasVar
(
output
))
return
false
;
if
(
!
block_
.
HasVar
Recursive
(
output
))
return
false
;
}
return
true
;
}
...
...
@@ -404,11 +414,11 @@ class CompileTimeInferShapeContext : public InferShapeContext {
private:
DDim
GetDim
(
const
std
::
string
&
name
)
const
override
{
return
framework
::
make_ddim
(
block_
.
FindVar
(
name
)
->
Shape
());
return
framework
::
make_ddim
(
block_
.
FindVar
Recursive
(
name
)
->
Shape
());
}
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
{
block_
.
FindVar
(
name
)
->
SetShape
(
framework
::
vectorize
(
dim
));
block_
.
FindVar
Recursive
(
name
)
->
SetShape
(
framework
::
vectorize
(
dim
));
}
const
OpDescBind
&
op_
;
...
...
@@ -421,13 +431,27 @@ class RuntimeInferShapeContext : public InferShapeContext {
:
op_
(
op
),
scope_
(
scope
)
{}
bool
HasInput
(
const
std
::
string
&
name
)
const
override
{
auto
ipt
=
op_
.
Input
(
name
);
auto
&
ins
=
Inputs
(
name
);
size_t
length
=
ins
.
size
();
if
(
length
==
0
)
{
return
false
;
}
PADDLE_ENFORCE_EQ
(
length
,
1UL
,
"Input %s should have more than one inputs"
,
name
);
auto
ipt
=
ins
[
0
];
auto
*
var
=
ipt
==
kEmptyVarName
?
nullptr
:
scope_
.
FindVar
(
ipt
);
return
var
!=
nullptr
;
}
bool
HasOutput
(
const
std
::
string
&
name
)
const
override
{
auto
ipt
=
op_
.
Output
(
name
);
auto
&
outs
=
Outputs
(
name
);
size_t
length
=
outs
.
size
();
if
(
length
==
0
)
{
return
false
;
}
PADDLE_ENFORCE_EQ
(
length
,
1UL
,
"Output %s should have more than one inputs"
,
name
);
auto
ipt
=
outs
[
0
];
auto
*
var
=
ipt
==
kEmptyVarName
?
nullptr
:
scope_
.
FindVar
(
ipt
);
return
var
!=
nullptr
;
}
...
...
@@ -649,5 +673,7 @@ class OperatorWithKernel : public OperatorBase {
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
OperatorWithKernel
::
OpKernelKey
&
kernel_key
);
extern
bool
OpSupportGPU
(
const
std
::
string
&
op_type
);
}
// namespace framework
}
// namespace paddle
paddle/framework/program_desc.cc
浏览文件 @
c2feab7f
...
...
@@ -35,8 +35,8 @@ ProgramDesc *ProgramDescBind::Proto() {
ProgramDescBind
::
ProgramDescBind
()
{
auto
*
block
=
prog_
.
mutable_blocks
()
->
Add
();
block
->
set_idx
(
0
);
block
->
set_parent_idx
(
-
1
);
block
->
set_idx
(
kRootBlockIndex
);
block
->
set_parent_idx
(
kNoneBlockIndex
);
blocks_
.
emplace_back
(
new
BlockDescBind
(
this
,
block
));
}
...
...
paddle/framework/program_desc.h
浏览文件 @
c2feab7f
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include <memory>
#include <vector>
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/proto_desc.h"
#include "paddle/platform/macros.h"
namespace
paddle
{
...
...
paddle/framework/proto_desc.h
0 → 100644
浏览文件 @
c2feab7f
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace
paddle
{
namespace
framework
{
// The Index of first Block in Program. also called root block.
constexpr
int
kRootBlockIndex
=
0
;
// The Parent Index of root Block, this block does not exist.
constexpr
int
kNoneBlockIndex
=
-
1
;
}
// namespace framework
}
// namespace paddle
paddle/gserver/activations/MKLDNNActivation.cpp
浏览文件 @
c2feab7f
...
...
@@ -126,7 +126,7 @@ void MKLDNNEltwiseActivation::resetFwd(Argument& act) {
copyInVal_
=
nullptr
;
if
(
act
.
grad
&&
algo
==
algorithm
::
eltwise_tanh
)
{
// tanh need save src input for backward
inVal_
=
MKLDNNMatrix
::
create
(
nullptr
,
val_
->
getPrimitiveDesc
());
inVal_
=
MKLDNNMatrix
::
create
(
val_
->
getPrimitiveDesc
());
copyInVal_
=
std
::
make_shared
<
mkldnn
::
reorder
>
(
*
val_
,
*
inVal_
);
CHECK
(
copyInVal_
)
<<
"should not be emptry"
;
pipelineFwd_
.
push_back
(
*
copyInVal_
);
...
...
@@ -145,7 +145,7 @@ void MKLDNNEltwiseActivation::resetBwd(Argument& act) {
algorithm
algo
=
getAlgo
(
this
->
getName
());
float
alpha
=
getBwdAlpha
();
float
beta
=
getBeta
();
grad_
=
MKLDNNMatrix
::
create
(
act
.
grad
,
val_
->
getPrimitiveDesc
()
);
grad_
=
MKLDNNMatrix
::
create
(
val_
->
getPrimitiveDesc
(),
act
.
grad
);
auto
eng
=
CPUEngine
::
Instance
().
getEngine
();
auto
bwdDesc
=
eltwise_bwd
::
desc
(
algo
,
grad_
->
getMemoryDesc
(),
val_
->
getMemoryDesc
(),
alpha
,
beta
);
...
...
@@ -230,7 +230,7 @@ void MKLDNNActivation::resetFwd(Argument& act) {
int
ic
=
cnt_
/
bs
/
ih
/
iw
;
CHECK_EQ
(
cnt_
,
(
size_t
)
bs
*
ic
*
ih
*
iw
);
val_
=
MKLDNNMatrix
::
create
(
act
.
value
,
{
bs
,
ic
,
ih
,
iw
},
mkldnn
::
memory
::
format
::
nchw
,
*
engine_
);
{
bs
,
ic
,
ih
,
iw
},
mkldnn
::
memory
::
format
::
nchw
,
*
engine_
,
act
.
value
);
CHECK
(
val_
);
val_
->
downSpatial
();
}
...
...
paddle/gserver/layers/MKLDNNBase.h
浏览文件 @
c2feab7f
...
...
@@ -21,8 +21,8 @@ namespace paddle {
typedef
enum
{
MKLDNN_BASE
=
1
,
// basical info of MKLDNN
MKLDNN_TESTS
=
1
,
// gtest info of MKLDNN
MKLDNN_
SIZES
=
2
,
// size
info of MKLDNN
MKLDNN_
FMTS
=
3
,
// format
info of MKLDNN
MKLDNN_
FMTS
=
2
,
// format
info of MKLDNN
MKLDNN_
SIZES
=
3
,
// size
info of MKLDNN
MKLDNN_ALL
=
4
,
// show all info of MKLDNN
}
MKLDNN_LOG_LEVEL
;
...
...
paddle/gserver/layers/MKLDNNConvLayer.cpp
浏览文件 @
c2feab7f
...
...
@@ -116,8 +116,6 @@ void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdBuffers
(
fwdPD_
,
in
,
wgt
,
bias
,
out
);
resetFwdPipeline
(
pipeline
,
fwdPD_
,
in
,
wgt
,
bias
,
out
);
printValueFormatFlow
();
}
void
MKLDNNConvLayer
::
resetBwd
(
std
::
vector
<
primitive
>&
pipeline
,
...
...
@@ -135,12 +133,6 @@ void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdBuffers
(
bwdWgtPD
,
bwdDataPD
,
in
,
wgt
,
bias
,
out
);
resetBwdPipeline
(
pipeline
,
bwdWgtPD
,
bwdDataPD
,
in
,
wgt
,
bias
,
out
);
printGradFormatFlow
();
}
void
MKLDNNConvLayer
::
updateInputData
()
{
cpuInVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
void
MKLDNNConvLayer
::
updateWeights
(
const
UpdateCallback
&
callback
)
{
...
...
@@ -211,11 +203,18 @@ void MKLDNNConvLayer::resetFwdBuffers(
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
pd
);
resetInValue
(
pd
,
in
);
resetInValue
(
in
,
std
::
make_shared
<
memory
::
primitive_desc
>
(
pd
->
src_primitive_desc
()));
resetOutValue
(
out
,
pd
->
dst_primitive_desc
());
resetW
gtBiasValue
(
pd
,
wgt
,
bias
);
resetW
ithMatrix
(
wgt
,
weight_
->
getW
(),
pd
->
weights_primitive_desc
()
);
resetOutValue
(
pd
,
out
);
if
(
biases_
&&
biases_
->
getW
())
{
resetWithMatrix
(
bias
,
biases_
->
getW
(),
pd
->
bias_primitive_desc
());
}
else
{
bias
=
nullptr
;
}
}
void
MKLDNNConvLayer
::
resetFwdPipeline
(
...
...
@@ -225,104 +224,12 @@ void MKLDNNConvLayer::resetFwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
if
(
cvtInVal_
)
{
pipeline
.
push_back
(
*
cvtInVal_
);
}
if
(
bias
)
{
fwd_
.
reset
(
new
conv_fwd
(
*
pd
,
*
in
,
*
wgt
,
*
bias
,
*
out
));
}
else
{
fwd_
.
reset
(
new
conv_fwd
(
*
pd
,
*
in
,
*
wgt
,
*
out
));
}
pipeline
.
push_back
(
*
fwd_
);
if
(
cvtOutVal_
)
{
pipeline
.
push_back
(
*
cvtOutVal_
);
}
}
void
MKLDNNConvLayer
::
resetInValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
)
{
const
MatrixPtr
&
inMat
=
inputLayers_
[
0
]
->
getOutputValue
();
in
=
MKLDNNMatrix
::
create
(
inMat
,
pd
->
src_primitive_desc
());
// create buffer and reorder if input value do not match
cpuInVal_
=
nullptr
;
cvtInVal_
=
nullptr
;
MKLDNNMatrixPtr
dnnIn
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
inMat
);
CHECK_EQ
(
inputIsOnlyMKLDNN
(),
dnnIn
!=
nullptr
);
if
(
dnnIn
!=
nullptr
&&
dnnIn
->
getPrimitiveDesc
()
==
in
->
getPrimitiveDesc
())
{
in
=
dnnIn
;
return
;
}
if
(
dnnIn
)
{
if
(
dnnIn
->
getFormat
()
==
format
::
nc
)
{
CHECK
(
ih_
==
1
&&
iw_
==
1
)
<<
"when input is nc format"
;
// create a new one with nchw format and same data
memory
::
dims
inDims
=
memory
::
dims
{
bs_
,
ic_
,
1
,
1
};
dnnIn
=
MKLDNNMatrix
::
create
(
inMat
,
inDims
,
format
::
nchw
,
engine_
);
}
if
(
dnnIn
->
getPrimitiveDesc
()
==
in
->
getPrimitiveDesc
())
{
in
=
dnnIn
;
return
;
}
cpuInVal_
=
dnnIn
;
in
=
MKLDNNMatrix
::
create
(
nullptr
,
pd
->
src_primitive_desc
());
cvtInVal_
=
MKLDNNMatrix
::
createReorder
(
cpuInVal_
,
in
);
CHECK
(
cvtInVal_
)
<<
"should not be emptry"
;
}
else
{
memory
::
dims
inDims
=
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
};
cpuInVal_
=
MKLDNNMatrix
::
create
(
inMat
,
inDims
,
format
::
nchw
,
engine_
);
if
(
cpuInVal_
->
getPrimitiveDesc
()
!=
in
->
getPrimitiveDesc
())
{
// create new mkldnn matrix
in
=
MKLDNNMatrix
::
create
(
nullptr
,
pd
->
src_primitive_desc
());
cvtInVal_
=
MKLDNNMatrix
::
createReorder
(
cpuInVal_
,
in
);
CHECK
(
cvtInVal_
)
<<
"should not be emptry"
;
}
else
{
in
=
cpuInVal_
;
}
}
}
void
MKLDNNConvLayer
::
resetWgtBiasValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getW
(),
pd
->
weights_primitive_desc
());
VLOG
(
MKLDNN_FMTS
)
<<
"Weight value format: "
<<
wgt
->
getFormat
();
bias
=
(
biases_
&&
biases_
->
getW
())
?
MKLDNNMatrix
::
create
(
biases_
->
getW
(),
pd
->
bias_primitive_desc
())
:
nullptr
;
}
void
MKLDNNConvLayer
::
resetOutValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
out
)
{
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
pd
->
dst_primitive_desc
());
// create reorder if output value has cpu device and pd do not match
cpuOutVal_
=
nullptr
;
cvtOutVal_
=
nullptr
;
if
(
!
outputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
value
;
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
cpuOutVal_
=
MKLDNNMatrix
::
create
(
cpuOut
,
outDims
,
format
::
nchw
,
engine_
);
if
(
cpuOutVal_
->
getPrimitiveDesc
()
!=
pd
->
dst_primitive_desc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
pd
->
dst_primitive_desc
());
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
cpuOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be empty"
;
}
else
{
cpuOut
->
setData
(
output_
.
value
->
getData
());
cpuOutVal_
=
out
;
}
// when output is cpu device, change the mkldnn output value and make them
// share the same data. Then if next layer use inputlayer->getOuputValue()
// to achieve the input value, it will get the right data.
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
cpuOutVal_
);
return
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
}
void
MKLDNNConvLayer
::
resetBwdWgtPD
(
...
...
@@ -331,8 +238,8 @@ void MKLDNNConvLayer::resetBwdWgtPD(
loadConvSettings
(
wgtDims
,
biasDims
,
strides
,
dilations
,
padL
,
padR
);
// create backward weight using input, output and weight value memory desc
CHECK
(
inVal_
)
<<
"Should have input value"
;
CHECK
(
outVal_
)
<<
"Should have output value"
;
CHECK
(
inVal_
)
<<
"Should have in
ternal in
put value"
;
CHECK
(
outVal_
)
<<
"Should have
internal
output value"
;
CHECK
(
wgtVal_
)
<<
"Should have weight value"
;
algorithm
algo
=
algorithm
::
convolution_direct
;
padding_kind
padKind
=
padding_kind
::
zero
;
...
...
@@ -372,8 +279,8 @@ void MKLDNNConvLayer::resetBwdDataPD(
memory
::
dims
wgtDims
,
biasDims
,
strides
,
dilations
,
padL
,
padR
;
loadConvSettings
(
wgtDims
,
biasDims
,
strides
,
dilations
,
padL
,
padR
);
CHECK
(
inVal_
)
<<
"Should have input value"
;
CHECK
(
outVal_
)
<<
"Should have output value"
;
CHECK
(
inVal_
)
<<
"Should have in
ternal in
put value"
;
CHECK
(
outVal_
)
<<
"Should have
internal
output value"
;
// create backward data using input and output value memory desc
// but using weight memory desc with any format
auto
bwdDataDesc
=
conv_bwdData
::
desc
(
algorithm
::
convolution_direct
,
...
...
@@ -399,12 +306,27 @@ void MKLDNNConvLayer::resetBwdBuffers(
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
wgtPD
);
resetOutGrad
(
wgtPD
,
out
);
resetOutGrad
(
out
,
wgtPD
->
diff_dst_primitive_desc
()
);
resetWgtBiasGrad
(
wgtPD
,
wgt
,
bias
);
resetWithMatrix
(
wgt
,
weight_
->
getWGrad
(),
wgtPD
->
diff_weights_primitive_desc
());
CHECK
(
wgtVal_
!=
nullptr
&&
wgt
->
getPrimitiveDesc
()
==
wgtVal_
->
getPrimitiveDesc
())
<<
"primitive desc of weight grad and value should be equal"
;
resetInGrad
(
dataPD
,
in
);
bias
=
nullptr
;
if
(
biases_
&&
biases_
->
getWGrad
())
{
resetWithMatrix
(
bias
,
biases_
->
getWGrad
(),
wgtPD
->
diff_bias_primitive_desc
());
CHECK
(
bias
&&
biasVal_
&&
bias
->
getPrimitiveDesc
()
==
biasVal_
->
getPrimitiveDesc
())
<<
"primitive desc of bias grad should equal the bias value"
;
}
if
(
dataPD
==
nullptr
)
{
return
;
}
resetInGrad
(
in
,
dataPD
->
diff_src_primitive_desc
());
resetWgtValBwdData
(
dataPD
,
wgtValBwdData_
);
}
...
...
@@ -416,10 +338,7 @@ void MKLDNNConvLayer::resetBwdPipeline(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
if
(
cvtOutGrad_
)
{
pipeline
.
push_back
(
*
cvtOutGrad_
);
}
CHECK
(
inVal_
);
// add bwdWgt handle
if
(
bias
)
{
bwdWgt_
.
reset
(
new
conv_bwdWgt
(
*
wgtPD
,
*
inVal_
,
*
out
,
*
wgt
,
*
bias
));
...
...
@@ -431,99 +350,13 @@ void MKLDNNConvLayer::resetBwdPipeline(
if
(
dataPD
==
nullptr
)
{
return
;
}
if
(
cvtWgtVal_
)
{
pipeline
.
push_back
(
*
cvtWgtVal_
);
}
// add bwdData handle
CHECK
(
wgtValBwdData_
)
<<
"Should have weight memory"
;
bwdData_
.
reset
(
new
conv_bwdData
(
*
dataPD
,
*
out
,
*
wgtValBwdData_
,
*
in
));
pipeline
.
push_back
(
*
bwdData_
);
if
(
cvtInGrad_
)
{
pipeline
.
push_back
(
*
cvtInGrad_
);
}
}
void
MKLDNNConvLayer
::
resetOutGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
out
)
{
cpuOutGrad_
=
nullptr
;
cvtOutGrad_
=
nullptr
;
CHECK
(
outVal_
!=
nullptr
&&
outVal_
->
getPrimitiveDesc
()
==
wgtPD
->
diff_dst_primitive_desc
())
<<
"primitive desc of out grad and value should be equal"
;
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
// always share the same grad data of CPU output
// then the activation can get the right grad from output_.grad
output_
.
grad
->
setData
(
cpuOut
->
getData
());
// same PrimitiveDesc with cpuInVal_
CHECK
(
cpuOutVal_
);
cpuOutGrad_
=
MKLDNNMatrix
::
create
(
cpuOut
,
cpuOutVal_
->
getPrimitiveDesc
());
// create reorder if primitive desc does not match
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
!=
outVal_
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
outVal_
->
getPrimitiveDesc
());
cvtOutGrad_
=
MKLDNNMatrix
::
createReorder
(
cpuOutGrad_
,
out
);
CHECK
(
cvtOutGrad_
);
}
else
{
out
=
cpuOutGrad_
;
}
}
}
void
MKLDNNConvLayer
::
resetWgtBiasGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getWGrad
(),
wgtPD
->
diff_weights_primitive_desc
());
CHECK
(
nullptr
!=
wgtVal_
&&
wgt
->
getPrimitiveDesc
()
==
wgtVal_
->
getPrimitiveDesc
())
<<
"primitive desc of weight grad and value should be equal"
;
VLOG
(
MKLDNN_FMTS
)
<<
"weight grad format: "
<<
wgt
->
getFormat
();
bias
=
nullptr
;
if
(
biasVal_
==
nullptr
)
{
return
;
}
bias
=
MKLDNNMatrix
::
create
(
biases_
->
getWGrad
(),
wgtPD
->
diff_bias_primitive_desc
());
CHECK
(
bias
->
getPrimitiveDesc
()
==
biasVal_
->
getPrimitiveDesc
())
<<
"primitive desc of bias grad should equal the bias value"
;
}
void
MKLDNNConvLayer
::
resetInGrad
(
std
::
shared_ptr
<
conv_bwdData
::
primitive_desc
>&
dataPD
,
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
cpuInGrad_
=
nullptr
;
cvtInGrad_
=
nullptr
;
if
(
dataPD
==
nullptr
)
{
return
;
}
if
(
inputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetInGrad
(
in
,
dataPD
->
diff_src_primitive_desc
());
CHECK
(
nullptr
!=
inVal_
&&
in
->
getPrimitiveDesc
()
==
inVal_
->
getPrimitiveDesc
())
<<
"primitive desc of input grad and value should be equal"
;
}
else
{
const
MatrixPtr
&
cpuIn
=
getInputGrad
(
0
,
CPU_DEVICE
);
// same PrimitiveDesc with cpuInVal_
CHECK
(
cpuInVal_
);
cpuInGrad_
=
MKLDNNMatrix
::
create
(
cpuIn
,
cpuInVal_
->
getPrimitiveDesc
());
in
=
cpuInGrad_
;
// create reorder if PrimitiveDesc does not match
if
(
cpuInGrad_
->
getPrimitiveDesc
()
!=
dataPD
->
diff_src_primitive_desc
())
{
in
=
MKLDNNMatrix
::
create
(
getInputGrad
(
0
,
MKLDNN_DEVICE
),
dataPD
->
diff_src_primitive_desc
());
cvtInGrad_
=
MKLDNNMatrix
::
createReorder
(
in
,
cpuInGrad_
);
CHECK
(
cvtInGrad_
);
}
}
}
void
MKLDNNConvLayer
::
resetWgtValBwdData
(
...
...
@@ -537,8 +370,7 @@ void MKLDNNConvLayer::resetWgtValBwdData(
// since the primitive_desc would be different with wgtVal_
CHECK
(
wgtVal_
)
<<
"should have weight value"
;
if
(
dataPD
->
weights_primitive_desc
()
!=
wgtVal_
->
getPrimitiveDesc
())
{
wgtValBwdData_
=
MKLDNNMatrix
::
create
(
nullptr
,
dataPD
->
weights_primitive_desc
());
wgtValBwdData_
=
MKLDNNMatrix
::
create
(
dataPD
->
weights_primitive_desc
());
cvtWgtVal_
=
MKLDNNMatrix
::
createReorder
(
wgtVal_
,
wgtValBwdData_
);
CHECK
(
cvtWgtVal_
);
}
else
{
...
...
paddle/gserver/layers/MKLDNNConvLayer.h
浏览文件 @
c2feab7f
...
...
@@ -48,17 +48,6 @@ protected:
// save forward primitive_desc, which can be used backward
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>
fwdPD_
;
// MKLDNNMatrixPtr which should be created from CPU Device
MKLDNNMatrixPtr
cpuInVal_
;
MKLDNNMatrixPtr
cpuInGrad_
;
MKLDNNMatrixPtr
cpuOutVal_
;
MKLDNNMatrixPtr
cpuOutGrad_
;
// convert handle between CPU device and MKLDNN device
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInGrad_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutGrad_
;
// whether the weight has been init
bool
hasInitedWgt_
;
...
...
@@ -94,8 +83,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
updateInputData
()
override
;
void
updateWeights
(
const
UpdateCallback
&
callback
)
override
;
void
convertWeightsFromPaddle
()
override
;
...
...
@@ -109,26 +96,6 @@ public:
<<
", sw: "
<<
sw_
<<
", dh: "
<<
dh_
<<
", dw: "
<<
dw_
;
}
void
printValueFormatFlow
()
override
{
if
(
cpuInVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
cpuInVal_
->
getFormat
()
<<
" >>>"
;
}
MKLDNNLayer
::
printValueFormatFlow
();
if
(
cpuOutVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
" >>> "
<<
cpuOutVal_
->
getFormat
();
}
}
void
printGradFormatFlow
()
override
{
if
(
cpuInGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
cpuInGrad_
->
getFormat
()
<<
" <<<"
;
}
MKLDNNLayer
::
printGradFormatFlow
();
if
(
cpuOutGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
" <<< "
<<
cpuOutGrad_
->
getFormat
();
}
}
protected:
/**
* load the dims settings of this conv
...
...
@@ -162,23 +129,6 @@ protected:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
/**
* reset MKLDNNMatrix of input value
*/
void
resetInValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
);
/**
* reset MKLDNNMatrix of weight and bias value
*/
void
resetWgtBiasValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
/**
* reset MKLDNNMatrix of output value
*/
void
resetOutValue
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
out
);
/**
* reset the backward weight primitive descriptor.
*/
...
...
@@ -207,22 +157,6 @@ protected:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
/**
* reset MKLDNNMatrix of output grad
*/
void
resetOutGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
out
);
/**
* reset MKLDNNMatrix of weight and bias grad
*/
void
resetWgtBiasGrad
(
std
::
shared_ptr
<
conv_bwdWgt
::
primitive_desc
>&
wgtPD
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
/**
* reset MKLDNNMatrix of input grad
*/
void
resetInGrad
(
std
::
shared_ptr
<
conv_bwdData
::
primitive_desc
>&
dataPD
,
MKLDNNMatrixPtr
&
in
);
/**
* reset MKLDNNMatrix of weight value for backward data
* since the primitive_desc would be different with wgtVal_
...
...
paddle/gserver/layers/MKLDNNFcLayer.cpp
浏览文件 @
c2feab7f
...
...
@@ -62,7 +62,7 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() {
CHECK
(
wgtVal_
)
<<
"should have been initialized"
;
bool
hasNoSpatial_
=
ih_
==
1
&&
iw_
==
1
;
auto
targetDim
=
wgtVal_
->
getDims
();
auto
srcFmt
=
hasNoSpatial_
?
memory
::
format
::
io
:
memory
::
format
::
ihwo
;
auto
srcFmt
=
hasNoSpatial_
?
format
::
io
:
format
::
ihwo
;
wgtVal_
->
reorderDataFrom
(
wgtVal_
,
srcFmt
,
targetDim
);
hasInitedWgt_
=
true
;
}
...
...
@@ -71,7 +71,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
CHECK
(
wgtVal_
)
<<
"should have been initialized"
;
bool
hasNoSpatial_
=
ih_
==
1
&&
iw_
==
1
;
auto
targetDim
=
wgtVal_
->
getDims
();
auto
dstFmt
=
hasNoSpatial_
?
memory
::
format
::
io
:
memory
::
format
::
ihwo
;
auto
dstFmt
=
hasNoSpatial_
?
format
::
io
:
format
::
ihwo
;
wgtVal_
->
reorderDataTo
(
wgtVal_
,
dstFmt
,
targetDim
);
}
...
...
@@ -100,8 +100,6 @@ void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdPD
(
fwdPD_
,
in
,
wgt
,
bias
,
out
);
resetFwdPipeline
(
pipeline
,
fwdPD_
,
in
,
wgt
,
bias
,
out
);
printValueFormatFlow
();
}
void
MKLDNNFcLayer
::
resetBwd
(
std
::
vector
<
primitive
>&
pipeline
,
...
...
@@ -119,12 +117,6 @@ void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdDataPD
(
bwdDataPD
,
in
,
out
);
resetBwdPipeline
(
pipeline
,
bwdWgtPD
,
bwdDataPD
,
in
,
wgt
,
bias
,
out
);
printGradFormatFlow
();
}
void
MKLDNNFcLayer
::
updateInputData
()
{
inVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
void
MKLDNNFcLayer
::
updateWeights
(
const
UpdateCallback
&
callback
)
{
...
...
@@ -139,51 +131,30 @@ void MKLDNNFcLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
resetInValue
(
in
);
resetWgtBiasValue
(
wgt
,
bias
);
resetOutValue
(
out
);
}
void
MKLDNNFcLayer
::
resetInValue
(
MKLDNNMatrixPtr
&
in
)
{
if
(
inputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
dnnIn
=
getInputValue
(
0
);
in
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
dnnIn
);
CHECK
(
in
)
<<
"Input should be MKLDNNMatrix"
;
}
else
{
CHECK_EQ
(
getPrev
(
0
)
->
getDeviceId
(),
CPU_DEVICE
)
<<
"Only support CPU yet"
;
const
MatrixPtr
&
cpuIn
=
getInputValue
(
0
,
CPU_DEVICE
);
in
=
MKLDNNMatrix
::
create
(
cpuIn
,
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
}
CHECK
(
in
);
in
->
downSpatial
();
}
void
MKLDNNFcLayer
::
resetWgtBiasValue
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
auto
outPD
=
MKLDNNMatrix
::
createPrimitiveDesc
({
bs_
,
oc_
},
format
::
nc
,
engine_
);
resetOutValue
(
out
,
outPD
);
format
wgtFmt
=
format
::
oihw
;
if
(
in
Val_
->
getFormat
()
==
format
::
nChw8c
)
{
if
(
in
->
getFormat
()
==
format
::
nChw8c
)
{
wgtFmt
=
format
::
oIhw8i
;
}
else
if
(
in
Val_
->
getFormat
()
==
format
::
nChw16c
)
{
}
else
if
(
in
->
getFormat
()
==
format
::
nChw16c
)
{
wgtFmt
=
format
::
oIhw16i
;
}
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getW
(),
{
oc_
,
ic_
,
ih_
,
iw_
},
wgtFmt
,
engine_
);
auto
wgtPD
=
MKLDNNMatrix
::
createPrimitiveDesc
({
oc_
,
ic_
,
ih_
,
iw_
},
wgtFmt
,
engine_
);
resetWithMatrix
(
wgt
,
weight_
->
getW
(),
wgtPD
);
wgt
->
downSpatial
();
VLOG
(
MKLDNN_FMTS
)
<<
"Weight value format: "
<<
wgt
->
getFormat
();
bias
=
(
biases_
&&
biases_
->
getW
())
?
MKLDNNMatrix
::
create
(
biases_
->
getW
(),
{
oc_
},
format
::
x
,
engine_
)
:
nullptr
;
}
void
MKLDNNFcLayer
::
resetOutValue
(
MKLDNNMatrixPtr
&
out
)
{
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
{
bs_
,
oc_
},
format
::
nc
,
engine_
);
if
(
!
outputIsOnlyMKLDNN
())
{
// fc cpu output value do not need create convert, just share data
getOutput
(
CPU_DEVICE
).
value
->
setData
(
out
->
getData
())
;
if
(
biases_
&&
biases_
->
getW
()
)
{
auto
biasPD
=
MKLDNNMatrix
::
createPrimitiveDesc
({
oc_
},
format
::
x
,
engine_
);
resetWithMatrix
(
bias
,
biases_
->
getW
(),
biasPD
);
}
else
{
bias
=
nullptr
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
out
);
}
void
MKLDNNFcLayer
::
resetFwdPD
(
std
::
shared_ptr
<
fc_fwd
::
primitive_desc
>&
pd
,
...
...
@@ -219,7 +190,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
}
else
{
fwd_
.
reset
(
new
fc_fwd
(
*
pd
,
*
in
,
*
wgt
,
*
out
));
}
pipeline
.
push_back
(
*
fwd_
);
}
...
...
@@ -227,44 +197,18 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
{
resetOutGrad
(
out
);
resetWgtBiasGrad
(
wgt
,
bias
);
resetInGrad
(
in
);
}
void
MKLDNNFcLayer
::
resetOutGrad
(
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
outVal_
);
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
output_
.
grad
->
setData
(
cpuOut
->
getData
());
out
=
MKLDNNMatrix
::
create
(
cpuOut
,
outVal_
->
getPrimitiveDesc
());
}
}
CHECK
(
inVal_
&&
outVal_
);
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
void
MKLDNNFcLayer
::
resetWgtBiasGrad
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
)
{
CHECK
(
wgtVal_
);
wgt
=
MKLDNNMatrix
::
create
(
weight_
->
getWGrad
(),
wgtVal_
->
getPrimitiveDesc
());
resetWithMatrix
(
wgt
,
weight_
->
getWGrad
(),
wgtVal_
->
getPrimitiveDesc
());
bias
=
nullptr
;
if
(
biasVal_
==
nullptr
)
{
return
;
}
bias
=
MKLDNNMatrix
::
create
(
biases_
->
getWGrad
(),
biasVal_
->
getPrimitiveDesc
());
}
void
MKLDNNFcLayer
::
resetInGrad
(
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
if
(
inputLayers_
[
0
]
->
getOutput
().
grad
==
nullptr
)
{
return
;
if
(
biasVal_
)
{
resetWithMatrix
(
bias
,
biases_
->
getWGrad
(),
biasVal_
->
getPrimitiveDesc
());
}
else
{
bias
=
nullptr
;
}
CHECK
(
inVal_
);
MKLDNNLayer
::
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
}
void
MKLDNNFcLayer
::
resetBwdWgtPD
(
...
...
paddle/gserver/layers/MKLDNNFcLayer.h
浏览文件 @
c2feab7f
...
...
@@ -66,8 +66,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
updateInputData
()
override
;
void
updateWeights
(
const
UpdateCallback
&
callback
)
override
;
void
convertWeightsFromPaddle
()
override
;
...
...
@@ -84,9 +82,6 @@ protected:
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
void
resetInValue
(
MKLDNNMatrixPtr
&
in
);
void
resetWgtBiasValue
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
void
resetOutValue
(
MKLDNNMatrixPtr
&
out
);
void
resetFwdPD
(
std
::
shared_ptr
<
fc_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
in
,
MKLDNNMatrixPtr
wgt
,
...
...
@@ -109,9 +104,6 @@ protected:
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
);
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
);
void
resetWgtBiasGrad
(
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
);
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
);
void
resetBwdWgtPD
(
std
::
shared_ptr
<
fc_bwdWgt
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
wgt
,
MKLDNNMatrixPtr
&
bias
,
...
...
paddle/gserver/layers/MKLDNNLayer.cpp
0 → 100644
浏览文件 @
c2feab7f
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNLayer.h"
using
namespace
mkldnn
;
// NOLINT
typedef
memory
::
format
format
;
namespace
paddle
{
bool
MKLDNNLayer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
<<
"Please set WITH_MKLDNN=ON "
<<
"and set use_mkldnn=True"
;
CHECK
(
!
useGpu_
)
<<
"Do not support GPU yet"
;
// set device id before Layer::init
setDevice
(
MKLDNN_DEVICE
);
// change param device to MKLDNN device
setParamsDevice
(
MKLDNN_DEVICE
,
parameterMap
);
if
(
!
Layer
::
init
(
layerMap
,
parameterMap
))
{
return
false
;
}
setOutputMap
();
checkCPUOutputsNumber
();
stream_
.
reset
(
new
MKLDNNStream
());
engine_
=
CPUEngine
::
Instance
().
getEngine
();
return
true
;
}
void
MKLDNNLayer
::
forward
(
PassType
passType
)
{
passType_
=
passType
;
{
REGISTER_TIMER_INFO
(
"mkldnn_FwdTimer"
,
getName
().
c_str
());
CHECK
(
!
inputLayers_
.
empty
());
copySeqInfoToOutputs
();
size_t
elemenCnt
=
inputLayers_
[
0
]
->
getOutputValue
()
->
getElementCnt
();
if
(
inputElemenCnt_
!=
elemenCnt
)
{
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn forward"
;
// reset when input total sizes changed, not only the batchsize
inputElemenCnt_
=
elemenCnt
;
pipelineFwd_
.
clear
();
reshape
(
bs_
,
ic_
,
ih_
,
iw_
,
oc_
,
oh_
,
ow_
);
// all cpu device output grad or value share output's
shareCPUDevice
();
resetFwd
(
pipelineFwd_
,
inVal_
,
wgtVal_
,
biasVal_
,
outVal_
);
// MKLDNNLayer output value should be MKLDNNMatrix
// so external output value is necessary.
// Then external input value is not necessary,
// since input may be mkldnn internal buffer.
CHECK
(
extOutVal_
)
<<
"external output value is necessary"
;
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
extOutVal_
);
CHECK
(
inVal_
&&
outVal_
)
<<
"internal memories are necessary"
;
if
(
cvtInVal_
)
{
pipelineFwd_
.
insert
(
pipelineFwd_
.
begin
(),
*
cvtInVal_
);
}
if
(
cvtOutVal_
)
{
pipelineFwd_
.
push_back
(
*
cvtOutVal_
);
}
convertWeightsFromPaddle
();
printSizeInfo
();
printValueFormat
();
needResetBwd_
=
true
;
}
if
(
inputLayers_
[
0
]
->
getType
()
==
"data"
)
{
// Update input value data when input layer is "data" type,
// since the input value data address might be changed.
CHECK
(
extInVal_
);
extInVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
if
(
!
outputOnlyMKLDNN_
)
{
clearGrads
();
}
stream_
->
submit
(
pipelineFwd_
);
}
{
REGISTER_TIMER_INFO
(
"FwActTimer"
,
getName
().
c_str
());
forwardActivation
();
}
}
void
MKLDNNLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
if
(
needResetBwd_
)
{
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn backward"
;
pipelineBwd_
.
clear
();
pipelineMergeGrad_
.
clear
();
mergeGrad_
=
nullptr
;
resetBwd
(
pipelineBwd_
,
inGrad_
,
wgtGrad_
,
biasGrad_
,
outGrad_
);
// external output grad is not necessary
// since output may be mkldnn internal buffer or merge them directly.
CHECK
(
outGrad_
)
<<
"internal output grad is necessary"
;
if
(
extOutGrad_
)
{
CHECK_EQ
(
extOutGrad_
->
getData
(),
output_
.
grad
->
getData
())
<<
"the external buffer should share the same data with output_.grad"
;
}
if
(
cvtOutGrad_
)
{
pipelineBwd_
.
insert
(
pipelineBwd_
.
begin
(),
*
cvtOutGrad_
);
}
if
(
cvtInGrad_
)
{
pipelineBwd_
.
push_back
(
*
cvtInGrad_
);
}
printGradFormat
();
needResetBwd_
=
false
;
}
// merge grad must before backward activation
if
(
mergeGrad_
)
{
REGISTER_TIMER_INFO
(
"MergeBpGrad"
,
getName
().
c_str
());
stream_
->
submit
(
pipelineMergeGrad_
);
}
{
REGISTER_TIMER_INFO
(
"BpActTimer"
,
getName
().
c_str
());
backwardActivation
();
}
{
REGISTER_TIMER_INFO
(
"mkldnn_bwdTimer"
,
getName
().
c_str
());
stream_
->
submit
(
pipelineBwd_
);
}
{
REGISTER_TIMER_INFO
(
"WeightUpdate"
,
getName
().
c_str
());
updateWeights
(
callback
);
}
}
void
MKLDNNLayer
::
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
)
{
const
Argument
&
input
=
inputLayers_
[
0
]
->
getOutput
();
batchsize
=
input
.
getBatchSize
();
int
h
=
input
.
getFrameHeight
();
int
w
=
input
.
getFrameWidth
();
if
(
h
!=
0
)
{
height
=
h
;
}
if
(
w
!=
0
)
{
width
=
w
;
}
}
void
MKLDNNLayer
::
reshapeOutput
(
size_t
height
,
size_t
width
)
{
output_
.
setFrameHeight
(
height
);
output_
.
setFrameWidth
(
width
);
for
(
size_t
i
=
0
;
i
<
outputOtherDevice_
.
size
();
i
++
)
{
outputOtherDevice_
[
i
].
setFrameHeight
(
height
);
outputOtherDevice_
[
i
].
setFrameWidth
(
width
);
}
}
void
MKLDNNLayer
::
resetWithMatrix
(
MKLDNNMatrixPtr
&
dnn
,
const
MatrixPtr
&
mat
,
memory
::
primitive_desc
pd
)
{
dnn
=
nullptr
;
if
(
mat
==
nullptr
)
{
return
;
}
dnn
=
MKLDNNMatrix
::
create
(
pd
,
mat
);
}
void
MKLDNNLayer
::
resetInValue
(
MKLDNNMatrixPtr
&
in
,
const
std
::
shared_ptr
<
memory
::
primitive_desc
>&
intPD
)
{
cvtInVal_
=
nullptr
;
extInVal_
=
nullptr
;
in
=
nullptr
;
CHECK_GT
(
bs_
*
ic_
*
ih_
*
iw_
,
0
);
auto
extPD
=
MKLDNNMatrix
::
createPrimitiveDesc
(
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
const
MatrixPtr
&
inMat
=
inputLayers_
[
0
]
->
getOutputValue
();
in
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
inMat
);
CHECK_EQ
(
inputIsOnlyMKLDNN
(),
in
!=
nullptr
);
if
(
in
==
nullptr
||
in
->
getFormat
()
==
format
::
nc
)
{
in
=
MKLDNNMatrix
::
create
(
extPD
,
inMat
);
}
extInVal_
=
isPaddleFormat
(
in
->
getFormat
())
?
in
:
nullptr
;
if
(
in
->
getFormat
()
==
format
::
nc
)
{
CHECK
(
ih_
==
1
&&
iw_
==
1
);
}
if
(
nullptr
==
intPD
||
in
->
getPrimitiveDesc
()
==
*
intPD
)
{
return
;
}
// need create reorder
in
=
MKLDNNMatrix
::
create
(
*
intPD
);
extInVal_
=
extInVal_
?
extInVal_
:
MKLDNNMatrix
::
create
(
extPD
,
inMat
);
cvtInVal_
=
MKLDNNMatrix
::
createReorder
(
extInVal_
,
in
);
CHECK
(
cvtInVal_
)
<<
"should not be emptry"
;
}
void
MKLDNNLayer
::
resetOutValue
(
MKLDNNMatrixPtr
&
out
,
memory
::
primitive_desc
intPD
)
{
cvtOutVal_
=
nullptr
;
out
=
MKLDNNMatrix
::
create
(
intPD
,
output_
.
value
);
extOutVal_
=
out
;
if
(
outputIsOnlyMKLDNN
()
||
isPaddleFormat
(
extOutVal_
->
getFormat
()))
{
return
;
}
// need create reorder
CHECK_GT
(
bs_
*
oc_
*
oh_
*
ow_
,
0
);
extOutVal_
=
MKLDNNMatrix
::
create
(
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
},
format
::
nchw
,
engine_
,
output_
.
value
);
out
=
MKLDNNMatrix
::
create
(
intPD
);
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
extOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be empty"
;
}
void
MKLDNNLayer
::
resetInGrad
(
MKLDNNMatrixPtr
&
in
,
memory
::
primitive_desc
intPD
)
{
cvtInGrad_
=
nullptr
;
extInGrad_
=
nullptr
;
in
=
nullptr
;
LayerPtr
&
input
=
inputLayers_
[
0
];
if
(
input
->
getOutputGrad
()
==
nullptr
)
{
// no need input grad
return
;
}
CHECK
(
inputIsOnlyMKLDNN
()
||
input
->
getOutputMapSize
()
<=
1
)
<<
"only support input is MKLDNN layer or only have one output layer"
;
// when input is a mkldnn branch node,
// this layer will save input grad to a internal buffer,
// and the mkldnn input layer will merge them to actual prev->output_.grad
const
MatrixPtr
&
inMat
=
input
->
getOutputMapSize
()
<=
1
?
input
->
getOutputGrad
()
:
nullptr
;
in
=
MKLDNNMatrix
::
create
(
intPD
,
inMat
);
Argument
&
arg
=
input
->
getOutput
(
this
->
getName
());
arg
.
grad
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
in
);
CHECK
(
inVal_
);
CHECK
(
inVal_
->
getPrimitiveDesc
()
==
intPD
)
<<
"the primitive desc must equal"
;
if
(
inputIsOnlyMKLDNN
())
{
return
;
}
extInGrad_
=
in
;
if
(
isPaddleFormat
(
extInGrad_
->
getFormat
()))
{
return
;
}
// need create reorder
// TODO(TJ): add macro definition to simplify it
CHECK
(
extInVal_
!=
nullptr
&&
isPaddleFormat
(
extInVal_
->
getFormat
()))
<<
"should have external input value and the format must be nchw(nc)"
;
extInGrad_
=
MKLDNNMatrix
::
create
(
extInVal_
->
getPrimitiveDesc
(),
inMat
);
CHECK
(
inVal_
!=
nullptr
&&
inVal_
->
getPrimitiveDesc
()
==
intPD
)
<<
"should have internal input value and primitive desc must equal"
;
in
=
MKLDNNMatrix
::
create
(
intPD
);
cvtInGrad_
=
MKLDNNMatrix
::
createReorder
(
in
,
extInGrad_
);
CHECK
(
cvtInGrad_
);
}
void
MKLDNNLayer
::
resetOutGrad
(
MKLDNNMatrixPtr
&
out
,
memory
::
primitive_desc
intPD
)
{
cvtOutGrad_
=
nullptr
;
extOutGrad_
=
nullptr
;
out
=
nullptr
;
MatrixPtr
&
outMat
=
output_
.
grad
;
out
=
MKLDNNMatrix
::
create
(
intPD
,
outMat
);
resetMergeGrad
(
out
);
if
(
outputIsOnlyMKLDNN
())
{
return
;
}
CHECK_LE
(
outputMap_
.
size
(),
1U
)
<<
"do not support mixed with cpu device"
;
extOutGrad_
=
out
;
if
(
isPaddleFormat
(
extOutGrad_
->
getFormat
()))
{
return
;
}
// need create reorder
CHECK
(
extOutVal_
!=
nullptr
&&
isPaddleFormat
(
extOutVal_
->
getFormat
()))
<<
"should have external output value and the format must be nchw(nc)"
;
extOutGrad_
=
MKLDNNMatrix
::
create
(
extOutVal_
->
getPrimitiveDesc
(),
outMat
);
CHECK
(
outVal_
!=
nullptr
&&
outVal_
->
getPrimitiveDesc
()
==
intPD
)
<<
"should have internal output value and primitive desc must equal"
;
out
=
MKLDNNMatrix
::
create
(
intPD
);
cvtOutGrad_
=
MKLDNNMatrix
::
createReorder
(
extOutGrad_
,
out
);
CHECK
(
cvtOutGrad_
);
}
void
MKLDNNLayer
::
resetMergeGrad
(
MKLDNNMatrixPtr
&
out
)
{
mergeGrad_
=
nullptr
;
pipelineMergeGrad_
.
clear
();
if
(
outputMap_
.
size
()
<=
1
||
!
outputIsOnlyMKLDNN
())
{
// do not merge when output is not all MKLDNN or only one output
return
;
}
CHECK
(
out
)
<<
"should have reset internal ouput grad"
;
std
::
vector
<
double
>
scales
(
outputMap_
.
size
(),
1.0
);
std
::
vector
<
memory
::
primitive_desc
>
srcPDs
;
std
::
vector
<
primitive
::
at
>
srcs
;
for
(
auto
it
=
outputMap_
.
begin
();
it
!=
outputMap_
.
end
();
++
it
)
{
MKLDNNMatrixPtr
src
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
it
->
second
->
grad
);
CHECK
(
src
)
<<
"should be MKLDNNMatrix"
;
auto
srcDims
=
src
->
getDims
();
auto
dstDims
=
out
->
getDims
();
CHECK_EQ
(
srcDims
.
size
(),
dstDims
.
size
());
for
(
size_t
i
=
0
;
i
<
srcDims
.
size
();
++
i
)
{
CHECK_EQ
(
srcDims
[
i
],
dstDims
[
i
]);
}
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" has output grad "
<<
it
->
first
<<
", format "
<<
src
->
getFormat
();
srcPDs
.
push_back
(
src
->
getPrimitiveDesc
());
srcs
.
push_back
(
*
src
);
}
// TODO(TJ): remove me when mkldnn sum support different formats
for
(
size_t
i
=
1
;
i
<
srcPDs
.
size
();
++
i
)
{
CHECK
(
srcPDs
[
0
]
==
srcPDs
[
i
]);
}
tmpOutGrad_
=
out
;
tmpCvt_
=
nullptr
;
if
(
out
->
getPrimitiveDesc
()
!=
srcPDs
[
0
])
{
tmpOutGrad_
=
MKLDNNMatrix
::
create
(
srcPDs
[
0
]);
tmpCvt_
=
MKLDNNMatrix
::
createReorder
(
tmpOutGrad_
,
out
);
CHECK
(
tmpCvt_
);
pipelineMergeGrad_
.
push_back
(
*
tmpCvt_
);
}
auto
sumPD
=
sum
::
primitive_desc
(
tmpOutGrad_
->
getMemoryDesc
(),
scales
,
srcPDs
);
mergeGrad_
.
reset
(
new
sum
(
sumPD
,
srcs
,
*
tmpOutGrad_
));
pipelineMergeGrad_
.
insert
(
pipelineMergeGrad_
.
begin
(),
*
mergeGrad_
);
}
}
// namespace paddle
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
c2feab7f
...
...
@@ -58,11 +58,31 @@ protected:
std
::
vector
<
mkldnn
::
primitive
>
pipelineFwd_
;
std
::
vector
<
mkldnn
::
primitive
>
pipelineBwd_
;
// MKLDNNMatrixPtr with internal format
/* Value and grad are seperated as internal and external buffers.
* Each MKLDNNLayer must init or reset internal buffer at least,
* and the external buffer format is always nchw of nc(when h==w==1),
* which is the same format as paddle.
* The output_.value and output_.grad always save the external data,
* when mixed with cpu device.
* When all layers are mkldnn layers, they could save internal data.
*/
// below MKLDNNMatrix buffers are all internal buffers
MKLDNNMatrixPtr
inVal_
;
MKLDNNMatrixPtr
inGrad_
;
MKLDNNMatrixPtr
outVal_
;
MKLDNNMatrixPtr
outGrad_
;
// below are external value and grad
MKLDNNMatrixPtr
extInVal_
;
MKLDNNMatrixPtr
extInGrad_
;
MKLDNNMatrixPtr
extOutVal_
;
MKLDNNMatrixPtr
extOutGrad_
;
// convert handle between external and internal buffers
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtInGrad_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutGrad_
;
// weight and bias are always internal buffers
MKLDNNMatrixPtr
wgtVal_
;
MKLDNNMatrixPtr
wgtGrad_
;
MKLDNNMatrixPtr
biasVal_
;
...
...
@@ -91,6 +111,7 @@ public:
oh_
(
0
),
ow_
(
0
),
needResetBwd_
(
true
),
outputOnlyMKLDNN_
(
false
),
engine_
(
mkldnn
::
engine
::
cpu
,
0
),
stream_
(
nullptr
),
fwd_
(
nullptr
),
...
...
@@ -99,92 +120,9 @@ public:
~
MKLDNNLayer
()
{}
virtual
bool
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
CHECK
(
FLAGS_use_mkldnn
)
<<
"MkldnnLayers only support use_mkldnn."
<<
"Please set WITH_MKLDNN=ON "
<<
"and set use_mkldnn=True"
;
CHECK
(
!
useGpu_
)
<<
"Do not support GPU yet"
;
// set device id before Layer::init
setDevice
(
MKLDNN_DEVICE
);
// change param device to MKLDNN device
setParamsDevice
(
MKLDNN_DEVICE
,
parameterMap
);
if
(
!
Layer
::
init
(
layerMap
,
parameterMap
))
{
return
false
;
}
setOutputMap
();
checkCPUOutputsNumber
();
stream_
.
reset
(
new
MKLDNNStream
());
engine_
=
CPUEngine
::
Instance
().
getEngine
();
return
true
;
}
void
forward
(
PassType
passType
)
override
{
passType_
=
passType
;
{
REGISTER_TIMER_INFO
(
"mkldnn_FwdTimer"
,
getName
().
c_str
());
CHECK
(
!
inputLayers_
.
empty
());
copySeqInfoToOutputs
();
size_t
elemenCnt
=
inputLayers_
[
0
]
->
getOutput
().
value
->
getElementCnt
();
if
(
inputElemenCnt_
!=
elemenCnt
)
{
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn forward"
;
// reset when input total sizes changed, not only the batchsize
inputElemenCnt_
=
elemenCnt
;
pipelineFwd_
.
clear
();
reshape
(
bs_
,
ic_
,
ih_
,
iw_
,
oc_
,
oh_
,
ow_
);
resetFwd
(
pipelineFwd_
,
inVal_
,
wgtVal_
,
biasVal_
,
outVal_
);
convertWeightsFromPaddle
();
needResetBwd_
=
true
;
}
if
(
inputLayers_
[
0
]
->
getType
()
==
"data"
)
{
updateInputData
();
}
if
(
!
outputOnlyMKLDNN_
)
{
clearGrads
();
}
stream_
->
submit
(
pipelineFwd_
);
}
/* activation */
{
REGISTER_TIMER_INFO
(
"FwActTimer"
,
getName
().
c_str
());
forwardActivation
();
}
}
void
backward
(
const
UpdateCallback
&
callback
)
override
{
if
(
needResetBwd_
)
{
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" reset mkldnn backward"
;
pipelineBwd_
.
clear
();
pipelineMergeGrad_
.
clear
();
mergeGrad_
=
nullptr
;
resetBwd
(
pipelineBwd_
,
inGrad_
,
wgtGrad_
,
biasGrad_
,
outGrad_
);
needResetBwd_
=
false
;
}
// merge grad must before backward activation
if
(
mergeGrad_
)
{
REGISTER_TIMER_INFO
(
"MergeBpGrad"
,
getName
().
c_str
());
stream_
->
submit
(
pipelineMergeGrad_
);
}
{
REGISTER_TIMER_INFO
(
"BpActTimer"
,
getName
().
c_str
());
backwardActivation
();
}
{
REGISTER_TIMER_INFO
(
"mkldnn_bwdTimer"
,
getName
().
c_str
());
stream_
->
submit
(
pipelineBwd_
);
}
{
REGISTER_TIMER_INFO
(
"WeightUpdate"
,
getName
().
c_str
());
updateWeights
(
callback
);
}
}
virtual
bool
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
);
virtual
void
forward
(
PassType
passType
);
virtual
void
backward
(
const
UpdateCallback
&
callback
);
/**
* reshape the input image sizes
...
...
@@ -195,7 +133,7 @@ public:
int
&
bs
,
int
&
ic
,
int
&
ih
,
int
&
iw
,
int
oc
,
int
&
oh
,
int
&
ow
)
=
0
;
/**
* reset the mkldnn forward primitve and memor
y
* reset the mkldnn forward primitve and memor
ies
* only would be called when input size changes
*/
virtual
void
resetFwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
...
...
@@ -205,7 +143,7 @@ public:
MKLDNNMatrixPtr
&
out
)
=
0
;
/**
* reset the mkldnn backward primitve and memor
y for mkldnn fc
* reset the mkldnn backward primitve and memor
ies
* only would be called when needed
*/
virtual
void
resetBwd
(
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
,
...
...
@@ -214,12 +152,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
=
0
;
/**
* Update input value data when input layer is "data" type.
* Since the input value data address might be changed.
*/
virtual
void
updateInputData
()
{}
/**
* Update weights and biases if necessary.
*/
...
...
@@ -246,131 +178,78 @@ protected:
/**
* reshape the input image sizes and input batchsize
*/
virtual
void
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
)
{
const
Argument
&
input
=
inputLayers_
[
0
]
->
getOutput
();
batchsize
=
input
.
getBatchSize
();
int
h
=
input
.
getFrameHeight
();
int
w
=
input
.
getFrameWidth
();
if
(
h
!=
0
)
{
height
=
h
;
}
if
(
w
!=
0
)
{
width
=
w
;
}
}
void
reshapeInput
(
int
&
batchsize
,
int
&
height
,
int
&
width
);
/**
* reshape output image sizes
*/
virtual
void
reshapeOutput
(
size_t
height
,
size_t
width
)
{
output_
.
setFrameHeight
(
height
);
output_
.
setFrameWidth
(
width
);
for
(
size_t
i
=
0
;
i
<
outputOtherDevice_
.
size
();
i
++
)
{
outputOtherDevice_
[
i
].
setFrameHeight
(
height
);
outputOtherDevice_
[
i
].
setFrameWidth
(
width
);
}
}
void
reshapeOutput
(
size_t
height
,
size_t
width
);
/**
* reset the output grad matrix from primitive desc.
* and reset the merge grad primitive if needed.
* note: when this layer has serval outputs,
* it could not be mixed with cpu device,
* since it can not get memory desc from cpu device.
* reset MKLDNNMatrix from Matrix and internal primitive desc.
* reset nullptr if matrix or primitive desc is empty
*/
virtual
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
,
mkldnn
::
memory
::
primitive_desc
pd
)
{
CHECK
(
outputIsOnlyMKLDNN
())
<<
"do not support mixed with other device yet"
;
mergeGrad_
=
nullptr
;
pipelineMergeGrad_
.
clear
();
out
=
MKLDNNMatrix
::
create
(
output_
.
grad
,
pd
);
if
(
outputMap_
.
size
()
<=
1
)
{
return
;
}
std
::
vector
<
double
>
scales
(
outputMap_
.
size
(),
1.0
);
std
::
vector
<
mkldnn
::
memory
::
primitive_desc
>
srcPDs
;
std
::
vector
<
mkldnn
::
primitive
::
at
>
srcs
;
for
(
auto
it
=
outputMap_
.
begin
();
it
!=
outputMap_
.
end
();
++
it
)
{
MKLDNNMatrixPtr
src
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
it
->
second
->
grad
);
VLOG
(
MKLDNN_BASE
)
<<
getName
()
<<
" has output grad "
<<
it
->
first
;
CHECK
(
src
)
<<
"should be MKLDNNMatrix"
;
auto
srcDims
=
src
->
getDims
();
auto
dstDims
=
out
->
getDims
();
CHECK_EQ
(
srcDims
.
size
(),
dstDims
.
size
());
for
(
size_t
i
=
0
;
i
<
srcDims
.
size
();
++
i
)
{
CHECK_EQ
(
srcDims
[
i
],
dstDims
[
i
]);
}
srcPDs
.
push_back
(
src
->
getPrimitiveDesc
());
srcs
.
push_back
(
*
src
);
}
void
resetWithMatrix
(
MKLDNNMatrixPtr
&
dnn
,
const
MatrixPtr
&
mat
,
mkldnn
::
memory
::
primitive_desc
pd
);
// TODO(TJ): remove me when mkldnn sum support different formats
for
(
size_t
i
=
1
;
i
<
srcPDs
.
size
();
++
i
)
{
CHECK
(
srcPDs
[
0
]
==
srcPDs
[
i
]);
}
tmpOutGrad_
=
nullptr
;
tmpCvt_
=
nullptr
;
if
(
out
->
getPrimitiveDesc
()
!=
srcPDs
[
0
])
{
tmpOutGrad_
=
MKLDNNMatrix
::
create
(
nullptr
,
srcPDs
[
0
]);
tmpCvt_
=
MKLDNNMatrix
::
createReorder
(
tmpOutGrad_
,
out
);
CHECK
(
tmpCvt_
);
pipelineMergeGrad_
.
push_back
(
*
tmpCvt_
);
}
else
{
tmpOutGrad_
=
out
;
}
/**
* reset input value from input MKLDNNMatrix and internal primitive desc.
* reset both internal and external buffer and create reorder if necessary.
*/
void
resetInValue
(
MKLDNNMatrixPtr
&
in
,
const
std
::
shared_ptr
<
mkldnn
::
memory
::
primitive_desc
>&
intPD
=
nullptr
);
auto
sumPD
=
mkldnn
::
sum
::
primitive_desc
(
tmpOutGrad_
->
getMemoryDesc
(),
scales
,
srcPDs
);
mergeGrad_
.
reset
(
new
mkldnn
::
sum
(
sumPD
,
srcs
,
*
tmpOutGrad_
));
pipelineMergeGrad_
.
insert
(
pipelineMergeGrad_
.
begin
(),
*
mergeGrad_
);
}
/**
* reset output value from internal primitive desc.
* reset both internal and external buffer and create reorder if necessary.
*/
void
resetOutValue
(
MKLDNNMatrixPtr
&
out
,
mkldnn
::
memory
::
primitive_desc
intPD
);
/**
* reset input grad from primitive desc.
* this function is avaiable for input is only mkldnn
* or input do not care cpu device
* reset input grad from internal primitive desc.
* reset both internal and external buffer and create reorder if necessary.
*/
virtual
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
,
mkldnn
::
memory
::
primitive_desc
pd
)
{
LayerPtr
&
input
=
inputLayers_
[
0
];
const
MatrixPtr
&
grad
=
input
->
getOutputMapSize
()
>
1
?
nullptr
:
input
->
getOutput
().
grad
;
in
=
MKLDNNMatrix
::
create
(
grad
,
pd
);
Argument
&
arg
=
input
->
getOutput
(
this
->
getName
());
arg
.
grad
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
in
);
}
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
,
mkldnn
::
memory
::
primitive_desc
intPD
);
/**
* print info about sizes
* reset output grad from internal primitive desc.
* merge grad if necessary.
* reset both internal and external buffer and create reorder if necessary.
* note: about merge grad, when this layer has several outputs,
* it could not be mixed with cpu device,
* since it can not get memory desc from cpu device.
*/
virtual
void
printSizeInfo
()
{
VLOG
(
MKLDNN_SIZES
)
<<
getName
()
<<
": bs: "
<<
bs_
<<
", ic: "
<<
ic_
<<
", ih: "
<<
ih_
<<
", iw: "
<<
iw_
<<
", oc: "
<<
oc_
<<
", oh: "
<<
oh_
<<
", ow: "
<<
ow_
;
}
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
,
mkldnn
::
memory
::
primitive_desc
intPD
);
/**
* Print the mkldnn memory format flow of value
* reset the merge grad primitive if necessary.
* note: do not support the grads mixed with cpu device,
* since it can not get memory desc from cpu device.
*/
virtual
void
printValueFormatFlow
()
{
if
(
inVal_
&&
outVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
inVal_
->
getFormat
()
<<
" >>> "
<<
outVal_
->
getFormat
();
}
}
void
resetMergeGrad
(
MKLDNNMatrixPtr
&
out
);
protected:
/**
*
Print the mkldnn memory format flow of grad
*
Set deviceId of this layer.
*/
virtual
void
printGradFormatFlow
()
{
if
(
inGrad_
&&
outGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
inGrad_
->
getFormat
()
<<
" <<< "
<<
outGrad_
->
getFormat
();
void
setDevice
(
int
id
)
{
deviceId_
=
id
;
}
/**
* check the format is nchw or nc,
* which is supported by Paddle default memory layout
*/
bool
isPaddleFormat
(
mkldnn
::
memory
::
format
fmt
)
{
if
(
fmt
==
mkldnn
::
memory
::
format
::
nchw
||
fmt
==
mkldnn
::
memory
::
format
::
nc
)
{
return
true
;
}
else
{
return
false
;
}
}
protected:
/**
* If input only has MKLDNN device.
* Otherwise, only support the previous layer using CPU device.
...
...
@@ -380,7 +259,6 @@ protected:
if
(
prevDevice
==
MKLDNN_DEVICE
)
{
return
true
;
}
else
{
// do not support GPU yet
CHECK_EQ
(
prevDevice
,
CPU_DEVICE
)
<<
"Only support CPU yet"
;
return
false
;
}
...
...
@@ -400,9 +278,61 @@ protected:
}
/**
*
Set deviceId of this layer.
*
print info about sizes
*/
void
setDevice
(
int
id
)
{
deviceId_
=
id
;
}
virtual
void
printSizeInfo
()
{
VLOG
(
MKLDNN_SIZES
)
<<
getName
()
<<
": bs: "
<<
bs_
<<
", ic: "
<<
ic_
<<
", ih: "
<<
ih_
<<
", iw: "
<<
iw_
<<
", oc: "
<<
oc_
<<
", oh: "
<<
oh_
<<
", ow: "
<<
ow_
;
}
/**
* print the mkldnn memory format of value
*/
virtual
void
printValueFormat
()
{
if
(
extInVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
extInVal_
->
getFormat
()
<<
" >>> "
;
}
if
(
inVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
inVal_
->
getFormat
()
<<
" >>>"
;
}
if
(
outVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
outVal_
->
getFormat
()
<<
" >>> "
;
}
if
(
extOutVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
extOutVal_
->
getFormat
();
}
if
(
wgtVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"Weight value format: "
<<
wgtVal_
->
getFormat
();
}
if
(
biasVal_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"Bias value format: "
<<
biasVal_
->
getFormat
();
}
}
/**
* print the mkldnn memory format of grad
*/
virtual
void
printGradFormat
()
{
if
(
extOutGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
extOutGrad_
->
getFormat
();
}
if
(
outGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
outGrad_
->
getFormat
()
<<
" <<< "
;
}
if
(
inGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
inGrad_
->
getFormat
()
<<
" <<<"
;
}
if
(
extInGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
extInGrad_
->
getFormat
()
<<
" <<< "
;
}
if
(
wgtGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"Weight grad format: "
<<
wgtGrad_
->
getFormat
();
}
if
(
biasGrad_
)
{
VLOG
(
MKLDNN_FMTS
)
<<
"Bias grad format: "
<<
biasGrad_
->
getFormat
();
}
}
private:
/**
...
...
@@ -449,6 +379,19 @@ private:
}
}
/**
* if have cpu device, share value and grad data with output_
*/
void
shareCPUDevice
()
{
if
(
outputIsOnlyMKLDNN
())
{
return
;
}
for
(
size_t
i
=
0
;
i
<
outputOtherDevice_
.
size
();
i
++
)
{
outputOtherDevice_
[
i
].
value
=
output_
.
value
;
outputOtherDevice_
[
i
].
grad
=
output_
.
grad
;
}
}
/**
* Check the cpu device number of outputOtherDevice_.
* should have only one at most.
...
...
paddle/gserver/layers/MKLDNNPoolLayer.cpp
浏览文件 @
c2feab7f
...
...
@@ -85,8 +85,6 @@ void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
resetFwdPD
(
fwdPD_
,
in
,
out
);
resetFwdPipeline
(
pipeline
,
fwdPD_
,
in
,
out
);
printValueFormatFlow
();
}
void
MKLDNNPoolLayer
::
resetBwd
(
std
::
vector
<
primitive
>&
pipeline
,
...
...
@@ -101,65 +99,22 @@ void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
resetBwdPD
(
pd
,
in
,
out
);
resetBwdPipeline
(
pipeline
,
pd
,
in
,
out
);
printGradFormatFlow
();
}
void
MKLDNNPoolLayer
::
updateInputData
()
{
inVal_
->
setData
(
getInputValue
(
0
,
CPU_DEVICE
)
->
getData
());
}
void
MKLDNNPoolLayer
::
resetFwdBuffers
(
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
resetInValue
(
in
);
resetOutValue
(
out
);
}
void
MKLDNNPoolLayer
::
resetInValue
(
MKLDNNMatrixPtr
&
in
)
{
if
(
inputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
dnnIn
=
getInputValue
(
0
);
in
=
std
::
dynamic_pointer_cast
<
MKLDNNMatrix
>
(
dnnIn
);
CHECK
(
in
)
<<
"Input should be MKLDNNMatrix"
;
}
else
{
CHECK_EQ
(
getPrev
(
0
)
->
getDeviceId
(),
CPU_DEVICE
)
<<
"Only support CPU yet"
;
const
MatrixPtr
&
cpuIn
=
getInputValue
(
0
,
CPU_DEVICE
);
in
=
MKLDNNMatrix
::
create
(
cpuIn
,
{
bs_
,
ic_
,
ih_
,
iw_
},
format
::
nchw
,
engine_
);
}
}
void
MKLDNNPoolLayer
::
resetOutValue
(
MKLDNNMatrixPtr
&
out
)
{
CHECK
(
inVal_
)
<<
"Should reset input value first"
;
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
out
=
MKLDNNMatrix
::
create
(
output_
.
value
,
outDims
,
inVal_
->
getFormat
(),
engine_
);
// create reorder if output value has cpu device and pd do not match
cpuOutVal_
=
nullptr
;
cvtOutVal_
=
nullptr
;
if
(
!
outputIsOnlyMKLDNN
())
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
value
;
cpuOutVal_
=
MKLDNNMatrix
::
create
(
cpuOut
,
outDims
,
format
::
nchw
,
engine_
);
if
(
cpuOutVal_
->
getPrimitiveDesc
()
!=
out
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
out
->
getPrimitiveDesc
());
cvtOutVal_
=
MKLDNNMatrix
::
createReorder
(
out
,
cpuOutVal_
);
CHECK
(
cvtOutVal_
)
<<
"should not be emptry"
;
}
else
{
cpuOut
->
setData
(
output_
.
value
->
getData
());
cpuOutVal_
=
out
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
cpuOutVal_
);
return
;
}
output_
.
value
=
std
::
dynamic_pointer_cast
<
Matrix
>
(
outVal_
);
CHECK
(
in
);
auto
outPD
=
MKLDNNMatrix
::
createPrimitiveDesc
(
outDims
,
in
->
getFormat
(),
engine_
);
resetOutValue
(
out
,
outPD
);
}
void
MKLDNNPoolLayer
::
resetFwdPD
(
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
in
,
MKLDNNMatrixPtr
out
)
{
memory
::
dims
inDims
=
memory
::
dims
{
bs_
,
ic_
,
ih_
,
iw_
};
memory
::
dims
outDims
=
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
};
memory
::
dims
kernels
=
memory
::
dims
{
fh_
,
fw_
};
memory
::
dims
strides
=
memory
::
dims
{
sh_
,
sw_
};
memory
::
dims
padL
=
memory
::
dims
{
ph_
,
pw_
};
...
...
@@ -194,58 +149,26 @@ void MKLDNNPoolLayer::resetFwdPipeline(
?
std
::
make_shared
<
pool_fwd
>
(
pool_fwd
(
*
pd
,
*
in
,
*
out
,
*
workspace_
))
:
std
::
make_shared
<
pool_fwd
>
(
pool_fwd
(
*
pd
,
*
in
,
*
out
));
pipeline
.
push_back
(
*
fwd_
);
if
(
cvtOutVal_
)
{
pipeline
.
push_back
(
*
cvtOutVal_
);
}
}
void
MKLDNNPoolLayer
::
resetBwdBuffers
(
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
resetOutGrad
(
out
);
resetInGrad
(
in
);
}
void
MKLDNNPoolLayer
::
resetOutGrad
(
MKLDNNMatrixPtr
&
out
)
{
cpuOutGrad_
=
nullptr
;
cvtOutGrad_
=
nullptr
;
CHECK
(
outVal_
);
if
(
outputIsOnlyMKLDNN
())
{
MKLDNNLayer
::
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
}
else
{
const
MatrixPtr
&
cpuOut
=
getOutput
(
CPU_DEVICE
).
grad
;
// always share the same grad data of CPU output
// then the activation can get the right grad from output_.grad
output_
.
grad
->
setData
(
cpuOut
->
getData
());
cpuOutGrad_
=
MKLDNNMatrix
::
create
(
cpuOut
,
memory
::
dims
{
bs_
,
oc_
,
oh_
,
ow_
},
format
::
nchw
,
engine_
);
if
(
cpuOutGrad_
->
getPrimitiveDesc
()
!=
outVal_
->
getPrimitiveDesc
())
{
out
=
MKLDNNMatrix
::
create
(
nullptr
,
outVal_
->
getPrimitiveDesc
());
cvtOutGrad_
=
MKLDNNMatrix
::
createReorder
(
cpuOutGrad_
,
out
);
CHECK
(
cvtOutGrad_
)
<<
"should not be emptry"
;
}
else
{
out
=
cpuOutGrad_
;
}
}
}
void
MKLDNNPoolLayer
::
resetInGrad
(
MKLDNNMatrixPtr
&
in
)
{
in
=
nullptr
;
if
(
inputLayers_
[
0
]
->
getOutput
().
grad
==
nullptr
)
{
return
;
}
CHECK
(
inVal_
);
MKLDNNLayer
::
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
CHECK
(
inVal_
&&
outVal_
);
resetOutGrad
(
out
,
outVal_
->
getPrimitiveDesc
());
resetInGrad
(
in
,
inVal_
->
getPrimitiveDesc
());
}
void
MKLDNNPoolLayer
::
resetBwdPD
(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
pd
=
nullptr
;
if
(
in
==
nullptr
)
{
return
;
}
memory
::
dims
kernels
=
memory
::
dims
{
fh_
,
fw_
};
memory
::
dims
strides
=
memory
::
dims
{
sh_
,
sw_
};
memory
::
dims
padL
=
memory
::
dims
{
ph_
,
pw_
};
memory
::
dims
padR
=
getPaddingR
();
CHECK
(
in
);
CHECK
(
out
);
auto
bwdDesc
=
pool_bwd
::
desc
(
poolAlgo_
,
in
->
getMemoryDesc
(),
...
...
@@ -263,8 +186,8 @@ void MKLDNNPoolLayer::resetBwdPipeline(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
)
{
if
(
cvtOutGrad_
)
{
pipeline
.
push_back
(
*
cvtOutGrad_
)
;
if
(
pd
==
nullptr
)
{
return
;
}
bwdData_
=
...
...
paddle/gserver/layers/MKLDNNPoolLayer.h
浏览文件 @
c2feab7f
...
...
@@ -38,13 +38,6 @@ protected:
// pooling_avg or pooling_max
mkldnn
::
algorithm
poolAlgo_
;
// MKLDNNMatrixPtr which should be created from CPU Device
MKLDNNMatrixPtr
cpuOutVal_
;
MKLDNNMatrixPtr
cpuOutGrad_
;
// convert handle between CPU device and MKLDNN device
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutVal_
;
std
::
shared_ptr
<
mkldnn
::
reorder
>
cvtOutGrad_
;
// save forward primitive_desc, which can be used backward
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>
fwdPD_
;
// according to https://github.com/01org/mkl-dnn/blob/master/tests/gtests/
...
...
@@ -74,8 +67,6 @@ public:
MKLDNNMatrixPtr
&
bias
,
MKLDNNMatrixPtr
&
out
)
override
;
void
updateInputData
()
override
;
void
printSizeInfo
()
override
{
MKLDNNLayer
::
printSizeInfo
();
VLOG
(
MKLDNN_SIZES
)
<<
getName
()
<<
": fh: "
<<
fh_
<<
", fw: "
<<
fw_
...
...
@@ -90,8 +81,6 @@ protected:
* reset pipeline.
*/
void
resetFwdBuffers
(
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
);
void
resetInValue
(
MKLDNNMatrixPtr
&
in
);
void
resetOutValue
(
MKLDNNMatrixPtr
&
out
);
void
resetFwdPD
(
std
::
shared_ptr
<
pool_fwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
in
,
MKLDNNMatrixPtr
out
);
...
...
@@ -106,8 +95,6 @@ protected:
* reset pipeline.
*/
void
resetBwdBuffers
(
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
);
void
resetOutGrad
(
MKLDNNMatrixPtr
&
out
);
void
resetInGrad
(
MKLDNNMatrixPtr
&
in
);
void
resetBwdPD
(
std
::
shared_ptr
<
pool_bwd
::
primitive_desc
>&
pd
,
MKLDNNMatrixPtr
&
in
,
MKLDNNMatrixPtr
&
out
);
...
...
paddle/gserver/tests/MKLDNNTester.cpp
浏览文件 @
c2feab7f
...
...
@@ -97,7 +97,7 @@ void MKLDNNTester::randomWgtDatas() {
parameters_
[
REF
][
i
]
->
randomize
();
dnnValue
->
copyFrom
(
*
refValue
);
VLOG
(
lvl_
)
<<
"Random weight data
"
<<
parameters_
[
DNN
][
i
]
->
getName
();
VLOG
(
MKLDNN_TESTS
)
<<
"Random weight
"
<<
parameters_
[
DNN
][
i
]
->
getName
();
printVector
(
dnnValue
);
}
}
...
...
@@ -109,7 +109,7 @@ void MKLDNNTester::randomBotDatas() {
dataLayers_
[
REF
][
i
]
->
getOutputValue
()
->
randomizeUniform
();
dataLayers_
[
DNN
][
i
]
->
getOutputValue
()
->
copyFrom
(
*
(
dataLayers_
[
REF
][
i
]
->
getOutputValue
()));
VLOG
(
lvl_
)
<<
"Input "
<<
i
<<
" data:"
;
VLOG
(
MKLDNN_TESTS
)
<<
"Random Foward, InputValue "
<<
i
;
printMatrix
(
dataLayers_
[
REF
][
i
]
->
getOutputValue
());
}
}
...
...
@@ -118,12 +118,12 @@ void MKLDNNTester::randomTopDiffs() {
refLayer_
->
getOutputGrad
()
->
randomizeUniform
();
dnnLayer_
->
getOutput
(
CPU_DEVICE
)
.
grad
->
copyFrom
(
*
(
refLayer_
->
getOutputGrad
()));
VLOG
(
lvl_
)
<<
"Random Backward Input, TopDiff:
"
;
VLOG
(
MKLDNN_TESTS
)
<<
"Random Backward, OutputGrad
"
;
printMatrix
(
refLayer_
->
getOutputGrad
());
}
void
MKLDNNTester
::
checkForward
()
{
VLOG
(
MKLDNN_
ALL
)
<<
"Check Forward"
;
VLOG
(
MKLDNN_
TESTS
)
<<
"Check Forward"
;
printTopDatas
();
double
delta
=
compareMatrix
(
dnnLayer_
->
getOutputValue
(),
refLayer_
->
getOutputValue
());
...
...
@@ -131,15 +131,15 @@ void MKLDNNTester::checkForward() {
}
void
MKLDNNTester
::
checkBackwardData
()
{
VLOG
(
MKLDNN_
ALL
)
<<
"Check Backward Data"
;
VLOG
(
MKLDNN_
TESTS
)
<<
"Check Backward Data"
;
// TODO(TJ): uncomment me when batch norm ready
// const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm";
for
(
size_t
i
=
0
;
i
<
dataLayers_
[
DNN
].
size
();
++
i
)
{
const
MatrixPtr
&
dnnDiff
=
dataLayers_
[
DNN
][
i
]
->
getOutputGrad
();
const
MatrixPtr
&
refDiff
=
dataLayers_
[
REF
][
i
]
->
getOutputGrad
();
VLOG
(
lvl_
)
<<
"Mkldnn Backward Output BotDiff
"
<<
i
;
VLOG
(
MKLDNN_ALL
)
<<
"MKLDNN Backward Result: InputGrad
"
<<
i
;
printMatrix
(
dnnDiff
);
VLOG
(
lvl_
)
<<
"Reference Backward Output BotDiff
"
<<
i
;
VLOG
(
MKLDNN_ALL
)
<<
"Reference Backward Result: InputGrad
"
<<
i
;
printMatrix
(
refDiff
);
double
delta
=
compareMatrix
(
dnnDiff
,
refDiff
);
...
...
@@ -153,7 +153,7 @@ void MKLDNNTester::checkBackwardData() {
}
void
MKLDNNTester
::
checkBackwardWgts
()
{
VLOG
(
MKLDNN_
ALL
)
<<
"Check Backward Weight"
;
VLOG
(
MKLDNN_
TESTS
)
<<
"Check Backward Weight"
;
CHECK_EQ
(
parameters_
[
DNN
].
size
(),
parameters_
[
REF
].
size
());
vector
<
VectorPtr
>
dnnWgts
;
// used to temply save mkldnn weights
saveWgt
(
parameters_
[
DNN
],
dnnWgts
);
...
...
@@ -165,9 +165,11 @@ void MKLDNNTester::checkBackwardWgts() {
for
(
size_t
i
=
0
;
i
<
parameters_
[
DNN
].
size
();
++
i
)
{
const
VectorPtr
&
dnn
=
parameters_
[
DNN
][
i
]
->
getBuf
(
PARAMETER_VALUE
);
const
VectorPtr
&
ref
=
parameters_
[
REF
][
i
]
->
getBuf
(
PARAMETER_VALUE
);
VLOG
(
lvl_
)
<<
"Mkldnn Output weight "
<<
parameters_
[
DNN
][
i
]
->
getName
();
VLOG
(
MKLDNN_ALL
)
<<
"MKLDNN Result: weight value"
<<
parameters_
[
DNN
][
i
]
->
getName
();
printVector
(
dnn
);
VLOG
(
lvl_
)
<<
"Reference Output weight "
<<
parameters_
[
REF
][
i
]
->
getName
();
VLOG
(
MKLDNN_ALL
)
<<
"Reference Result: weight value "
<<
parameters_
[
REF
][
i
]
->
getName
();
printVector
(
ref
);
double
delta
=
compareVector
(
dnn
,
ref
);
...
...
@@ -240,7 +242,8 @@ void MKLDNNTester::printTopDatas() {
}
for
(
int
n
=
0
;
n
<
NUM
;
++
n
)
{
VLOG
(
lvl_
)
<<
testLayers_
[
n
]
->
getType
()
<<
" forward output TopData: "
;
VLOG
(
MKLDNN_ALL
)
<<
testLayers_
[
n
]
->
getType
()
<<
" Forward Result: OutputValue"
;
printMatrix
(
testLayers_
[
n
]
->
getOutputValue
());
}
}
...
...
@@ -252,7 +255,7 @@ void MKLDNNTester::printMatrix(const MatrixPtr& m) {
std
::
ostringstream
ostr
;
m
->
print
(
ostr
);
VLOG
(
lvl_
)
<<
std
::
endl
<<
ostr
.
str
();
VLOG
(
MKLDNN_ALL
)
<<
std
::
endl
<<
ostr
.
str
();
}
void
MKLDNNTester
::
printVector
(
const
VectorPtr
&
v
)
{
...
...
@@ -262,7 +265,7 @@ void MKLDNNTester::printVector(const VectorPtr& v) {
std
::
ostringstream
ostr
;
v
->
print
(
ostr
,
v
->
getSize
());
VLOG
(
lvl_
)
<<
std
::
endl
<<
ostr
.
str
();
VLOG
(
MKLDNN_ALL
)
<<
std
::
endl
<<
ostr
.
str
();
}
double
MKLDNNTester
::
getDelta
(
const
real
*
d1
,
...
...
@@ -314,7 +317,7 @@ void MKLDNNTester::runOnce() {
UpdateCallback
updateCallback
=
[](
Parameter
*
para
)
{
auto
&
grad
=
para
->
getBuf
(
PARAMETER_GRADIENT
);
auto
&
value
=
para
->
getBuf
(
PARAMETER_VALUE
);
real
lr
=
1e-
3
;
real
lr
=
1e-
2
;
value
->
add
(
*
grad
,
lr
);
grad
->
zeroMem
();
};
...
...
@@ -340,10 +343,9 @@ void MKLDNNTester::run(const TestConfig& dnn,
size_t
batchSize
,
size_t
inputImgH
,
size_t
inputImgW
,
bool
printDetails
,
size_t
iter
,
float
epsilon
,
bool
log
,
int
level
)
{
float
epsilon
)
{
CHECK
(
dnn
.
layerConfig
.
type
().
compare
(
0
,
7
,
"mkldnn_"
)
==
0
||
dnn
.
layerConfig
.
active_type
().
compare
(
0
,
7
,
"mkldnn_"
)
==
0
)
<<
"should be MKLDNN layer or MKLDNN activation"
;
...
...
@@ -359,10 +361,9 @@ void MKLDNNTester::run(const TestConfig& dnn,
ih_
=
inputImgH
;
iw_
=
inputImgW
;
log_
=
printDetails
;
iter_
=
iter
;
eps_
=
epsilon
;
log_
=
log
;
lvl_
=
level
;
// Firstly test mkldnn init from PARAM_FORMAT_ORIGINAL weight
reset
(
dnn
,
ref
,
batchSize
);
...
...
@@ -531,9 +532,11 @@ void MKLDNNTester::getOutResult(const std::string& configPath,
void
MKLDNNTester
::
compareResult
(
DataOut
&
ref
,
DataOut
&
dnn
,
float
eps
)
{
CHECK_EQ
(
ref
.
outValues
.
size
(),
dnn
.
outValues
.
size
());
CHECK_EQ
(
ref
.
paraValues
.
size
(),
dnn
.
paraValues
.
size
());
VLOG
(
MKLDNN_TESTS
)
<<
"compare value size: "
<<
ref
.
outValues
.
size
();
for
(
size_t
i
=
0
;
i
<
ref
.
outValues
.
size
();
i
++
)
{
EXPECT_LE
(
fabs
(
compareMatrix
(
ref
.
outValues
[
i
],
dnn
.
outValues
[
i
])),
eps
);
}
VLOG
(
MKLDNN_TESTS
)
<<
"compare param size: "
<<
ref
.
outValues
.
size
();
for
(
size_t
i
=
0
;
i
<
ref
.
paraValues
.
size
();
i
++
)
{
EXPECT_LE
(
fabs
(
compareVector
(
ref
.
paraValues
[
i
],
dnn
.
paraValues
[
i
])),
eps
);
}
...
...
@@ -544,9 +547,10 @@ void MKLDNNTester::runBranchesTest(const std::string& configPath,
float
eps
)
{
DataIn
in
;
initArgument
(
in
,
configPath
,
iter
);
DataOut
outCpu
,
outDnn
;
VLOG
(
MKLDNN_TESTS
)
<<
"runing cpu network"
;
getOutResult
(
configPath
,
in
,
outCpu
,
false
,
iter
);
VLOG
(
MKLDNN_TESTS
)
<<
"runing mkldnn network"
;
getOutResult
(
configPath
,
in
,
outDnn
,
true
,
iter
);
compareResult
(
outCpu
,
outDnn
,
eps
);
...
...
paddle/gserver/tests/MKLDNNTester.h
浏览文件 @
c2feab7f
...
...
@@ -58,8 +58,6 @@ protected:
size_t
iter_
;
/// whether to print out the details
bool
log_
;
/// vlog level to print the matrix details datas
int
lvl_
;
/// epsilon
float
eps_
;
/// input image size, default 1
...
...
@@ -70,7 +68,6 @@ public:
iter_
=
iter
;
eps_
=
epsilon
;
log_
=
false
;
lvl_
=
MKLDNN_ALL
;
}
~
MKLDNNTester
()
{}
...
...
@@ -81,10 +78,9 @@ public:
size_t
batchSize
,
size_t
inputImgH
=
1
,
size_t
inputImgW
=
1
,
bool
printDetails
=
false
,
size_t
iter
=
3
,
float
epsilon
=
1e-4
,
bool
log
=
false
,
int
level
=
MKLDNN_ALL
);
float
epsilon
=
1e-4
);
static
void
runBranchesTest
(
const
std
::
string
&
configPath
,
size_t
iter
=
3
,
float
eps
=
1e-4
);
...
...
paddle/gserver/tests/mkldnn_branches_fc.conf
0 → 100644
浏览文件 @
c2feab7f
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle
.
trainer_config_helpers
import
*
settings
(
batch_size
=
16
)
channels
=
get_config_arg
(
"channels"
,
int
,
2
)
def
two_fc
(
input
,
group_name
):
out1
=
fc_layer
(
input
=
input
,
name
=
group_name
+
'_fc1'
,
size
=
channels
,
bias_attr
=
False
,
act
=
LinearActivation
())
out2
=
fc_layer
(
input
=
input
,
name
=
group_name
+
'_fc2'
,
size
=
channels
,
bias_attr
=
False
,
act
=
LinearActivation
())
return
out1
,
out2
data
=
data_layer
(
name
=
"input"
,
size
=
channels
*
16
*
16
)
conv
=
img_conv_layer
(
input
=
data
,
num_channels
=
channels
,
filter_size
=
3
,
num_filters
=
channels
,
padding
=
1
,
shared_biases
=
True
,
act
=
LinearActivation
())
pool
=
img_pool_layer
(
input
=
conv
,
pool_size
=
3
,
stride
=
2
,
padding
=
1
,
pool_type
=
AvgPooling
())
a1
,
a2
=
two_fc
(
input
=
pool
,
group_name
=
'a'
)
concat
=
concat_layer
(
input
=[
a1
,
a2
])
b1
,
b2
=
two_fc
(
input
=
pool
,
group_name
=
'b'
)
addto
=
addto_layer
(
input
=[
b1
,
b2
])
outputs
([
concat
,
addto
])
paddle/gserver/tests/mkldnn_branches_pool.conf
0 → 100644
浏览文件 @
c2feab7f
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle
.
trainer_config_helpers
import
*
settings
(
batch_size
=
16
)
channels
=
get_config_arg
(
"channels"
,
int
,
2
)
def
two_pool
(
input
,
group_name
):
out1
=
img_pool_layer
(
input
=
input
,
name
=
group_name
+
'_pool1'
,
pool_size
=
3
,
stride
=
2
,
padding
=
0
,
pool_type
=
MaxPooling
())
out2
=
img_pool_layer
(
input
=
input
,
name
=
group_name
+
'_pool2'
,
pool_size
=
5
,
stride
=
2
,
padding
=
1
,
pool_type
=
MaxPooling
())
return
out1
,
out2
data
=
data_layer
(
name
=
"input"
,
size
=
channels
*
16
*
16
)
conv
=
img_conv_layer
(
input
=
data
,
num_channels
=
channels
,
filter_size
=
3
,
num_filters
=
channels
,
padding
=
1
,
shared_biases
=
True
,
act
=
LinearActivation
())
pool
=
img_pool_layer
(
input
=
conv
,
pool_size
=
3
,
stride
=
1
,
padding
=
1
,
pool_type
=
AvgPooling
())
a1
,
a2
=
two_pool
(
input
=
pool
,
group_name
=
'a'
)
concat
=
concat_layer
(
input
=[
a1
,
a2
])
b1
,
b2
=
two_pool
(
input
=
pool
,
group_name
=
'b'
)
addto
=
addto_layer
(
input
=[
b1
,
b2
])
outputs
([
concat
,
addto
])
paddle/gserver/tests/test_MKLDNN.cpp
浏览文件 @
c2feab7f
...
...
@@ -250,7 +250,7 @@ TEST(MKLDNNActivation, Activations) {
DECLARE_string
(
config_args
);
TEST
(
MKLDNNLayer
,
branches
)
{
std
::
vector
<
std
::
string
>
cases
=
{
"conv"
};
std
::
vector
<
std
::
string
>
cases
=
{
"conv"
,
"pool"
,
"fc"
};
for
(
auto
name
:
cases
)
{
std
::
string
config
=
"./gserver/tests/mkldnn_branches_"
+
name
+
".conf"
;
for
(
auto
channels
:
{
2
,
32
})
{
...
...
paddle/math/MKLDNNMatrix.cpp
浏览文件 @
c2feab7f
...
...
@@ -18,7 +18,7 @@ using namespace mkldnn; // NOLINT
namespace
paddle
{
MKLDNNMatrixPtr
MKLDNNMatrix
::
create
(
MatrixPtr
m
,
memory
::
primitive_desc
pd
)
{
MKLDNNMatrixPtr
MKLDNNMatrix
::
create
(
memory
::
primitive_desc
pd
,
MatrixPtr
m
)
{
memory
::
desc
md
=
pd
.
desc
();
size_t
ndims
=
md
.
data
.
ndims
;
int
*
dims
=
md
.
data
.
dims
;
...
...
@@ -41,12 +41,12 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
return
std
::
make_shared
<
MKLDNNMatrix
>
(
cpuMatrix
,
pd
);
}
MKLDNNMatrixPtr
MKLDNNMatrix
::
create
(
MatrixPtr
m
,
memory
::
dims
dims
,
MKLDNNMatrixPtr
MKLDNNMatrix
::
create
(
memory
::
dims
dims
,
memory
::
format
fmt
,
engine
&
eg
,
MatrixPtr
m
,
mkldnn
::
memory
::
data_type
dtype
)
{
return
create
(
m
,
memory
::
primitive_desc
(
memory
::
desc
(
dims
,
dtype
,
fmt
),
eg
)
);
return
create
(
createPrimitiveDesc
(
dims
,
fmt
,
eg
,
dtype
),
m
);
}
std
::
shared_ptr
<
reorder
>
MKLDNNMatrix
::
createReorder
(
const
MKLDNNMatrixPtr
&
src
,
...
...
paddle/math/MKLDNNMatrix.h
浏览文件 @
c2feab7f
...
...
@@ -40,24 +40,37 @@ public:
/**
* Create MKLDNNMatrix from a MatrixPtr and memory primitive_desc
*/
static
MKLDNNMatrixPtr
create
(
MatrixPtr
m
,
mkldnn
::
memory
::
primitive_desc
pd
);
static
MKLDNNMatrixPtr
create
(
mkldnn
::
memory
::
primitive_desc
pd
,
MatrixPtr
m
=
nullptr
);
/**
* Create MKLDNNMatrix from a MatrixPtr and memory details info
*/
static
MKLDNNMatrixPtr
create
(
MatrixPtr
m
,
mkldnn
::
memory
::
dims
dims
,
mkldnn
::
memory
::
format
fmt
,
mkldnn
::
engine
&
eg
,
MatrixPtr
m
=
nullptr
,
mkldnn
::
memory
::
data_type
dtype
=
mkldnn
::
memory
::
data_type
::
f32
);
/**
* Create primitive descriptor.
* default with f32 dtype
*/
static
mkldnn
::
memory
::
primitive_desc
createPrimitiveDesc
(
const
mkldnn
::
memory
::
dims
dims
,
const
mkldnn
::
memory
::
format
&
fmt
,
const
mkldnn
::
engine
&
eg
,
const
mkldnn
::
memory
::
data_type
&
dtype
=
mkldnn
::
memory
::
data_type
::
f32
)
{
return
mkldnn
::
memory
::
primitive_desc
(
memory
::
desc
(
dims
,
dtype
,
fmt
),
eg
);
}
/**
* Create Memory descriptor.
* default with any format and f32 dtype
*/
static
mkldnn
::
memory
::
desc
createMemoryDesc
(
const
mkldnn
::
memory
::
dims
&
dims
,
const
mkldnn
::
memory
::
dims
dims
,
const
mkldnn
::
memory
::
format
&
fmt
=
mkldnn
::
memory
::
format
::
any
,
const
mkldnn
::
memory
::
data_type
&
dtype
=
mkldnn
::
memory
::
data_type
::
f32
)
{
return
mkldnn
::
memory
::
desc
(
dims
,
dtype
,
fmt
);
...
...
paddle/operators/clip_op.cc
浏览文件 @
c2feab7f
...
...
@@ -27,8 +27,8 @@ class ClipOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of ClipOp should not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
max
=
Attr
<
float
>
(
"max"
);
auto
min
=
Attr
<
float
>
(
"min"
);
auto
max
=
ctx
->
Attrs
().
Get
<
float
>
(
"max"
);
auto
min
=
ctx
->
Attrs
().
Get
<
float
>
(
"min"
);
PADDLE_ENFORCE_LT
(
min
,
max
,
"max should be greater than min."
);
ctx
->
SetOutputDim
(
"Out"
,
x_dims
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
...
...
paddle/operators/dynamic_recurrent_op.cc
浏览文件 @
c2feab7f
...
...
@@ -23,6 +23,7 @@ using framework::Scope;
using
framework
::
TensorArray
;
using
framework
::
LoDTensor
;
using
framework
::
Variable
;
using
framework
::
OperatorBase
;
using
framework
::
DySeqMetaBatch
;
namespace
detail
{
...
...
@@ -43,10 +44,9 @@ inline void CreateVariables(Scope& scope,
* be reordered, but the RNN op should not change the `boot_state` as an input
* variable's content.
*/
template
<
typename
T
>
inline
void
ReorderBootState
(
const
DySeqMetaBatch
&
metas
,
const
LoDTensor
&
boot_state
,
LoDTensor
*
tensor
,
const
platform
::
Place
&
dst_place
)
{
inline
void
ReorderInitialState
(
const
DySeqMetaBatch
&
metas
,
const
LoDTensor
&
boot_state
,
LoDTensor
*
tensor
,
const
platform
::
Place
&
dst_place
)
{
for
(
size_t
seq_id
=
0
;
seq_id
<
metas
.
size
();
seq_id
++
)
{
auto
slice
=
tensor
->
Slice
(
seq_id
,
seq_id
+
1
);
auto
boot_slice
=
...
...
@@ -56,58 +56,60 @@ inline void ReorderBootState(const DySeqMetaBatch& metas,
}
}
}
// namespace detail
class
DynamicRecurrentOpProtoAndCheckerMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
DynamicRecurrentOpProtoAndCheckerMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
const
auto
&
name
=
DynamicRecurrentOp
::
kArgName
;
// inputs and outputs stored in proto
AddInput
(
name
.
inlinks
,
"the inputs that need to be segmented for each step."
)
.
AsDuplicable
();
AddInput
(
name
.
boot_memories
,
"variables to initialize memories."
)
.
AsDuplicable
();
AddOutput
(
name
.
outlinks
,
"the outputs that need to concated for all steps."
)
.
AsDuplicable
();
AddOutput
(
name
.
step_scopes
,
"step scopes"
);
// Attributes stored in AttributeMap
AddAttr
<
std
::
vector
<
std
::
string
>>
(
name
.
pre_memories
,
"names of pre-memories"
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
name
.
memories
,
"names of memories"
);
AddComment
(
"This is a RNN operator for varience-length sequences."
);
inline
void
RestoreInitialState
(
const
DySeqMetaBatch
&
metas
,
const
LoDTensor
&
tensor
,
LoDTensor
*
boot_state
,
const
platform
::
Place
&
dst_place
)
{
for
(
size_t
seq_id
=
0
;
seq_id
<
metas
.
size
();
seq_id
++
)
{
auto
slice
=
tensor
.
Slice
(
seq_id
,
seq_id
+
1
);
auto
boot_slice
=
boot_state
->
Slice
(
metas
[
seq_id
].
ori_idx
,
metas
[
seq_id
].
ori_idx
+
1
);
boot_slice
.
CopyFrom
(
slice
,
dst_place
,
platform
::
CPUDeviceContext
());
}
}
;
}
void
DynamicRecurrentOp
::
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
cache_
.
Init
(
kArgName
,
*
this
,
scope
,
&
arg_
);
}
// namespace detail
// Implementation for forward propagation.
template
<
>
void
RNNAlgorithm
::
Run
<
RNNAlgorithm
::
ComputeMode
::
kForward
>
(
const
framework
::
Scope
&
scope
,
const
framework
::
OperatorBase
&
op
,
const
platform
::
DeviceContext
&
dev_ctx
)
{
SetComputeMode
(
ComputeMode
::
kForward
);
cache_
.
Init
(
kArgNames
[
mode_
],
op
,
scope
,
&
dev_ctx
,
&
arg_
);
SplitInputs
();
CreateScopes
();
WriteStepInputs
();
InitStates
();
WriteStepOutputs
();
RunSteps
();
ConcatOutputs
();
}
// call stepnet in all the time steps
for
(
size_t
step
=
0
;
step
<
cache_
.
num_steps
;
step
++
)
{
auto
&
step_scope
=
cache_
.
GetScope
(
step
);
stepnet_
->
Run
(
step_scope
,
dev_ctx
);
// Implementation for backward propagation.
template
<
>
void
RNNAlgorithm
::
Run
<
RNNAlgorithm
::
ComputeMode
::
kBackward
>
(
const
framework
::
Scope
&
scope
,
const
framework
::
OperatorBase
&
op
,
const
platform
::
DeviceContext
&
dev_ctx
)
{
SetComputeMode
(
ComputeMode
::
kBackward
);
cache_
.
Init
(
kArgNames
[
mode_
],
op
,
scope
,
&
dev_ctx
,
&
arg_
);
SplitInputs
();
WriteStepInputs
();
InitStates
();
WriteStepOutputs
();
RunSteps
();
// copy boot-states' gradients back.
for
(
const
auto
&
state
:
arg_
.
states
)
{
ExportInitialStateGradient
(
state
);
}
ConcatOutputs
();
}
void
DynamicRecurrentOp
::
SplitInputs
()
const
{
void
RNNAlgorithm
::
SplitInputs
()
{
// TODO(superjom) make level a config
// TODO(superjom) check all the inputs has the same LoD
int
level
=
0
;
for
(
const
auto
&
item
:
cache_
.
in
link
s
)
{
for
(
const
auto
&
item
:
cache_
.
in
put
s
)
{
const
auto
&
var
=
item
.
second
;
const
auto
&
tensor
=
var
->
Get
<
LoDTensor
>
();
TensorArray
&
ta
=
step_inputs_
[
item
.
first
];
...
...
@@ -124,8 +126,8 @@ void DynamicRecurrentOp::SplitInputs() const {
}
}
void
DynamicRecurrentOp
::
WriteStepInputs
()
const
{
for
(
const
auto
&
item
:
cache_
.
in
link
s
)
{
void
RNNAlgorithm
::
WriteStepInputs
()
{
for
(
const
auto
&
item
:
cache_
.
in
put
s
)
{
auto
ta_it
=
step_inputs_
.
find
(
item
.
first
);
PADDLE_ENFORCE
(
ta_it
!=
step_inputs_
.
end
(),
"step_inputs_ not compatible with memory set"
);
...
...
@@ -142,15 +144,15 @@ void DynamicRecurrentOp::WriteStepInputs() const {
}
}
void
DynamicRecurrentOp
::
WriteStepOutputs
()
const
{
void
RNNAlgorithm
::
WriteStepOutputs
()
{
// initialize step outputs
for
(
const
auto
&
item
:
cache_
.
out
link
s
)
{
for
(
const
auto
&
item
:
cache_
.
out
put
s
)
{
step_outputs_
.
emplace
(
item
.
first
,
TensorArray
());
}
PADDLE_ENFORCE_GT
(
step_outputs_
.
size
(),
0UL
);
}
void
DynamicRecurrentOp
::
CreateScopes
()
const
{
void
RNNAlgorithm
::
CreateScopes
()
{
PADDLE_ENFORCE_GT
(
cache_
.
num_steps
,
0
);
// resize scopes
size_t
num_scopes_need_create
=
cache_
.
num_steps
-
cache_
.
scopes
->
size
();
...
...
@@ -159,19 +161,19 @@ void DynamicRecurrentOp::CreateScopes() const {
}
// init temporary inputs
PADDLE_ENFORCE_NOT_NULL
(
step
ne
t_
,
"stepnet should be set first"
);
std
::
vector
<
std
::
string
>
memori
es
;
std
::
vector
<
std
::
string
>
pre_memori
es
;
std
::
vector
<
std
::
string
>
step
ne
t_outputs
;
std
::
transform
(
arg_
.
memories
.
begin
(),
arg_
.
memori
es
.
end
(),
std
::
back_inserter
(
memori
es
),
[](
const
rnn
::
Memory
Attr
&
m
)
{
return
m
.
var
;
});
std
::
transform
(
arg_
.
memories
.
begin
(),
arg_
.
memori
es
.
end
(),
std
::
back_inserter
(
pre_memori
es
),
[](
const
rnn
::
Memory
Attr
&
m
)
{
return
m
.
pre_var
;
});
for
(
const
auto
&
item
:
step
ne
t_
->
Outputs
())
{
PADDLE_ENFORCE_NOT_NULL
(
step
_uni
t_
,
"stepnet should be set first"
);
std
::
vector
<
std
::
string
>
stat
es
;
std
::
vector
<
std
::
string
>
ex_stat
es
;
std
::
vector
<
std
::
string
>
step
_uni
t_outputs
;
std
::
transform
(
arg_
.
states
.
begin
(),
arg_
.
stat
es
.
end
(),
std
::
back_inserter
(
stat
es
),
[](
const
rnn
::
State
Attr
&
m
)
{
return
m
.
var
;
});
std
::
transform
(
arg_
.
states
.
begin
(),
arg_
.
stat
es
.
end
(),
std
::
back_inserter
(
ex_stat
es
),
[](
const
rnn
::
State
Attr
&
m
)
{
return
m
.
pre_var
;
});
for
(
const
auto
&
item
:
step
_uni
t_
->
Outputs
())
{
for
(
const
auto
&
var
:
item
.
second
)
{
step
ne
t_outputs
.
push_back
(
var
);
step
_uni
t_outputs
.
push_back
(
var
);
}
}
...
...
@@ -179,13 +181,13 @@ void DynamicRecurrentOp::CreateScopes() const {
auto
&
scope
=
cache_
.
GetScope
(
step
);
detail
::
CreateVariables
(
scope
,
arg_
.
inlinks
);
detail
::
CreateVariables
(
scope
,
arg_
.
outlinks
);
detail
::
CreateVariables
(
scope
,
memori
es
);
detail
::
CreateVariables
(
scope
,
pre_memori
es
);
detail
::
CreateVariables
(
scope
,
step
ne
t_outputs
);
detail
::
CreateVariables
(
scope
,
stat
es
);
detail
::
CreateVariables
(
scope
,
ex_stat
es
);
detail
::
CreateVariables
(
scope
,
step
_uni
t_outputs
);
}
}
void
DynamicRecurrentOp
::
ConcatOutputs
()
const
{
void
RNNAlgorithm
::
ConcatOutputs
()
{
// TODO(superjom) transform this to a config
int
level
=
0
;
for
(
size_t
step
=
0
;
step
<
cache_
.
num_steps
;
step
++
)
{
...
...
@@ -198,31 +200,45 @@ void DynamicRecurrentOp::ConcatOutputs() const {
item
.
second
.
WriteShared
(
step
,
*
tensor
);
}
}
// the in
link
s' lods should be the same, so randomly get one lod.
// the in
put
s' lods should be the same, so randomly get one lod.
const
auto
&
some_lod
=
cache_
.
scope
->
FindVar
(
arg_
.
inlinks
.
front
())
->
Get
<
LoDTensor
>
().
lod
();
const
auto
&
some_meta
=
dy_seq_metas_
[
arg_
.
inlinks
.
front
()];
for
(
auto
&
item
:
step_outputs_
)
{
auto
tensor
=
item
.
second
.
Pack
(
level
,
some_meta
,
some_lod
);
auto
*
output
=
cache_
.
out
link
s
[
item
.
first
]
->
GetMutable
<
LoDTensor
>
();
auto
*
output
=
cache_
.
out
put
s
[
item
.
first
]
->
GetMutable
<
LoDTensor
>
();
const_cast
<
LoDTensor
*>
(
output
)
->
ShareDataWith
(
tensor
);
}
}
void
DynamicRecurrentOp
::
InitStates
()
const
{
void
RNNAlgorithm
::
RunSteps
()
{
if
(
IsBackward
())
{
// call stepnet in all the time steps reversely
for
(
int
step
=
cache_
.
num_steps
-
1
;
step
>=
0
;
step
--
)
{
auto
&
step_scope
=
cache_
.
GetScope
(
step
);
step_unit_
->
Run
(
step_scope
,
*
cache_
.
dev_ctx
);
}
}
else
{
for
(
size_t
step
=
0
;
step
<
cache_
.
num_steps
;
step
++
)
{
auto
&
step_scope
=
cache_
.
GetScope
(
step
);
step_unit_
->
Run
(
step_scope
,
*
cache_
.
dev_ctx
);
}
}
}
void
RNNAlgorithm
::
InitStates
()
{
for
(
size_t
step
=
0
;
step
<
cache_
.
num_steps
;
step
++
)
{
for
(
const
auto
&
memory
:
arg_
.
memori
es
)
{
CreateState
(
memory
,
step
);
LinkState
(
memory
,
step
);
for
(
const
auto
&
state
:
arg_
.
stat
es
)
{
CreateState
(
state
,
step
);
LinkState
(
state
,
step
);
}
}
}
void
DynamicRecurrentOp
::
CreateState
(
const
rnn
::
MemoryAttr
&
memory
,
size_t
step
)
const
{
void
RNNAlgorithm
::
CreateState
(
const
rnn
::
StateAttr
&
state_attr
,
size_t
step
)
{
auto
&
scope
=
cache_
.
GetScope
(
step
);
auto
&
state
=
*
cache_
.
GetTensor
(
scope
,
memory
.
var
);
auto
&
boot_state
=
*
cache_
.
GetTensor
(
*
cache_
.
scope
,
memory
.
boot_var
);
auto
&
state
=
*
cache_
.
GetTensor
(
scope
,
state_attr
.
var
);
auto
&
boot_state
=
*
cache_
.
GetTensor
(
*
cache_
.
scope
,
state_attr
.
boot_var
);
size_t
num_instances
=
step_inputs_
[
arg_
.
inlinks
.
front
()].
Read
(
step
).
dims
()[
0
];
...
...
@@ -231,56 +247,79 @@ void DynamicRecurrentOp::CreateState(const rnn::MemoryAttr& memory,
state
.
Resize
(
dims
);
state
.
mutable_data
<
value_type
>
(
platform
::
CPUPlace
());
states_
[
memory
.
var
].
WriteShared
(
step
,
state
);
states_
[
state_attr
.
var
].
WriteShared
(
step
,
state
);
}
void
DynamicRecurrentOp
::
LinkState
(
const
rnn
::
MemoryAttr
&
memory
,
size_t
step
)
const
{
void
RNNAlgorithm
::
LinkState
(
const
rnn
::
StateAttr
&
state
,
size_t
step
)
{
auto
&
scope
=
cache_
.
GetScope
(
step
);
auto
&
state_pre
=
*
cache_
.
GetTensor
(
scope
,
memory
.
pre_var
);
auto
&
state_pre
=
*
cache_
.
GetTensor
(
scope
,
state
.
pre_var
);
// process the first state's boot-state(the 0-step in forward mode or the
// last step in backward mode)
// Only forward mode need to link the boot-state to the `pre-state` in first
// time step. In backward mode, need to copy the gradient of `pre-state` in
// first time step to the gradient of `boot-state`.
if
(
step
==
0
&&
IsForward
())
{
LinkInitialState
(
state
);
}
else
{
size_t
num_instances
=
step_inputs_
[
arg_
.
inlinks
.
front
()].
Read
(
step
).
dims
()[
0
];
auto
*
pre_state
=
cache_
.
GetTensor
(
cache_
.
GetScope
(
step
-
1
),
state
.
var
);
// shink and share from previous state
auto
shrinked_pre_state
=
pre_state
->
Slice
(
0
,
num_instances
);
state_pre
.
ShareDataWith
(
shrinked_pre_state
);
}
}
void
RNNAlgorithm
::
LinkInitialState
(
const
rnn
::
StateAttr
&
state
)
{
// all the step_inputs' metas should be the same, just randomly select one
// and get the dyseq meta.
const
auto
&
some_meta
=
dy_seq_metas_
[
arg_
.
inlinks
.
front
()];
size_t
num_instances
=
step_inputs_
[
arg_
.
inlinks
.
front
()].
Read
(
step
).
dims
()[
0
];
auto
&
scope
=
cache_
.
GetScope
(
0
);
auto
&
state_pre
=
*
cache_
.
GetTensor
(
scope
,
state
.
pre_var
);
auto
*
pre_state
=
cache_
.
GetTensor
(
*
cache_
.
scope
,
state
.
boot_var
);
pre_state
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
// allocate state
state_pre
.
Resize
(
pre_state
->
dims
());
state_pre
.
mutable_data
<
value_type
>
(
platform
::
CPUPlace
());
detail
::
ReorderInitialState
(
some_meta
,
*
pre_state
,
&
state_pre
,
pre_state
->
place
());
}
LoDTensor
*
pre_state
{
nullptr
};
if
(
step
==
0
)
{
pre_state
=
cache_
.
GetTensor
(
*
cache_
.
scope
,
memory
.
boot_var
);
pre_state
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
// allocate memory
state_pre
.
Resize
(
pre_state
->
dims
());
state_pre
.
mutable_data
<
value_type
>
(
platform
::
CPUPlace
());
detail
::
ReorderBootState
<
value_type
>
(
some_meta
,
*
pre_state
,
&
state_pre
,
pre_state
->
place
());
}
else
{
pre_state
=
cache_
.
GetTensor
(
cache_
.
GetScope
(
step
-
1
),
memory
.
var
);
}
void
RNNAlgorithm
::
ExportInitialStateGradient
(
const
rnn
::
StateAttr
&
state
)
{
// all the step_inputs' metas should be the same, just randomly select one
// and get the dyseq meta.
const
auto
&
some_meta
=
dy_seq_metas_
[
arg_
.
inlinks
.
front
()];
auto
&
scope
=
cache_
.
GetScope
(
0
);
// shink and share from previous state
auto
shrinked_pre_state
=
pre_state
->
Slice
(
0
,
num_instances
);
state_pre
.
ShareDataWith
(
shrinked_pre_state
);
auto
&
state_pre
=
*
cache_
.
GetTensor
(
scope
,
state
.
pre_var
);
auto
&
pre_state
=
*
cache_
.
GetTensor
(
*
cache_
.
scope
,
state
.
boot_var
);
pre_state
.
Resize
(
state_pre
.
dims
());
detail
::
RestoreInitialState
(
some_meta
,
state_pre
,
&
pre_state
,
pre_state
.
place
());
}
void
DynamicRecurrentOp
::
ArgCache
::
Init
(
const
rnn
::
ArgumentName
&
name
,
const
paddle
::
framework
::
OperatorBase
&
op
,
const
paddle
::
framework
::
Scope
&
scope
,
rnn
::
Argument
*
arg
)
{
void
RNNAlgorithm
::
ArgCache
::
Init
(
const
rnn
::
ArgumentName
&
name
,
const
paddle
::
framework
::
OperatorBase
&
op
,
const
paddle
::
framework
::
Scope
&
scope
,
platform
::
DeviceContext
const
*
dev_ctx
,
rnn
::
Argument
*
arg
)
{
this
->
scope
=
&
scope
;
InitArgument
(
name
,
op
,
arg
);
CacheScopes
(
scope
,
*
arg
);
CacheInlinks
(
scope
,
arg
->
inlinks
);
CacheOutlinks
(
scope
,
arg
->
outlinks
);
this
->
dev_ctx
=
dev_ctx
;
}
void
DynamicRecurrentOp
::
ArgCache
::
InitArgument
(
const
rnn
::
ArgumentName
&
name
,
const
OperatorBase
&
op
,
rnn
::
Argument
*
arg
)
{
void
RNNAlgorithm
::
ArgCache
::
InitArgument
(
const
rnn
::
ArgumentName
&
name
,
const
OperatorBase
&
op
,
rnn
::
Argument
*
arg
)
{
rnn
::
InitArgument
(
name
,
arg
,
op
,
false
/*is_grad*/
);
}
void
DynamicRecurrentOp
::
ArgCache
::
CacheScopes
(
const
Scope
&
scope
,
const
rnn
::
Argument
&
arg
)
{
void
RNNAlgorithm
::
ArgCache
::
CacheScopes
(
const
Scope
&
scope
,
const
rnn
::
Argument
&
arg
)
{
auto
scopes_var
=
scope
.
FindVar
(
arg
.
step_scopes
);
PADDLE_ENFORCE
(
scopes_var
!=
nullptr
,
"the step_scopes output argument [%s] should be created first "
...
...
@@ -289,45 +328,85 @@ void DynamicRecurrentOp::ArgCache::CacheScopes(const Scope& scope,
this
->
scopes
=
scopes_var
->
GetMutable
<
std
::
vector
<
Scope
*>>
();
}
void
DynamicRecurrentOp
::
ArgCache
::
CacheInlinks
(
void
RNNAlgorithm
::
ArgCache
::
CacheInlinks
(
const
Scope
&
scope
,
const
std
::
vector
<
std
::
string
>&
names
)
{
for
(
auto
name
:
names
)
{
auto
*
var
=
GetVariable
(
scope
,
name
);
in
link
s
[
name
]
=
var
;
in
put
s
[
name
]
=
var
;
}
}
void
DynamicRecurrentOp
::
ArgCache
::
CacheOutlinks
(
void
RNNAlgorithm
::
ArgCache
::
CacheOutlinks
(
const
Scope
&
scope
,
const
std
::
vector
<
std
::
string
>&
names
)
{
for
(
auto
name
:
names
)
{
auto
*
var
=
GetVariable
(
scope
,
name
);
out
link
s
[
name
]
=
var
;
out
put
s
[
name
]
=
var
;
}
}
Variable
*
DynamicRecurrentOp
::
ArgCache
::
GetVariable
(
const
Scope
&
scope
,
const
std
::
string
&
name
)
{
Variable
*
RNNAlgorithm
::
ArgCache
::
GetVariable
(
const
Scope
&
scope
,
const
std
::
string
&
name
)
{
auto
*
var
=
scope
.
FindVar
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
"variable [%s] not exist in scope"
,
name
);
return
var
;
}
LoDTensor
*
DynamicRecurrentOp
::
ArgCache
::
GetTensor
(
const
framework
::
Scope
&
scope
,
const
std
::
string
&
name
)
{
LoDTensor
*
RNNAlgorithm
::
ArgCache
::
GetTensor
(
const
framework
::
Scope
&
scope
,
const
std
::
string
&
name
)
{
auto
*
var
=
GetVariable
(
scope
,
name
);
return
var
->
GetMutable
<
LoDTensor
>
();
}
const
rnn
::
ArgumentName
DynamicRecurrentOp
::
kArgName
{
"step_net"
,
"step_scopes"
,
"inlinks"
,
"outlinks"
,
"memories"
,
"pre_memories"
,
"boot_memories"
};
const
std
::
array
<
rnn
::
ArgumentName
,
2
>
RNNAlgorithm
::
kArgNames
{
{
rnn
::
ArgumentName
{
"step_unit"
,
"step_scopes"
,
"inputs"
,
"outputs"
,
"states"
,
"ex_states"
,
"initial_states"
},
rnn
::
ArgumentName
{
"step_unit"
,
"step_scopes@GRAD"
,
"outputs@GRAD"
,
"inputs@GRAD"
,
"states"
,
"ex_states"
,
"initial_states@GRAD"
}}};
void
DynamicRecurrentOp
::
Run
(
const
framework
::
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
rnn
.
Run
<
RNNAlgorithm
::
ComputeMode
::
kForward
>
(
scope
,
*
dynamic_cast
<
const
OperatorBase
*>
(
this
),
dev_ctx
);
}
void
DynamicRecurrentGradientOp
::
Run
(
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
{}
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
{
rnn
.
Run
<
RNNAlgorithm
::
ComputeMode
::
kBackward
>
(
scope
,
*
dynamic_cast
<
const
OperatorBase
*>
(
this
),
dev_ctx
);
}
class
DynamicRecurrentOpProtoAndCheckerMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
DynamicRecurrentOpProtoAndCheckerMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
const
auto
&
name
=
RNNAlgorithm
::
kArgNames
[
RNNAlgorithm
::
ComputeMode
::
kForward
];
// inputs and outputs stored in proto
AddInput
(
name
.
inlinks
,
"the inputs that need to be segmented for each step."
)
.
AsDuplicable
();
AddInput
(
name
.
initial_states
,
"variables to initialize states."
)
.
AsDuplicable
();
AddOutput
(
name
.
outlinks
,
"the outputs that need to concated for all steps."
)
.
AsDuplicable
();
AddOutput
(
name
.
step_scopes
,
"step scopes"
);
// Attributes stored in AttributeMap
AddAttr
<
std
::
vector
<
std
::
string
>>
(
name
.
ex_states
,
"names of ex_states"
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
name
.
states
,
"names of states"
);
AddComment
(
"This is a RNN operator for varience-length sequences."
);
}
};
}
// namespace operators
}
// namespace paddle
REGISTER_OP_WITHOUT_GRADIENT
(
dynamic_recurrent
,
paddle
::
operators
::
DynamicRecurrentOp
,
paddle
::
operators
::
DynamicRecurrentOpProtoAndCheckerMaker
);
REGISTER_OP
(
dynamic_recurrent
,
paddle
::
operators
::
DynamicRecurrentOp
,
paddle
::
operators
::
DynamicRecurrentOpProtoAndCheckerMaker
,
dynamic_recurrent_grad
,
paddle
::
operators
::
DynamicRecurrentGradientOp
);
paddle/operators/dynamic_recurrent_op.h
浏览文件 @
c2feab7f
...
...
@@ -27,47 +27,39 @@
namespace
paddle
{
namespace
operators
{
class
DynamicRecurrentOp
:
public
framework
::
OperatorBase
{
class
RNNAlgorithm
{
public:
static
const
rnn
::
ArgumentName
kArgName
;
enum
ComputeMode
{
kForward
=
0
,
kBackward
=
1
};
static
const
std
::
array
<
rnn
::
ArgumentName
,
2
>
kArgNames
;
using
value_type
=
float
;
DynamicRecurrentOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
)
{}
DynamicRecurrentOp
(
const
DynamicRecurrentOp
&
o
)
:
framework
::
OperatorBase
(
static_cast
<
const
framework
::
OperatorBase
&>
(
o
))
{
// TODO(yuyang18): Implement copy ctor well.
PADDLE_THROW
(
"Not implemented"
);
}
void
Run
(
const
framework
::
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
;
/*
* Different `Run` method for forward and backward, `_` is just for template
* specifialization.
*/
template
<
ComputeMode
_
>
void
Run
(
const
framework
::
Scope
&
scope
,
const
framework
::
OperatorBase
&
op
,
const
platform
::
DeviceContext
&
dev_ctx
);
/*
* Split the inputs(LoDTensors) to segments for each time step.
*/
void
SplitInputs
()
const
;
void
SplitInputs
();
/*
* Create step-scopes to store temporary outputs in each time steps.
*/
void
CreateScopes
()
const
;
void
CreateScopes
();
/*
* Link TensorArray steps to the corresponding variables located in
* step-scopes.
*/
void
WriteStepInputs
()
const
;
void
WriteStepInputs
();
/*
* Write output of each step to the corresponding TensorArray.
*/
void
WriteStepOutputs
()
const
;
void
WriteStepOutputs
();
/*
* Initialize the states, each state will have a corresponding pre-state,
...
...
@@ -75,54 +67,83 @@ class DynamicRecurrentOp : public framework::OperatorBase {
* pre-state in the first time step will be initialized with an zero tensor or
* a tensor in parent scope if is provided.
*/
void
InitStates
()
const
;
void
InitStates
();
/*
* Create state variables for each time step.
*/
void
CreateState
(
const
rnn
::
MemoryAttr
&
memory
,
size_t
step
)
const
;
void
CreateState
(
const
rnn
::
StateAttr
&
state
,
size_t
step
)
;
/*
* Link pre-state variable in current scope to the state variable in the
* previous time step (scope).
* previous time step (scope) by reference.
*/
void
LinkState
(
const
rnn
::
StateAttr
&
state
,
size_t
step
);
/*
* Link the pre-state of the first time step to the `boot-state` in parent's
* scope.
*/
void
LinkInitialState
(
const
rnn
::
StateAttr
&
state
);
/*
* Copy the gradient from `pre-state` in the first step-scope to the
* `boot-state` in parent's scope.
*/
void
ExportInitialStateGradient
(
const
rnn
::
StateAttr
&
state
);
/*
* Calculate time steps.
*/
void
LinkState
(
const
rnn
::
MemoryAttr
&
memory
,
size_t
step
)
const
;
void
RunSteps
()
;
/*
* Concatenate outputs in each time step and generate a LoDTensor.
*/
void
ConcatOutputs
()
const
;
void
ConcatOutputs
();
void
SetComputeMode
(
ComputeMode
mode
)
{
mode_
=
mode
;
}
bool
IsForward
()
const
{
return
mode_
==
ComputeMode
::
kForward
;
}
bool
IsBackward
()
const
{
return
mode_
==
ComputeMode
::
kBackward
;
}
/*
* set a step
net that is created according to a RecurrentOp's stepne
t.
* set a step
unit that is created according to a RecurrentOp's step uni
t.
*/
void
SetStep
Net
(
std
::
unique_ptr
<
OperatorBase
>
ne
t
)
{
PADDLE_ENFORCE_NOT_NULL
(
ne
t
);
step
net_
=
std
::
move
(
ne
t
);
void
SetStep
Unit
(
std
::
unique_ptr
<
framework
::
OperatorBase
>
step_uni
t
)
{
PADDLE_ENFORCE_NOT_NULL
(
step_uni
t
);
step
_unit_
=
std
::
move
(
step_uni
t
);
}
const
OperatorBase
&
GetStepNet
()
const
{
return
*
stepne
t_
;
}
const
framework
::
OperatorBase
&
GetStepUnit
()
const
{
return
*
step_uni
t_
;
}
const
framework
::
TensorArray
&
state
(
const
std
::
string
&
name
)
const
{
return
states_
[
name
];
auto
it
=
states_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
states_
.
end
());
return
it
->
second
;
}
const
framework
::
TensorArray
&
step_input
(
const
std
::
string
&
name
)
const
{
return
step_inputs_
[
name
];
auto
it
=
step_inputs_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
step_inputs_
.
end
());
return
it
->
second
;
}
const
framework
::
TensorArray
&
step_output
(
const
std
::
string
&
name
)
const
{
return
step_outputs_
[
name
];
auto
it
=
step_outputs_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
step_outputs_
.
end
());
return
it
->
second
;
}
protected:
struct
ArgCache
{
framework
::
Scope
const
*
scope
;
std
::
vector
<
framework
::
Scope
*>*
scopes
;
std
::
map
<
std
::
string
,
framework
::
Variable
*>
inlinks
;
std
::
map
<
std
::
string
,
framework
::
Variable
*>
outlinks
;
std
::
map
<
std
::
string
,
framework
::
Variable
*>
inputs
;
std
::
map
<
std
::
string
,
framework
::
Variable
*>
outputs
;
platform
::
DeviceContext
const
*
dev_ctx
;
size_t
num_steps
{
0
};
void
Init
(
const
rnn
::
ArgumentName
&
name
,
const
OperatorBase
&
op
,
const
framework
::
Scope
&
scope
,
rnn
::
Argument
*
arg
);
void
Init
(
const
rnn
::
ArgumentName
&
name
,
const
framework
::
OperatorBase
&
op
,
const
framework
::
Scope
&
scope
,
platform
::
DeviceContext
const
*
dev_ctx
,
rnn
::
Argument
*
arg
);
framework
::
Scope
&
GetScope
(
size_t
index
)
{
PADDLE_ENFORCE_LT
(
index
,
num_steps
);
...
...
@@ -133,8 +154,8 @@ class DynamicRecurrentOp : public framework::OperatorBase {
const
std
::
string
&
name
);
private:
void
InitArgument
(
const
rnn
::
ArgumentName
&
name
,
const
OperatorBase
&
op
,
rnn
::
Argument
*
arg
);
void
InitArgument
(
const
rnn
::
ArgumentName
&
name
,
const
framework
::
OperatorBase
&
op
,
rnn
::
Argument
*
arg
);
void
CacheScopes
(
const
framework
::
Scope
&
scope
,
const
rnn
::
Argument
&
arg
);
void
CacheInlinks
(
const
framework
::
Scope
&
scope
,
const
std
::
vector
<
std
::
string
>&
names
);
...
...
@@ -145,27 +166,49 @@ class DynamicRecurrentOp : public framework::OperatorBase {
};
private:
std
::
unique_ptr
<
OperatorBase
>
stepne
t_
;
mutable
std
::
map
<
std
::
string
,
framework
::
TensorArray
>
states_
;
mutable
std
::
map
<
std
::
string
,
framework
::
TensorArray
>
step_inputs_
;
mutable
std
::
map
<
std
::
string
,
framework
::
TensorArray
>
step_outputs_
;
mutable
std
::
map
<
std
::
string
,
std
::
vector
<
framework
::
DySeqMeta
>>
dy_seq_metas
_
;
mutable
rnn
::
Argument
arg
_
;
mutable
ArgCache
cache_
;
std
::
unique_ptr
<
framework
::
OperatorBase
>
step_uni
t_
;
std
::
map
<
std
::
string
,
framework
::
TensorArray
>
states_
;
std
::
map
<
std
::
string
,
framework
::
TensorArray
>
step_inputs_
;
std
::
map
<
std
::
string
,
framework
::
TensorArray
>
step_outputs_
;
std
::
map
<
std
::
string
,
std
::
vector
<
framework
::
DySeqMeta
>>
dy_seq_metas_
;
rnn
::
Argument
arg
_
;
ArgCache
cache
_
;
ComputeMode
mode_
{
ComputeMode
::
kForward
}
;
#ifdef PADDLE_WITH_TESTING
friend
class
DynamicRecurrentOpTestHelper
;
FRIEND_TEST
(
DynamicRecurrentOpTestHelper
,
SplitInputs
);
FRIEND_TEST
(
DynamicRecurrentOpTestHelper
,
CreateCache
);
FRIEND_TEST
(
DynamicRecurrentOpTestHelper
,
CreateScopes
);
FRIEND_TEST
(
DynamicRecurrentOpTestHelper
,
WriteStepInputs
);
FRIEND_TEST
(
DynamicRecurrentOpTestHelper
,
WriteStepOutputs
);
FRIEND_TEST
(
DynamicRecurrentOpTestHelper
,
InitStates
);
FRIEND_TEST
(
DynamicRecurrentOpTestHelper
,
ConcatOutputs
);
// test forward
friend
class
RNNAlgorithmTestHelper
;
FRIEND_TEST
(
RNNAlgorithmTestHelper
,
SplitInputs
);
FRIEND_TEST
(
RNNAlgorithmTestHelper
,
CreateCache
);
FRIEND_TEST
(
RNNAlgorithmTestHelper
,
CreateScopes
);
FRIEND_TEST
(
RNNAlgorithmTestHelper
,
WriteStepInputs
);
FRIEND_TEST
(
RNNAlgorithmTestHelper
,
WriteStepOutputs
);
FRIEND_TEST
(
RNNAlgorithmTestHelper
,
InitStates
);
FRIEND_TEST
(
RNNAlgorithmTestHelper
,
ConcatOutputs
);
// TODO(superjom) test backward
#endif
};
class
DynamicRecurrentOp
:
public
framework
::
OperatorBase
{
public:
DynamicRecurrentOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
)
{}
DynamicRecurrentOp
(
const
DynamicRecurrentOp
&
o
)
:
framework
::
OperatorBase
(
static_cast
<
const
framework
::
OperatorBase
&>
(
o
))
{
PADDLE_THROW
(
"Not implemented"
);
}
void
Run
(
const
framework
::
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
;
mutable
RNNAlgorithm
rnn
;
};
class
DynamicRecurrentGradientOp
:
public
framework
::
OperatorBase
{
public:
DynamicRecurrentGradientOp
(
const
std
::
string
&
type
,
...
...
@@ -174,8 +217,16 @@ class DynamicRecurrentGradientOp : public framework::OperatorBase {
const
framework
::
AttributeMap
&
attrs
)
:
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
)
{}
DynamicRecurrentGradientOp
(
const
DynamicRecurrentGradientOp
&
o
)
:
framework
::
OperatorBase
(
static_cast
<
const
framework
::
OperatorBase
&>
(
o
))
{
PADDLE_THROW
(
"Not implemented"
);
}
void
Run
(
const
framework
::
Scope
&
scope
,
const
platform
::
DeviceContext
&
dev_ctx
)
const
override
;
mutable
RNNAlgorithm
rnn
;
};
}
// namespace operators
...
...
paddle/operators/dynamic_recurrent_op_test.cc
浏览文件 @
c2feab7f
...
...
@@ -43,16 +43,16 @@ LoDTensor* CreateVar(Scope& scope, std::string name, framework::DDim dims,
return
tensor
;
}
class
DynamicRecurrentOp
TestHelper
:
public
::
testing
::
Test
{
class
RNNAlgorithm
TestHelper
:
public
::
testing
::
Test
{
protected:
const
rnn
::
ArgumentName
argname
=
DynamicRecurrentOp
::
kArgName
;
const
rnn
::
ArgumentName
argname
=
RNNAlgorithm
::
kArgNames
[
0
]
;
virtual
void
SetUp
()
override
{
CreateGlobalVariables
();
auto
op_desc
=
CreateOpDesc
();
op
=
paddle
::
framework
::
OpRegistry
::
CreateOp
(
op_desc
,
nullptr
);
dop
=
dynamic_cast
<
DynamicRecurrentOp
*>
(
op
.
get
()
);
dop
=
&
(
dynamic_cast
<
DynamicRecurrentOp
*>
(
op
.
get
())
->
rnn
);
InitCacheManually
();
InitStepNet
();
}
...
...
@@ -63,20 +63,20 @@ class DynamicRecurrentOpTestHelper : public ::testing::Test {
op_desc
.
set_type
(
"dynamic_recurrent"
);
OpDescNewVar
(
argname
.
inlinks
,
{
"in0"
},
op_desc
.
add_inputs
());
OpDescNewVar
(
argname
.
boot_memori
es
,
{
"boot_mem"
},
op_desc
.
add_inputs
());
OpDescNewVar
(
argname
.
initial_stat
es
,
{
"boot_mem"
},
op_desc
.
add_inputs
());
OpDescNewVar
(
argname
.
step_scopes
,
{
"step_scopes"
},
op_desc
.
add_outputs
());
OpDescNewVar
(
argname
.
outlinks
,
{
"out0"
},
op_desc
.
add_outputs
());
// set pre-
memori
es
// set pre-
stat
es
auto
pre_memories
=
op_desc
.
mutable_attrs
()
->
Add
();
pre_memories
->
set_name
(
argname
.
pre_memori
es
);
pre_memories
->
set_name
(
argname
.
ex_stat
es
);
pre_memories
->
set_type
(
paddle
::
framework
::
AttrType
::
STRINGS
);
auto
pre_memories_item
=
pre_memories
->
add_strings
();
*
pre_memories_item
=
"mem@pre"
;
// set
memori
es
// set
stat
es
auto
memories
=
op_desc
.
mutable_attrs
()
->
Add
();
memories
->
set_name
(
argname
.
memori
es
);
memories
->
set_name
(
argname
.
stat
es
);
memories
->
set_type
(
paddle
::
framework
::
AttrType
::
STRINGS
);
auto
memories_item
=
memories
->
add_strings
();
*
memories_item
=
"mem"
;
...
...
@@ -113,32 +113,33 @@ class DynamicRecurrentOpTestHelper : public ::testing::Test {
}
void
InitCacheManually
()
{
dop
->
cache_
.
Init
(
DynamicRecurrentOp
::
kArgName
,
*
dop
,
scope
,
&
dop
->
arg_
);
dop
->
cache_
.
Init
(
RNNAlgorithm
::
kArgNames
[
0
],
*
op
,
scope
,
&
device_context
,
&
dop
->
arg_
);
}
void
InitStepNet
()
{
std
::
unique_ptr
<
framework
::
OperatorBase
>
stepnet
{
new
NetOp
};
dynamic_cast
<
NetOp
*>
(
stepnet
.
get
())
->
AppendOp
(
std
::
unique_ptr
<
TestOp
>
(
new
TestOp
(
"test"
,
{{
"in
links"
,
{
"in0"
}},
{
"boot_memori
es"
,
{
"boot_mem"
}}},
{{
"out
link
s"
,
{
"out0"
}},
{
"step_scopes"
,
{
"step_scopes"
}}},
{})));
dop
->
SetStep
Ne
t
(
std
::
move
(
stepnet
));
"test"
,
{{
"in
puts"
,
{
"in0"
}},
{
"initial_stat
es"
,
{
"boot_mem"
}}},
{{
"out
put
s"
,
{
"out0"
}},
{
"step_scopes"
,
{
"step_scopes"
}}},
{})));
dop
->
SetStep
Uni
t
(
std
::
move
(
stepnet
));
}
protected:
DynamicRecurrentOp
*
dop
;
RNNAlgorithm
*
dop
;
std
::
unique_ptr
<
framework
::
OperatorBase
>
op
;
paddle
::
platform
::
CPUDeviceContext
device_context
;
paddle
::
framework
::
Scope
scope
;
};
TEST_F
(
DynamicRecurrentOp
TestHelper
,
CreateCache
)
{
TEST_F
(
RNNAlgorithm
TestHelper
,
CreateCache
)
{
const
rnn
::
Argument
&
arg
=
dop
->
arg_
;
ASSERT_EQ
(
arg
.
inlinks
.
size
(),
1UL
);
ASSERT_EQ
(
arg
.
outlinks
.
size
(),
1UL
);
}
TEST_F
(
DynamicRecurrentOp
TestHelper
,
SplitInputs
)
{
TEST_F
(
RNNAlgorithm
TestHelper
,
SplitInputs
)
{
dop
->
SplitInputs
();
auto
&
in0_ta
=
dop
->
step_inputs_
[
"in0"
];
ASSERT_EQ
(
in0_ta
.
size
(),
4UL
);
...
...
@@ -153,14 +154,14 @@ TEST_F(DynamicRecurrentOpTestHelper, SplitInputs) {
EXPECT_EQ
(
batch3
.
dims
()[
0
],
1
);
}
TEST_F
(
DynamicRecurrentOp
TestHelper
,
CreateScopes
)
{
TEST_F
(
RNNAlgorithm
TestHelper
,
CreateScopes
)
{
dop
->
SplitInputs
();
dop
->
CreateScopes
();
ASSERT_EQ
(
dop
->
cache_
.
num_steps
,
4UL
);
ASSERT_EQ
(
dop
->
cache_
.
scopes
->
size
(),
4UL
);
}
TEST_F
(
DynamicRecurrentOp
TestHelper
,
WriteStepInputs
)
{
TEST_F
(
RNNAlgorithm
TestHelper
,
WriteStepInputs
)
{
dop
->
SplitInputs
();
dop
->
CreateScopes
();
dop
->
WriteStepInputs
();
...
...
@@ -173,7 +174,7 @@ TEST_F(DynamicRecurrentOpTestHelper, WriteStepInputs) {
}
}
TEST_F
(
DynamicRecurrentOp
TestHelper
,
WriteStepOutputs
)
{
TEST_F
(
RNNAlgorithm
TestHelper
,
WriteStepOutputs
)
{
dop
->
SplitInputs
();
dop
->
CreateScopes
();
dop
->
WriteStepInputs
();
...
...
@@ -187,11 +188,12 @@ TEST_F(DynamicRecurrentOpTestHelper, WriteStepOutputs) {
}
}
TEST_F
(
DynamicRecurrentOp
TestHelper
,
ConcatOutputs
)
{
TEST_F
(
RNNAlgorithm
TestHelper
,
ConcatOutputs
)
{
// Let's leave this test to python unittest.
}
TEST_F
(
DynamicRecurrentOpTestHelper
,
InitStates
)
{
TEST_F
(
RNNAlgorithmTestHelper
,
InitStates
)
{
dop
->
SetComputeMode
(
RNNAlgorithm
::
ComputeMode
::
kForward
);
dop
->
SplitInputs
();
dop
->
CreateScopes
();
dop
->
WriteStepInputs
();
...
...
@@ -208,12 +210,6 @@ TEST_F(DynamicRecurrentOpTestHelper, InitStates) {
auto
*
boot_state
=
scope
.
FindVar
(
"boot_mem"
);
ASSERT_TRUE
(
boot_state
!=
nullptr
);
if
(
step
==
0
)
{
// check pre_state is a reference of boot_state
ASSERT_EQ
(
boot_state
->
Get
<
LoDTensor
>
().
data
<
float
>
(),
pre_state
->
Get
<
LoDTensor
>
().
data
<
float
>
());
}
}
}
...
...
paddle/operators/gaussian_random_op.cc
浏览文件 @
c2feab7f
...
...
@@ -59,7 +59,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
protected:
framework
::
DataType
IndicateDataType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
static_cast
<
framework
::
DataType
>
(
Attr
<
int
>
(
"data_type"
));
return
static_cast
<
framework
::
DataType
>
(
ctx
.
Attr
<
int
>
(
"data_type"
));
}
};
...
...
paddle/operators/momentum_op.cc
浏览文件 @
c2feab7f
...
...
@@ -75,12 +75,17 @@ class MomentumOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"VelocityOut"
,
"(Tensor) Output updated velocity"
);
AddAttr
<
float
>
(
"mu"
,
"(float) Momentum coefficient"
);
AddAttr
<
bool
>
(
"useNesterov"
,
"(bool) Use Nesterov Momentum"
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
Momentum Algorithm (momentum).
Momentum Algorithm
with a flag for Nestrov Moemntum
(momentum).
velocity = mu * velocity + gradient
param = param - learning_rate * velocity
if (use_nesterov):
param = param - gradient * learning_rate + mu * velocity * learning_rate
else:
param = param - learning_rate * velocity
)DOC"
);
}
...
...
paddle/operators/momentum_op.h
浏览文件 @
c2feab7f
...
...
@@ -34,6 +34,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
velocity_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
float
mu
=
ctx
.
Attr
<
float
>
(
"mu"
);
bool
use_nesterov
=
ctx
.
Attr
<
bool
>
(
"useNesterov"
);
auto
p_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param_out
);
auto
v_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
velocity_out
);
...
...
@@ -46,8 +47,14 @@ class MomentumOpKernel : public framework::OpKernel<T> {
auto
place
=
ctx
.
GetEigenDevice
<
Place
>
();
Eigen
::
DSizes
<
int
,
1
>
grad_dsize
(
grad
->
numel
());
v_out
.
device
(
place
)
=
v
*
mu
+
g
;
p_out
.
device
(
place
)
=
p
-
lr
.
broadcast
(
grad_dsize
)
*
v_out
;
if
(
use_nesterov
)
{
p_out
.
device
(
place
)
=
p
-
g
*
lr
.
broadcast
(
grad_dsize
)
+
v_out
*
mu
*
lr
.
broadcast
(
grad_dsize
);
}
else
{
p_out
.
device
(
place
)
=
p
-
lr
.
broadcast
(
grad_dsize
)
*
v_out
;
}
}
};
...
...
paddle/operators/recurrent_op.cc
浏览文件 @
c2feab7f
...
...
@@ -42,7 +42,7 @@ void RecurrentAlgorithm::Run(const Scope& scope,
for
(
size_t
step_id
=
0
;
step_id
<
seq_len
;
step_id
++
)
{
if
(
step_id
>
0
)
{
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memori
es
,
step_id
,
-
1
);
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
stat
es
,
step_id
,
-
1
);
}
(
*
stepnet_
)
->
Run
(
*
step_scopes
[
step_id
],
dev_ctx
);
}
...
...
@@ -59,7 +59,8 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope,
// Now all variables in scope must be created outside of op.
PADDLE_ENFORCE_NOT_NULL
(
stepnet_
);
PADDLE_ENFORCE
(
!
(
*
stepnet_
)
->
Outputs
().
empty
(),
"stepnet_ op has no outputs"
);
PADDLE_ENFORCE
(
!
(
*
stepnet_
)
->
Outputs
().
empty
(),
"step_unit_ op has no outputs"
);
if
(
seq_len
>
step_scopes
->
size
())
{
for
(
size_t
i
=
step_scopes
->
size
();
i
<
seq_len
;
++
i
)
{
...
...
@@ -86,7 +87,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope,
}
void
RecurrentAlgorithm
::
InitMemories
(
Scope
*
step_scope
)
const
{
for
(
auto
&
attr
:
arg_
->
memori
es
)
{
for
(
auto
&
attr
:
arg_
->
stat
es
)
{
auto
*
pre_mem
=
step_scope
->
Var
(
attr
.
pre_var
)
->
GetMutable
<
LoDTensor
>
();
PADDLE_ENFORCE
(
step_scope
->
FindVar
(
attr
.
boot_var
)
!=
nullptr
,
"memory [%s]'s boot variable [%s] not exists"
,
attr
.
var
,
...
...
@@ -100,12 +101,12 @@ void RecurrentAlgorithm::InitMemories(Scope* step_scope) const {
}
const
rnn
::
ArgumentName
RecurrentOp
::
kArgName
{
"step_net"
,
"step_scopes"
,
"inlinks"
,
"outlink
s"
,
"
memories"
,
"pre_memories"
,
"boot_memori
es"
};
"step_net"
,
"step_scopes"
,
"inputs"
,
"output
s"
,
"
states"
,
"ex_states"
,
"initial_stat
es"
};
const
rnn
::
ArgumentName
RecurrentGradientOp
::
kArgName
{
"step_net"
,
"step_scopes@GRAD"
,
"out
links@GRAD"
,
"inlink
s@GRAD"
,
"
memories"
,
"pre_memories"
,
"boot_memori
es@GRAD"
};
"step_net"
,
"step_scopes@GRAD"
,
"out
puts@GRAD"
,
"input
s@GRAD"
,
"
states"
,
"ex_states"
,
"initial_stat
es@GRAD"
};
RecurrentOp
::
RecurrentOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
...
...
@@ -127,7 +128,7 @@ class RecurrentAlgorithmProtoAndCheckerMaker
AddInput
(
name
.
inlinks
,
"the inputs that need to be segmented for each step."
)
.
AsDuplicable
();
AddInput
(
name
.
boot_memories
,
"variables to initialize memori
es."
)
AddInput
(
name
.
initial_states
,
"variables to initialize stat
es."
)
.
AsDuplicable
();
AddOutput
(
name
.
outlinks
,
"the outputs that need to concated for all steps."
)
...
...
@@ -135,9 +136,8 @@ class RecurrentAlgorithmProtoAndCheckerMaker
AddOutput
(
name
.
step_scopes
,
"step scopes"
);
// Attributes stored in AttributeMap
AddAttr
<
std
::
vector
<
std
::
string
>>
(
name
.
pre_memories
,
"names of pre-memories"
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
name
.
memories
,
"names of memories"
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
name
.
ex_states
,
"names of pre-states"
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
name
.
states
,
"names of states"
);
AddComment
(
"This is a recurrent group operator."
);
}
...
...
@@ -152,7 +152,7 @@ void RecurrentGradientAlgorithm::Run(
rnn
::
SegmentInputs
(
step_scopes
,
arg_
->
inlinks
,
seq_len
);
for
(
int
step_id
=
seq_len
-
1
;
step_id
>=
0
;
--
step_id
)
{
if
(
static_cast
<
size_t
>
(
step_id
)
!=
seq_len
-
1
)
{
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
memori
es
,
step_id
,
1
);
rnn
::
LinkMemories
(
step_scopes
,
arg_
->
stat
es
,
step_id
,
1
);
}
(
*
stepnet_
)
->
Run
(
*
step_scopes
[
step_id
],
dev_ctx
);
}
...
...
@@ -162,7 +162,7 @@ void RecurrentGradientAlgorithm::Run(
void
RecurrentGradientAlgorithm
::
LinkBootMemoryGradients
(
Scope
*
step_scope
)
const
{
for
(
auto
&
attr
:
arg_
->
memori
es
)
{
for
(
auto
&
attr
:
arg_
->
stat
es
)
{
PADDLE_ENFORCE
(
step_scope
->
FindVar
(
attr
.
var
)
!=
nullptr
,
"memory variable [%s] does not exists"
,
attr
.
var
);
PADDLE_ENFORCE
(
step_scope
->
FindVar
(
attr
.
boot_var
)
!=
nullptr
,
...
...
paddle/operators/reduce_op.cc
浏览文件 @
c2feab7f
...
...
@@ -13,6 +13,7 @@
limitations under the License. */
#include "paddle/operators/reduce_op.h"
#include "paddle/operators/net_op.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -159,6 +160,66 @@ class ReduceMinOpMaker : public ReduceOpMaker {
}
};
class
NormOp
:
public
NetOp
{
public:
NormOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
NetOp
(
type
,
inputs
,
outputs
,
attrs
)
{
PADDLE_ENFORCE_NE
(
Input
(
"X"
),
framework
::
kEmptyVarName
,
"Input(X) of NormOp should not be null."
);
PADDLE_ENFORCE_NE
(
Output
(
"AbsOut"
),
framework
::
kEmptyVarName
,
"Output(AbsOut) of NormOp should not be null."
);
PADDLE_ENFORCE_NE
(
Output
(
"PowOut"
),
framework
::
kEmptyVarName
,
"Output(PowOut) of NormOp should not be null."
);
PADDLE_ENFORCE_NE
(
Output
(
"SumOut"
),
framework
::
kEmptyVarName
,
"Output(SumOut) of NormOp should not be null."
);
PADDLE_ENFORCE_NE
(
Output
(
"Out"
),
framework
::
kEmptyVarName
,
"Output(Out) of NormOp should not be null."
);
auto
dim
=
Attr
<
int
>
(
"dim"
);
auto
keep_dim
=
Attr
<
bool
>
(
"keep_dim"
);
auto
p
=
Attr
<
float
>
(
"p"
);
PADDLE_ENFORCE_GT
(
p
,
0
,
"Order of the norm should be positive."
);
AppendOp
(
framework
::
OpRegistry
::
CreateOp
(
"abs"
,
{{
"X"
,
{
Input
(
"X"
)}}},
{{
"Y"
,
{
Output
(
"AbsOut"
)}}},
{}));
AppendOp
(
framework
::
OpRegistry
::
CreateOp
(
"pow"
,
{{
"X"
,
{
Output
(
"AbsOut"
)}}},
{{
"Y"
,
{
Output
(
"PowOut"
)}}},
{{
"factor"
,
p
}}));
framework
::
AttributeMap
sum_attr
;
sum_attr
[
"dim"
]
=
dim
;
sum_attr
[
"keep_dim"
]
=
keep_dim
;
AppendOp
(
framework
::
OpRegistry
::
CreateOp
(
"reduce_sum"
,
{{
"X"
,
{
Output
(
"PowOut"
)}}},
{{
"Out"
,
{
Output
(
"SumOut"
)}}},
sum_attr
));
AppendOp
(
framework
::
OpRegistry
::
CreateOp
(
"pow"
,
{{
"X"
,
{
Output
(
"SumOut"
)}}},
{{
"Y"
,
{
Output
(
"Out"
)}}},
{{
"factor"
,
static_cast
<
float
>
(
1.
/
p
)}}));
CompleteAddOp
(
false
);
}
};
class
NormOpMaker
:
public
ReduceOpMaker
{
public:
NormOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
ReduceOpMaker
(
proto
,
op_checker
)
{
AddOutput
(
"AbsOut"
,
"(Tensor) The intermediate output of Norm operator, "
"saving the absolute value of the input tensor X."
)
.
AsIntermediate
();
AddOutput
(
"PowOut"
,
"(Tensor) The intermediate output of Norm operator, "
"saving the p-th power of the output tensor AbsOut."
)
.
AsIntermediate
();
AddOutput
(
"SumOut"
,
"(Tensor) the intermediate output of Norm operator, "
"saving the sum of PowOut reduced on the given dimension."
)
.
AsIntermediate
();
AddAttr
<
float
>
(
"p"
,
"(float, default 2) The order of Norm."
).
SetDefault
(
2
);
SetComment
(
"Norm"
,
"vector p-norm"
);
AddComment
(
comment_
);
}
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -176,6 +237,8 @@ REGISTER_OP(reduce_max, ops::ReduceOp, ops::ReduceMaxOpMaker, reduce_max_grad,
REGISTER_OP
(
reduce_min
,
ops
::
ReduceOp
,
ops
::
ReduceMinOpMaker
,
reduce_min_grad
,
ops
::
ReduceGradOp
);
REGISTER_OP_WITHOUT_GRADIENT
(
norm
,
ops
::
NormOp
,
ops
::
NormOpMaker
);
#define REGISTER_REDUCE_CPU_KERNEL(reduce_type, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \
reduce_type, \
...
...
paddle/operators/rnn/recurrent_op_utils.cc
浏览文件 @
c2feab7f
...
...
@@ -36,7 +36,7 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
LoDTensor
*
input
=
input_var
->
GetMutable
<
LoDTensor
>
();
f
::
DDim
dims
=
input
->
dims
();
PADDLE_ENFORCE_EQ
(
static_cast
<
size_t
>
(
dims
[
0
]),
seq_len
,
"all the in
link
s be the same length"
);
"all the in
put
s be the same length"
);
f
::
DDim
step_dims
=
slice_ddim
(
dims
,
1
,
dims
.
size
());
for
(
size_t
j
=
0
;
j
<
seq_len
;
j
++
)
{
Tensor
*
step_input
=
...
...
@@ -78,7 +78,7 @@ void ConcatOutputs(const std::vector<Scope*>& step_scopes,
}
void
LinkMemories
(
const
std
::
vector
<
Scope
*>&
scopes
,
const
std
::
vector
<
rnn
::
Memory
Attr
>&
memories
,
const
std
::
vector
<
rnn
::
State
Attr
>&
memories
,
const
size_t
step_id
,
const
int
offset
)
{
PADDLE_ENFORCE_LT
(
step_id
,
scopes
.
size
(),
"step [%d] is out of range of step scopes' size [%d]"
,
...
...
@@ -106,26 +106,26 @@ void InitArgument(const ArgumentName& name, Argument* arg,
arg
->
inlinks
=
op
.
Inputs
(
name
.
inlinks
);
arg
->
outlinks
=
op
.
Outputs
(
name
.
outlinks
);
auto
&
boot_memories
=
is_grad
?
op
.
Outputs
(
name
.
boot_memories
)
:
op
.
Inputs
(
name
.
boot_memori
es
);
auto
&
boot_memories
=
is_grad
?
op
.
Outputs
(
name
.
initial_states
)
:
op
.
Inputs
(
name
.
initial_stat
es
);
// attributes
auto
&
memories
=
op
.
Attr
<
std
::
vector
<
std
::
string
>>
(
name
.
memori
es
);
auto
&
pre_memories
=
op
.
Attr
<
std
::
vector
<
std
::
string
>>
(
name
.
pre_memori
es
);
auto
&
memories
=
op
.
Attr
<
std
::
vector
<
std
::
string
>>
(
name
.
stat
es
);
auto
&
pre_memories
=
op
.
Attr
<
std
::
vector
<
std
::
string
>>
(
name
.
ex_stat
es
);
PADDLE_ENFORCE
(
memories
.
size
()
==
boot_memories
.
size
(),
"the size of
memories, boot_memori
es don't match:%d,%d"
,
"the size of
states, initial_stat
es don't match:%d,%d"
,
memories
.
size
(),
boot_memories
.
size
());
PADDLE_ENFORCE
(
pre_memories
.
size
()
==
boot_memories
.
size
(),
"the size of
pre_memories, boot_memori
es don't match:%d,%d"
,
"the size of
ex_states, initial_stat
es don't match:%d,%d"
,
pre_memories
.
size
(),
boot_memories
.
size
());
PADDLE_ENFORCE
(
memories
.
size
()
>
0
,
"more than 1
memori
es should be set"
);
PADDLE_ENFORCE
(
memories
.
size
()
>
0
,
"more than 1
stat
es should be set"
);
for
(
size_t
i
=
0
;
i
<
memories
.
size
();
++
i
)
{
rnn
::
Memory
Attr
mem_attr
;
rnn
::
State
Attr
mem_attr
;
mem_attr
.
var
=
memories
[
i
];
mem_attr
.
pre_var
=
pre_memories
[
i
];
mem_attr
.
boot_var
=
boot_memories
[
i
];
(
arg
->
memori
es
).
push_back
(
mem_attr
);
(
arg
->
stat
es
).
push_back
(
mem_attr
);
}
}
...
...
paddle/operators/rnn/recurrent_op_utils.h
浏览文件 @
c2feab7f
...
...
@@ -31,7 +31,7 @@ using Scope = framework::Scope;
* boot memories in father scope. Other attributes are copied from Op's proto
* attributes.
*/
struct
Memory
Attr
{
struct
State
Attr
{
// name of current state variable
std
::
string
var
;
// name of previous step's state variable
...
...
@@ -46,7 +46,7 @@ struct Argument {
std
::
string
step_scopes
;
std
::
vector
<
std
::
string
>
inlinks
;
std
::
vector
<
std
::
string
>
outlinks
;
std
::
vector
<
rnn
::
MemoryAttr
>
memori
es
;
std
::
vector
<
rnn
::
StateAttr
>
stat
es
;
};
struct
ArgumentName
{
...
...
@@ -54,9 +54,9 @@ struct ArgumentName {
std
::
string
step_scopes
;
std
::
string
inlinks
;
std
::
string
outlinks
;
std
::
string
memories
;
// the memory name
std
::
string
pre_memories
;
// the previous memory name
std
::
string
boot_memori
es
;
// the boot memory name
std
::
string
states
;
// the memory name
std
::
string
ex_states
;
// the previous memory name
std
::
string
initial_stat
es
;
// the boot memory name
};
/**
...
...
@@ -74,7 +74,7 @@ void ConcatOutputs(const std::vector<Scope*>& step_scopes,
const
size_t
seq_len
,
const
platform
::
DeviceContext
&
ctx
);
void
LinkMemories
(
const
std
::
vector
<
Scope
*>&
step_scopes
,
const
std
::
vector
<
Memory
Attr
>&
memories
,
const
size_t
step_id
,
const
std
::
vector
<
State
Attr
>&
memories
,
const
size_t
step_id
,
const
int
offset
);
void
InitArgument
(
const
ArgumentName
&
name
,
Argument
*
arg
,
...
...
paddle/operators/uniform_random_op.cc
浏览文件 @
c2feab7f
...
...
@@ -65,7 +65,7 @@ class UniformRandomOp : public framework::OperatorWithKernel {
protected:
framework
::
DataType
IndicateDataType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
static_cast
<
framework
::
DataType
>
(
Attr
<
int
>
(
"data_type"
));
return
static_cast
<
framework
::
DataType
>
(
ctx
.
Attr
<
int
>
(
"data_type"
));
}
};
...
...
paddle/pybind/pybind.cc
浏览文件 @
c2feab7f
...
...
@@ -413,18 +413,18 @@ All parameter, weight, gradient are variables in Paddle.
return
static_cast
<
operators
::
DynamicRecurrentOp
*>
(
rnn_op
.
release
());
})
.
def
(
"set_step
ne
t"
,
.
def
(
"set_step
_uni
t"
,
[](
operators
::
DynamicRecurrentOp
&
self
,
const
operators
::
NetOp
&
net
)
->
void
{
self
.
SetStepNe
t
(
net
.
Clone
());
})
->
void
{
self
.
rnn
.
SetStepUni
t
(
net
.
Clone
());
})
.
def
(
"get_state"
,
[](
operators
::
DynamicRecurrentOp
&
self
,
const
std
::
string
&
name
)
->
const
TensorArray
&
{
return
self
.
state
(
name
);
})
->
const
TensorArray
&
{
return
self
.
rnn
.
state
(
name
);
})
.
def
(
"get_step_input"
,
[](
operators
::
DynamicRecurrentOp
&
self
,
const
std
::
string
&
name
)
->
const
TensorArray
&
{
return
self
.
step_input
(
name
);
})
->
const
TensorArray
&
{
return
self
.
rnn
.
step_input
(
name
);
})
.
def
(
"get_step_output"
,
[](
operators
::
DynamicRecurrentOp
&
self
,
const
std
::
string
&
name
)
->
const
TensorArray
&
{
return
self
.
step_output
(
name
);
});
->
const
TensorArray
&
{
return
self
.
rnn
.
step_output
(
name
);
});
// cond_op
py
::
class_
<
operators
::
CondOp
,
OperatorBase
>
(
m
,
"CondOp"
)
...
...
@@ -466,6 +466,8 @@ All parameter, weight, gradient are variables in Paddle.
BindVarDsec
(
m
);
BindOpDesc
(
m
);
m
.
def
(
"op_support_gpu"
,
OpSupportGPU
);
return
m
.
ptr
();
}
}
// namespace pybind
...
...
paddle/scripts/docker/build.sh
浏览文件 @
c2feab7f
...
...
@@ -141,10 +141,17 @@ RUN sed -i '${APT_MIRROR}' /etc/apt/sources.list
EOF
fi
if
[[
${
WITH_GPU
}
==
"ON"
]]
;
then
NCCL_DEPS
=
"apt-get install -y libnccl-dev &&"
else
NCCL_DEPS
=
""
fi
cat
>>
/paddle/build/Dockerfile
<<
EOF
ADD python/dist/*.whl /
# run paddle version to install python packages first
RUN apt-get update &&
\
${
NCCL_DEPS
}
\
apt-get install -y wget python-pip && pip install -U pip &&
\
pip install /*.whl; apt-get install -f -y &&
\
apt-get clean -y &&
\
...
...
paddle/trainer/tests/sample_trainer_config_branch_net.conf
浏览文件 @
c2feab7f
...
...
@@ -17,7 +17,7 @@ from paddle.trainer_config_helpers import *
################################### Data Configuration ###################################
TrainData
(
ProtoData
(
files
=
"trainer/tests/mnist.list"
))
################################### Algorithm Configuration ###################################
settings
(
batch_size
=
256
,
settings
(
batch_size
=
128
,
learning_method
=
MomentumOptimizer
(
momentum
=
0
.
5
,
sparse
=
False
))
################################### Network Configuration ###################################
data
=
data_layer
(
name
=
"input"
,
size
=
784
)
...
...
@@ -44,10 +44,11 @@ a2 = img_conv_layer(input=tmp,
shared_biases
=
True
,
act
=
ReluActivation
())
tmp
=
concat_layer
(
input
=[
a1
,
a2
])
tmp
=
addto_layer
(
input
=[
a1
,
a2
],
act
=
ReluActivation
(),
bias_attr
=
False
)
tmp
=
img_pool_layer
(
input
=
tmp
,
num_channels
=
64
,
pool_size
=
3
,
stride
=
2
,
padding
=
1
,
...
...
@@ -55,35 +56,34 @@ tmp = img_pool_layer(input=tmp,
b1
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
3
,
num_filters
=
64
,
num_filters
=
32
,
padding
=
1
,
shared_biases
=
True
,
act
=
ReluActivation
())
b1
=
img_pool_layer
(
input
=
b1
,
pool_size
=
3
,
stride
=
1
,
padding
=
1
,
stride
=
2
,
padding
=
0
,
pool_type
=
MaxPooling
())
b2
=
img_conv_layer
(
input
=
tmp
,
filter_size
=
5
,
filter_size
=
3
,
num_filters
=
64
,
padding
=
2
,
padding
=
1
,
shared_biases
=
True
,
act
=
ReluActivation
())
b2
=
img_pool_layer
(
input
=
b2
,
pool_size
=
5
,
stride
=
1
,
padding
=
2
,
stride
=
2
,
padding
=
1
,
pool_type
=
MaxPooling
())
tmp
=
addto_layer
(
input
=[
b1
,
b2
],
act
=
ReluActivation
(),
bias_attr
=
False
)
tmp
=
concat_layer
(
input
=[
b1
,
b2
])
tmp
=
img_pool_layer
(
input
=
tmp
,
num_channels
=
96
,
pool_size
=
3
,
stride
=
2
,
padding
=
1
,
...
...
paddle/trainer/tests/sample_trainer_config_simple_net.conf
浏览文件 @
c2feab7f
...
...
@@ -17,7 +17,7 @@ from paddle.trainer_config_helpers import *
################################### Data Configuration ###################################
TrainData
(
ProtoData
(
files
=
"trainer/tests/mnist.list"
))
################################### Algorithm Configuration ###################################
settings
(
batch_size
=
1
000
,
settings
(
batch_size
=
1
28
,
learning_method
=
MomentumOptimizer
(
momentum
=
0
.
5
,
sparse
=
False
))
################################### Network Configuration ###################################
data
=
data_layer
(
name
=
"input"
,
size
=
784
)
...
...
python/paddle/v2/framework/nets.py
浏览文件 @
c2feab7f
...
...
@@ -7,18 +7,21 @@ def simple_img_conv_pool(input,
pool_size
,
pool_stride
,
act
,
program
=
None
):
program
=
None
,
init_program
=
None
):
conv_out
=
layers
.
conv2d
(
input
=
input
,
num_filters
=
num_filters
,
filter_size
=
filter_size
,
act
=
act
,
program
=
program
)
program
=
program
,
init_program
=
init_program
)
pool_out
=
layers
.
pool2d
(
input
=
conv_out
,
pool_size
=
pool_size
,
pool_type
=
'max'
,
pool_stride
=
pool_stride
,
program
=
program
)
program
=
program
,
init_program
=
init_program
)
return
pool_out
python/paddle/v2/framework/optimizer.py
浏览文件 @
c2feab7f
import
paddle.v2.framework.framework
as
framework
from
collections
import
defaultdict
__all__
=
[
'SGDOptimizer'
,
'MomentumOptimizer'
]
__all__
=
[
'SGDOptimizer'
,
'MomentumOptimizer'
,
'AdagradOptimizer'
]
class
Optimizer
(
object
):
...
...
@@ -272,3 +272,60 @@ class MomentumOptimizer(Optimizer):
attrs
=
{
"mu"
:
self
.
_momentum
})
return
momentum_op
class
AdagradOptimizer
(
Optimizer
):
"""Simple Adagrad optimizer with moment state
"""
_moment_acc_str
=
"moment"
def
__init__
(
self
,
learning_rate
,
epsilon
=
1.0e-6
):
assert
learning_rate
is
not
None
assert
epsilon
is
not
None
super
(
AdagradOptimizer
,
self
).
__init__
()
self
.
type
=
"adagrad"
self
.
_learning_rate
=
learning_rate
self
.
_epsilon
=
epsilon
def
_initialize_tensors
(
self
,
block
):
assert
isinstance
(
block
,
framework
.
Block
)
lr_shape
=
[
1
]
# create a variable for learning_rate
self
.
_lr
=
block
.
create_var
(
dtype
=
"float32"
,
shape
=
lr_shape
,
lod_level
=
0
)
# create an op to init the learning_rate
# FIXME: Fix when Initialization design has been implemented
# https://github.com/PaddlePaddle/Paddle/pull/4852
block
.
append_op
(
type
=
"fill_constant"
,
outputs
=
{
"Out"
:
self
.
_lr
},
attrs
=
{
"shape"
:
lr_shape
,
"value"
:
self
.
_learning_rate
})
def
_create_accumulators
(
self
,
block
,
parameters
):
assert
isinstance
(
block
,
framework
.
Block
)
for
p
in
parameters
:
self
.
_add_accumulator
(
block
,
self
.
_moment_acc_str
,
p
,
'float32'
)
def
_append_optimize_op
(
self
,
block
,
param_and_grad
):
assert
isinstance
(
block
,
framework
.
Block
)
moment_acc
=
self
.
_get_accumulator
(
self
.
_moment_acc_str
,
param_and_grad
[
0
])
# create the adagrad optimizer op
adagrad_op
=
block
.
append_op
(
type
=
self
.
type
,
inputs
=
{
"Param"
:
param_and_grad
[
0
],
"Grad"
:
param_and_grad
[
1
],
"Moment"
:
moment_acc
,
"LearningRate"
:
self
.
_lr
},
outputs
=
{
"ParamOut"
:
param_and_grad
[
0
],
"MomentOut"
:
moment_acc
},
attrs
=
{
"epsilon"
:
self
.
_epsilon
})
return
adagrad_op
python/paddle/v2/framework/tests/test_cross_entropy_op.py
浏览文件 @
c2feab7f
...
...
@@ -21,7 +21,7 @@ class TestCrossEntropyOp1(OpTest):
self
.
inputs
=
{
"X"
:
X
,
"Label"
:
label
}
self
.
outputs
=
{
"Y"
:
cross_entropy
}
self
.
attrs
=
{
"soft
L
abel"
:
False
}
self
.
attrs
=
{
"soft
_l
abel"
:
False
}
def
test_check_output
(
self
):
self
.
check_output
()
...
...
python/paddle/v2/framework/tests/test_dynamic_recurrent_op.py
浏览文件 @
c2feab7f
...
...
@@ -4,6 +4,12 @@ import unittest
from
paddle.v2.framework.op
import
Operator
,
DynamicRecurrentOp
import
numpy
as
np
# for siplicity, just one level LoD
lod_py
=
[[
0
,
4
,
7
,
9
,
10
]]
input_dim
=
30
num_sents
=
len
(
lod_py
[
0
])
-
1
weight_dim
=
15
def
create_tensor
(
scope
,
name
,
shape
,
np_data
):
tensor
=
scope
.
var
(
name
).
get_tensor
()
...
...
@@ -12,6 +18,17 @@ def create_tensor(scope, name, shape, np_data):
return
tensor
class
PyRNNStep
(
object
):
def
__init__
(
self
):
self
.
x
=
np
.
random
.
normal
(
size
=
(
lod_py
[
0
][
-
1
],
input_dim
)).
astype
(
"float32"
)
self
.
W
=
np
.
random
.
normal
(
size
=
(
input_dim
,
input_dim
)).
astype
(
"float32"
)
self
.
U
=
np
.
random
.
normal
(
size
=
(
input_dim
,
input_dim
)).
astype
(
"float32"
)
self
.
h_boot
=
np
.
random
.
normal
(
size
=
(
num_sents
,
input_dim
)).
astype
(
"float32"
)
class
DynamicRecurrentOpTest
(
unittest
.
TestCase
):
'''
Test RNNOp
...
...
@@ -23,17 +40,13 @@ class DynamicRecurrentOpTest(unittest.TestCase):
- U
vars:
- x
memori
es:
stat
es:
- h
outputs:
- h
'''
# for siplicity, just one level LoD
lod_py
=
[[
0
,
4
,
7
,
9
,
10
]]
input_dim
=
30
num_sents
=
len
(
lod_py
[
0
])
-
1
weight_dim
=
15
py
=
PyRNNStep
()
def
forward
(
self
):
self
.
scope
=
core
.
Scope
()
...
...
@@ -42,64 +55,55 @@ class DynamicRecurrentOpTest(unittest.TestCase):
self
.
create_step_net
()
ctx
=
core
.
DeviceContext
.
create
(
core
.
CPUPlace
())
self
.
rnnop
.
run
(
self
.
scope
,
ctx
)
state
=
self
.
rnnop
.
get_state
(
"h@
mem
"
)
state
=
self
.
rnnop
.
get_state
(
"h@
state
"
)
print
'state size: '
,
state
.
size
()
step_inputs
=
self
.
rnnop
.
get_step_input
(
"x"
)
print
"x size "
,
step_inputs
.
size
()
for
i
in
range
(
step_inputs
.
size
()):
print
"x %d"
%
i
,
np
.
array
(
step_inputs
.
read
(
i
).
get_dims
())
step_outputs
=
self
.
rnnop
.
get_step_output
(
'h@
mem
'
)
step_outputs
=
self
.
rnnop
.
get_step_output
(
'h@
state
'
)
print
'step_outputs.size '
,
step_outputs
.
size
()
output
=
self
.
scope
.
find_var
(
"h@mem"
).
get_tensor
()
output
=
self
.
scope
.
find_var
(
"h@state"
).
get_tensor
()
print
'output'
,
np
.
array
(
output
).
shape
def
create_global_variables
(
self
):
x
=
np
.
random
.
normal
(
size
=
(
self
.
lod_py
[
0
][
-
1
],
self
.
input_dim
)).
astype
(
"float32"
)
W
=
np
.
random
.
normal
(
size
=
(
self
.
input_dim
,
self
.
input_dim
)).
astype
(
"float32"
)
U
=
np
.
random
.
normal
(
size
=
(
self
.
input_dim
,
self
.
input_dim
)).
astype
(
"float32"
)
h_boot
=
np
.
random
.
normal
(
size
=
(
self
.
num_sents
,
self
.
input_dim
)).
astype
(
"float32"
)
# create inlink
x_tensor
=
create_tensor
(
self
.
scope
,
"x"
,
[
self
.
num_sents
,
self
.
input_dim
],
x
)
x_tensor
.
set_lod
(
self
.
lod_py
)
create_tensor
(
self
.
scope
,
"W"
,
[
self
.
input_dim
,
self
.
input_dim
],
W
)
create_tensor
(
self
.
scope
,
"U"
,
[
self
.
input_dim
,
self
.
input_dim
],
U
)
create_tensor
(
self
.
scope
,
"h_boot"
,
[
self
.
num_sents
,
self
.
input_dim
],
h_boot
)
x_tensor
=
create_tensor
(
self
.
scope
,
"x"
,
[
num_sents
,
input_dim
],
self
.
py
.
x
)
x_tensor
.
set_lod
(
lod_py
)
create_tensor
(
self
.
scope
,
"W"
,
[
input_dim
,
input_dim
],
self
.
py
.
W
)
create_tensor
(
self
.
scope
,
"U"
,
[
input_dim
,
input_dim
],
self
.
py
.
U
)
create_tensor
(
self
.
scope
,
"h_boot"
,
[
num_sents
,
input_dim
],
self
.
py
.
h_boot
)
self
.
scope
.
var
(
"step_scopes"
)
self
.
scope
.
var
(
"h@
mem
"
)
self
.
scope
.
var
(
"h@
state
"
)
def
create_rnn_op
(
self
):
# create RNNOp
self
.
rnnop
=
DynamicRecurrentOp
(
# inputs
in
link
s
=
[
"x"
],
boot_memori
es
=
[
"h_boot"
],
step_net
=
"step
ne
t"
,
in
put
s
=
[
"x"
],
initial_stat
es
=
[
"h_boot"
],
step_net
=
"step
_uni
t"
,
# outputs
out
links
=
[
"h@mem
"
],
out
puts
=
[
"h@state
"
],
step_scopes
=
"step_scopes"
,
# attributes
pre_memori
es
=
[
"h@pre"
],
memories
=
[
"h@mem
"
])
ex_stat
es
=
[
"h@pre"
],
states
=
[
"h@state
"
])
def
create_step_net
(
self
):
step
ne
t
=
core
.
Net
.
create
()
step
_uni
t
=
core
.
Net
.
create
()
x_fc_op
=
Operator
(
"mul"
,
X
=
"x"
,
Y
=
"W"
,
Out
=
"Wx"
)
h_fc_op
=
Operator
(
"mul"
,
X
=
"h@pre"
,
Y
=
"U"
,
Out
=
"Uh"
)
sum_op
=
Operator
(
"sum"
,
X
=
[
"Wx"
,
"Uh"
],
Out
=
"sum"
)
sig_op
=
Operator
(
"sigmoid"
,
X
=
"sum"
,
Y
=
"h@
mem
"
)
sig_op
=
Operator
(
"sigmoid"
,
X
=
"sum"
,
Y
=
"h@
state
"
)
for
op
in
[
x_fc_op
,
h_fc_op
,
sum_op
,
sig_op
]:
step
ne
t
.
append_op
(
op
)
step
ne
t
.
complete_add_op
(
True
)
self
.
rnnop
.
set_step
net
(
stepne
t
)
step
_uni
t
.
append_op
(
op
)
step
_uni
t
.
complete_add_op
(
True
)
self
.
rnnop
.
set_step
_unit
(
step_uni
t
)
def
test_forward
(
self
):
print
'test recurrent op forward'
...
...
@@ -107,5 +111,58 @@ class DynamicRecurrentOpTest(unittest.TestCase):
print
'pd_output'
,
pd_output
class
RecurrentGradientOpTest
(
unittest
.
TestCase
):
py
=
PyRNNStep
()
def
create_forward_op
(
self
):
# create RNNOp
self
.
forward_op
=
DynamicRecurrentOp
(
# inputs
inputs
=
[
"x"
],
initial_states
=
[
"h_boot"
],
step_net
=
"step_unit"
,
# outputs
outputs
=
[
"h@state"
],
step_scopes
=
"step_scopes"
,
# attributes
ex_states
=
[
"h@pre"
],
states
=
[
"h@state"
])
def
create_gradient_op
(
self
):
a
=
set
()
backward_op
=
core
.
DynamicRecurrentOp
.
backward
(
self
.
forward_op
,
a
)
def
create_step_net
(
self
):
step_unit
=
core
.
Net
.
create
()
x_fc_op
=
Operator
(
"mul"
,
X
=
"x"
,
Y
=
"W"
,
Out
=
"Wx"
)
h_fc_op
=
Operator
(
"mul"
,
X
=
"h@pre"
,
Y
=
"U"
,
Out
=
"Uh"
)
sum_op
=
Operator
(
"sum"
,
X
=
[
"Wx"
,
"Uh"
],
Out
=
"sum"
)
sig_op
=
Operator
(
"sigmoid"
,
X
=
"sum"
,
Y
=
"h@state"
)
for
op
in
[
x_fc_op
,
h_fc_op
,
sum_op
,
sig_op
]:
step_unit
.
append_op
(
op
)
step_unit
.
complete_add_op
(
True
)
self
.
forward_op
.
set_step_unit
(
step_unit
)
def
create_global_variables
(
self
):
# create inlink
x_tensor
=
create_tensor
(
self
.
scope
,
"x"
,
[
num_sents
,
input_dim
],
self
.
py
.
x
)
x_tensor
.
set_lod
(
lod_py
)
create_tensor
(
self
.
scope
,
"W"
,
[
input_dim
,
input_dim
],
self
.
py
.
W
)
create_tensor
(
self
.
scope
,
"U"
,
[
input_dim
,
input_dim
],
self
.
py
.
U
)
create_tensor
(
self
.
scope
,
"h_boot"
,
[
num_sents
,
input_dim
],
self
.
py
.
h_boot
)
self
.
scope
.
var
(
"step_scopes"
)
self
.
scope
.
var
(
"h@state"
)
def
test_grad
(
self
):
self
.
scope
=
core
.
Scope
()
self
.
create_forward_op
()
self
.
create_global_variables
()
self
.
create_step_net
()
self
.
create_gradient_op
()
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/framework/tests/test_momentum_op.py
浏览文件 @
c2feab7f
...
...
@@ -3,7 +3,7 @@ import numpy as np
from
op_test
import
OpTest
class
TestMomentumOp
(
OpTest
):
class
TestMomentumOp
1
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"momentum"
...
...
@@ -12,6 +12,7 @@ class TestMomentumOp(OpTest):
velocity
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
learning_rate
=
np
.
array
([
0.001
]).
astype
(
"float32"
)
mu
=
0.0001
use_nesterov
=
False
self
.
inputs
=
{
'Param'
:
param
,
...
...
@@ -23,7 +24,47 @@ class TestMomentumOp(OpTest):
self
.
attrs
=
{
'mu'
:
mu
}
velocity_out
=
mu
*
velocity
+
grad
param_out
=
param
-
learning_rate
*
velocity_out
if
use_nesterov
:
param_out
=
param
-
grad
*
learning_rate
+
\
velocity_out
*
mu
*
learning_rate
else
:
param_out
=
param
-
learning_rate
*
velocity_out
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'VelocityOut'
:
velocity_out
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestMomentumOp2
(
OpTest
):
'''Test Momentum with defaukt values for attributes
'''
def
setUp
(
self
):
self
.
op_type
=
"momentum"
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
velocity
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
learning_rate
=
np
.
array
([
0.001
]).
astype
(
"float32"
)
mu
=
0.0001
use_nesterov
=
True
self
.
inputs
=
{
'Param'
:
param
,
'Grad'
:
grad
,
'Velocity'
:
velocity
,
'LearningRate'
:
learning_rate
}
self
.
attrs
=
{
'mu'
:
mu
,
'useNesterov'
:
use_nesterov
}
velocity_out
=
mu
*
velocity
+
grad
if
use_nesterov
:
param_out
=
param
-
grad
*
learning_rate
+
\
velocity_out
*
mu
*
learning_rate
else
:
param_out
=
param
-
learning_rate
*
velocity_out
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'VelocityOut'
:
velocity_out
}
...
...
python/paddle/v2/framework/tests/test_op_support_gpu.py
0 → 100644
浏览文件 @
c2feab7f
import
unittest
import
paddle.v2.framework.core
as
core
class
TestOpSupportGPU
(
unittest
.
TestCase
):
def
test_case
(
self
):
self
.
assertEqual
(
core
.
is_compile_gpu
(),
core
.
op_support_gpu
(
"sum"
))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/framework/tests/test_optimizer.py
浏览文件 @
c2feab7f
...
...
@@ -69,5 +69,46 @@ class TestMomentumOptimizer(unittest.TestCase):
self
.
assertTrue
(
mul_x
.
name
in
velocity_acc
)
class TestAdagradOptimizer(unittest.TestCase):
    """Verify that AdagradOptimizer emits one adagrad op and creates a
    moment accumulator for each optimized parameter."""

    class MockAdagrad(optimizer.AdagradOptimizer):
        # Expose the optimizer's private accumulator state for assertions.
        def get_accumulators(self):
            return self._accumulators

        def get_moment_str(self):
            return self._moment_acc_str

    def test_adagrad_optimizer(self):
        program = framework.Program()
        block = program.global_block()

        # A single mul op: parameter "mul.x" times variable "mul.y".
        mul_x = block.create_parameter(
            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})

        adagrad_optimizer = self.MockAdagrad(
            learning_rate=0.01, epsilon=1.0e-6)

        grads = adagrad_optimizer.create_backward_pass(mul_out)
        self.assertEqual(len(grads), 1)
        # No accumulators exist until the optimization pass runs.
        self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)

        opts = adagrad_optimizer.create_optimization_pass(grads, mul_out)
        self.assertEqual(len(opts), 1)
        adagrad_op = opts[0]
        self.assertEqual(adagrad_op.type, "adagrad")

        # check accumulators: exactly one moment entry, keyed by the
        # optimized parameter's name.
        acc = adagrad_optimizer.get_accumulators()
        self.assertEqual(len(acc), 1)
        self.assertTrue(adagrad_optimizer.get_moment_str() in acc)
        moment_acc = acc[adagrad_optimizer.get_moment_str()]
        self.assertEqual(len(moment_acc), 1)
        self.assertTrue(mul_x.name in moment_acc)


if __name__ == '__main__':
    unittest.main()
python/paddle/v2/framework/tests/test_recognize_digits_conv.py
0 → 100644
浏览文件 @
c2feab7f
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer

from paddle.v2.framework.framework import Program, g_program
from paddle.v2.framework.executor import Executor

import numpy as np

# Train a small LeNet-style CNN on MNIST and exit(0) once the average
# cost drops below 10.0; exit(1) otherwise (used as a CI smoke test).
init_program = Program()
program = Program()

images = layers.data(
    name='pixel',
    shape=[1, 28, 28],
    data_type='float32',
    program=program,
    init_program=init_program)
label = layers.data(
    name='label',
    shape=[1],
    data_type='int32',
    program=program,
    init_program=init_program)
conv_pool_1 = nets.simple_img_conv_pool(
    input=images,
    filter_size=5,
    num_filters=20,
    pool_size=2,
    pool_stride=2,
    act="relu",
    program=program,
    init_program=init_program)
conv_pool_2 = nets.simple_img_conv_pool(
    input=conv_pool_1,
    filter_size=5,
    num_filters=50,
    pool_size=2,
    pool_stride=2,
    act="relu",
    program=program,
    init_program=init_program)

predict = layers.fc(input=conv_pool_2,
                    size=10,
                    act="softmax",
                    program=program,
                    init_program=init_program)
cost = layers.cross_entropy(
    input=predict, label=label, program=program, init_program=init_program)
# NOTE(review): unlike the layers above, mean() is not given init_program —
# presumably it creates no parameters; confirm against layers.mean().
avg_cost = layers.mean(x=cost, program=program)

sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)

BATCH_SIZE = 50
PASS_NUM = 1
train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=500),
    batch_size=BATCH_SIZE)

place = core.CPUPlace()
exe = Executor(place)

# Run the parameter-initialization program once before training.
exe.run(init_program, feed={}, fetch_list=[])

for pass_id in range(PASS_NUM):
    count = 0
    for data in train_reader():
        # list(map(...)) keeps this working on Python 3, where map()
        # returns an iterator that np.array cannot consume directly.
        img_data = np.array(
            list(map(lambda x: x[0].reshape([1, 28, 28]),
                     data))).astype("float32")
        y_data = np.array(list(map(lambda x: x[1], data))).astype("int32")
        y_data = y_data.reshape([BATCH_SIZE, 1])

        tensor_img = core.LoDTensor()
        tensor_y = core.LoDTensor()
        tensor_img.set(img_data, place)
        tensor_y.set(y_data, place)

        outs = exe.run(program,
                       feed={"pixel": tensor_img,
                             "label": tensor_y},
                       fetch_list=[avg_cost])
        loss = np.array(outs[0])

        if loss < 10.0:
            exit(0)

# if avg cost less than 10.0, we think our code is good.
exit(1)
python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
0 → 100644
浏览文件 @
c2feab7f
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer

from paddle.v2.framework.framework import Program, g_program
from paddle.v2.framework.executor import Executor

import numpy as np

# Train a 784-128-64-10 MLP on MNIST and exit(0) once the average cost
# drops below 5.0; exit(1) otherwise (used as a CI smoke test).
init_program = Program()
program = Program()
image = layers.data(
    name='x',
    shape=[784],
    data_type='float32',
    program=program,
    init_program=init_program)

hidden1 = layers.fc(input=image,
                    size=128,
                    act='relu',
                    program=program,
                    init_program=init_program)
hidden2 = layers.fc(input=hidden1,
                    size=64,
                    act='relu',
                    program=program,
                    init_program=init_program)
predict = layers.fc(input=hidden2,
                    size=10,
                    act='softmax',
                    program=program,
                    init_program=init_program)

label = layers.data(
    name='y',
    shape=[1],
    data_type='int32',
    program=program,
    init_program=init_program)

cost = layers.cross_entropy(
    input=predict, label=label, program=program, init_program=init_program)
avg_cost = layers.mean(x=cost, program=program, init_program=init_program)

sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)

BATCH_SIZE = 128

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)

place = core.CPUPlace()
exe = Executor(place)

# Run the parameter-initialization program once before training.
exe.run(init_program, feed={}, fetch_list=[])

PASS_NUM = 100
for pass_id in range(PASS_NUM):
    for data in train_reader():
        # list(map(...)) keeps this working on Python 3, where map()
        # returns an iterator that np.array cannot consume directly.
        x_data = np.array(list(map(lambda x: x[0], data))).astype("float32")
        y_data = np.array(list(map(lambda x: x[1], data))).astype("int32")
        y_data = np.expand_dims(y_data, axis=1)

        tensor_x = core.LoDTensor()
        tensor_x.set(x_data, place)

        tensor_y = core.LoDTensor()
        tensor_y.set(y_data, place)

        outs = exe.run(program,
                       feed={'x': tensor_x,
                             'y': tensor_y},
                       fetch_list=[avg_cost])
        out = np.array(outs[0])
        if out[0] < 5.0:
            exit(0)

# if avg cost less than 5.0, we think our code is good.
exit(1)
python/paddle/v2/framework/tests/test_recurrent_op.py
浏览文件 @
c2feab7f
...
...
@@ -132,15 +132,15 @@ class RecurrentOpTest(unittest.TestCase):
# create RNNOp
self
.
rnnop
=
RecurrentOp
(
# inputs
in
link
s
=
[
"x"
],
boot_memori
es
=
[
"h_boot"
],
in
put
s
=
[
"x"
],
initial_stat
es
=
[
"h_boot"
],
step_net
=
"stepnet"
,
# outputs
out
link
s
=
[
"h@mem"
],
out
put
s
=
[
"h@mem"
],
step_scopes
=
"step_scopes"
,
# attributes
pre_memori
es
=
[
"h@pre"
],
memori
es
=
[
"h@mem"
])
ex_stat
es
=
[
"h@pre"
],
stat
es
=
[
"h@mem"
])
def
create_step_net
(
self
):
stepnet
=
core
.
Net
.
create
()
...
...
@@ -169,15 +169,15 @@ class RecurrentGradientOpTest(unittest.TestCase):
def
create_forward_op
(
self
):
self
.
forward_op
=
RecurrentOp
(
# inputs
in
link
s
=
[
"x"
],
boot_memori
es
=
[
"h_boot"
],
in
put
s
=
[
"x"
],
initial_stat
es
=
[
"h_boot"
],
step_net
=
"stepnet"
,
# outputs
out
link
s
=
[
"h"
],
out
put
s
=
[
"h"
],
step_scopes
=
"step_scopes"
,
# attributes
pre_memori
es
=
[
"h@pre"
],
memori
es
=
[
"h@alias"
])
ex_stat
es
=
[
"h@pre"
],
stat
es
=
[
"h@alias"
])
# create a stepnet for RNN
stepnet
=
core
.
Net
.
create
()
...
...
python/paddle/v2/framework/tests/test_reduce_op.py
浏览文件 @
c2feab7f
...
...
@@ -85,5 +85,33 @@ class Test1DReduce(OpTest):
self
.
check_grad
([
'X'
],
'Out'
)
class TestNorm(OpTest):
    """Test the norm op (p-norm reduction along one axis) against a
    numpy reference, including all intermediate outputs."""

    def setUp(self):
        # use x away from 0 to avoid errors of numerical gradient when
        # gradient is near 0
        x = np.random.random((5, 6, 10)).astype("float32") + 0.2
        p = 2
        dim = 1
        keep_dim = False

        # Reference pipeline mirroring the op's intermediate outputs.
        # NOTE(review): PowOut is computed from x rather than AbsOut;
        # harmless here since every element of x is positive — confirm
        # this matches the operator's definition.
        abs_out = np.absolute(x)
        pow_out = np.power(x, p)
        sum_out = np.sum(pow_out, axis=dim, keepdims=keep_dim)
        out = np.power(sum_out, 1. / p)

        self.op_type = "norm"
        self.inputs = {'X': x}
        self.attrs = {"p": p, "dim": dim, "keep_dim": keep_dim}
        self.outputs = {
            "AbsOut": abs_out,
            "PowOut": pow_out,
            "SumOut": sum_out,
            "Out": out
        }

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['X'], 'Out', max_relative_error=0.01)


if __name__ == '__main__':
    unittest.main()
python/paddle/v2/framework/tests/test_rmsprop_op.py
浏览文件 @
c2feab7f
...
...
@@ -46,7 +46,7 @@ class TestRmspropOp1(OpTest):
class
TestRmspropOp2
(
OpTest
):
'''Test RMSProp with defau
k
t values for attributes
'''Test RMSProp with defau
l
t values for attributes
'''
def
setUp
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录