PaddlePaddle / PaddleDetection
Commit 552c9012
Authored Jan 23, 2018 by Yibing Liu
Parent: f2c4bb67

Enable backward computation in lstmp_op

Showing 4 changed files with 206 additions and 57 deletions:
  paddle/operators/lstmp_op.cc                    +38  -15
  paddle/operators/lstmp_op.cu                     +0   -0
  paddle/operators/lstmp_op.h                    +128  -23
  python/paddle/v2/fluid/tests/test_lstmp_op.py   +40  -19
paddle/operators/lstmp_op.cc
...
...
@@ -39,21 +39,12 @@ class LSTMPOp : public framework::OperatorWithKernel {
"Output(BatchGate) of LSTMP should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"BatchCellPreAct"
),
"Output(BatchGate) of LSTMP should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"BatchHidden"
),
"Output(BatchHidden) of LSTMP should not be null."
);
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
2
,
"Input(X)'s rank must be 2."
);
if
(
ctx
->
HasInput
(
"H0"
))
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"C0"
),
"Input(C0) and Input(H0) of LSTMP should not "
"be null at the same time."
);
auto
h_dims
=
ctx
->
GetInputDim
(
"H0"
);
auto
c_dims
=
ctx
->
GetInputDim
(
"C0"
);
PADDLE_ENFORCE
(
h_dims
==
c_dims
,
"The dimension of Input(H0) and Input(C0) "
"should be the same."
);
}
int
frame_size
=
in_dims
[
1
]
/
4
;
auto
w_dims
=
ctx
->
GetInputDim
(
"Weight"
);
auto
proj_dims
=
ctx
->
GetInputDim
(
"ProjWeight"
);
...
...
@@ -75,6 +66,18 @@ class LSTMPOp : public framework::OperatorWithKernel {
"should be %d."
,
frame_size
);
if
(
ctx
->
HasInput
(
"H0"
))
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"C0"
),
"Input(C0) and Input(H0) of LSTMP should not "
"be null at the same time."
);
auto
h_dims
=
ctx
->
GetInputDim
(
"H0"
);
auto
c_dims
=
ctx
->
GetInputDim
(
"C0"
);
PADDLE_ENFORCE
(
h_dims
==
c_dims
,
"The dimension of Input(H0) and Input(C0) "
"should be the same."
);
ctx
->
SetOutputDim
(
"OrderedP0"
,
{
h_dims
[
0
],
proj_dims
[
1
]});
}
auto
b_dims
=
ctx
->
GetInputDim
(
"Bias"
);
PADDLE_ENFORCE_EQ
(
b_dims
.
size
(),
2
,
"The rank of Input(Bias) should be 2."
);
PADDLE_ENFORCE_EQ
(
b_dims
[
0
],
1
,
...
...
@@ -98,6 +101,7 @@ class LSTMPOp : public framework::OperatorWithKernel {
     ctx->SetOutputDim("Cell", out_dims);
     ctx->SetOutputDim("BatchGate", in_dims);
     ctx->SetOutputDim("BatchCellPreAct", out_dims);
+    ctx->SetOutputDim("BatchHidden", out_dims);
     ctx->ShareLoD("Input", "Projection");
     ctx->ShareLoD("Input", "Cell");
   }
...
...
@@ -169,6 +173,15 @@ class LSTMPOpMaker : public framework::OpProtoAndCheckerMaker {
"(LoDTensor) This LoDTensor is obtained in the forward and used "
"in the backward."
)
.
AsIntermediate
();
AddOutput
(
"BatchHidden"
,
"(LoDTensor) This LoDTensor is obtained in the forward and used "
"in the backward."
)
.
AsIntermediate
();
AddOutput
(
"OrderedP0"
,
"(Tensor) the projection of the initial hidden state "
"H0. This is a tensor with shape (N x P), where N is the "
"batch size and P is the hidden size."
)
.
AsIntermediate
();
AddAttr
<
bool
>
(
"use_peepholes"
,
"(bool, defalut: True) "
"whether to enable diagonal/peephole connections."
)
...
...
@@ -177,6 +190,12 @@ class LSTMPOpMaker : public framework::OpProtoAndCheckerMaker {
"(bool, defalut: False) "
"whether to compute reversed LSTMP."
)
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"share_cell_act"
,
"(bool, defalut: True) "
"whether to share activation with cell output. "
"If false, the projection would be linear, else "
"through an activation same with the cell output."
)
.
SetDefault
(
true
);
AddAttr
<
std
::
string
>
(
"gate_activation"
,
"(string, default: sigmoid)"
...
...
@@ -213,7 +232,7 @@ o_t = \sigma(W_{ox}x_{t} + W_{oh}r_{t-1} + W_{oc}c_t + b_o) \\
 h_t = o_t \odot act_h(c_t)
-r_t = W_{rh}h_t
+r_t = act_h'(W_{rh}h_t)
 $$
where the W terms denote weight matrices (e.g. $W_{xi}$ is the matrix
...
...
@@ -229,7 +248,8 @@ layer.
 The $\odot$ is the element-wise product of the vectors. $act_g$ and $act_h$
 are the cell input and cell output activation functions and `tanh` is usually
-used for them.
+used for them. If `share_cell_act` setted to `False`, $act_h'$ will be linear
+else will be same with $act_h$.
 Note that these $W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}$
 operations on the input $x_{t}$ are NOT included in this operator.
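To make the `share_cell_act` switch described in the hunk above concrete, here is a small NumPy sketch of the projection step only. It is illustrative, not the operator's kernel; the names `h_t`, `w_rh` and `act_h` follow the reference implementation in `test_lstmp_op.py`.

```python
import numpy as np

def project_hidden(h_t, w_rh, share_cell_act=True, act_h=np.tanh):
    """Recurrent projection of one LSTMP step: r_t = act_h'(W_rh h_t).

    With share_cell_act=True the projection reuses the cell output
    activation act_h; otherwise it stays linear, as the operator
    documentation above states.
    """
    r_t = np.dot(h_t, w_rh)  # (1 x D) . (D x P) -> 1 x P
    return act_h(r_t) if share_cell_act else r_t

# Tiny shape check: D = 4 hidden units projected down to P = 2.
h_t = np.random.randn(1, 4)
w_rh = np.random.randn(4, 2)
assert project_hidden(h_t, w_rh).shape == (1, 2)
```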
...
...
@@ -246,12 +266,14 @@ class LSTMPGradOp : public framework::OperatorWithKernel {
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("Input"),
                    "Input(Input) of LSTMP should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Hidden"),
-                   "Input(Hidden) of LSTMP should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Projection"),
+                   "Input(Projection) of LSTMP should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("Cell"),
                    "Input(Cell) of LSTMP should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("Weight"),
                    "Input(Weight) of LSTMP should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("ProjWeight"),
                    "Input(ProjWeight) of LSTMP should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("Bias"),
                    "Input(Bias) of LSTMP should not be null.");
...
...
@@ -268,6 +290,7 @@ class LSTMPGradOp : public framework::OperatorWithKernel {
     SetOutGradDim("Input");
     SetOutGradDim("Weight");
+    SetOutGradDim("ProjWeight");
     SetOutGradDim("Bias");
     SetOutGradDim("H0");
     SetOutGradDim("C0");
...
...
paddle/operators/lstmp_op.cu.cc → paddle/operators/lstmp_op.cu
File moved.
paddle/operators/lstmp_op.h
...
...
@@ -13,18 +13,25 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
-#include "paddle/framework/op_registry.h"
+#include "paddle/operators/activation_op.h"
 #include "paddle/operators/math/detail/activation_functions.h"
 #include "paddle/operators/math/lstm_compute.h"
 #include "paddle/operators/math/math_function.h"
 #include "paddle/operators/math/sequence2batch.h"

+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
 namespace paddle {
 namespace operators {

 using LoDTensor = framework::LoDTensor;
 using Tensor = framework::Tensor;

+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
+
 template <typename DeviceContext, typename T>
 inline void ReorderInitState(const DeviceContext& ctx,
                              const framework::Tensor& src, const size_t* index,
...
@@ -37,6 +44,21 @@ inline void ReorderInitState(const DeviceContext& ctx,
 template <typename DeviceContext, typename T>
 class LSTMPKernel : public framework::OpKernel<T> {
  public:
+  template <typename Device, typename X, typename Y>
+  void ActCompute(const math::detail::ActivationType act_type, const Device& d,
+                  X x, Y y) const {
+    if (act_type == math::detail::ActivationType::kIdentity)
+      y.device(d) = x;
+    else if (act_type == math::detail::ActivationType::kSigmoid)
+      SigmoidFunctor<T>()(d, x, y);
+    else if (act_type == math::detail::ActivationType::kTanh)
+      TanhFunctor<T>()(d, x, y);
+    else if (act_type == math::detail::ActivationType::kReLU)
+      ReluFunctor<T>()(d, x, y);
+    else
+      PADDLE_THROW("unsupported activation type");
+  }
+
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* input = ctx.Input<LoDTensor>("Input");
     auto* weight = ctx.Input<Tensor>("Weight");
...
...
@@ -44,6 +66,7 @@ class LSTMPKernel : public framework::OpKernel<T> {
     auto* bias = ctx.Input<Tensor>("Bias");

     auto* hidden_t0 = ctx.Input<Tensor>("H0");
+    auto* ordered_proj0 = ctx.Output<Tensor>("OrderedP0");
     auto* cell_t0 = ctx.Input<Tensor>("C0");

     auto* batch_gate = ctx.Output<LoDTensor>("BatchGate");
...
...
@@ -97,12 +120,13 @@ class LSTMPKernel : public framework::OpKernel<T> {
     }
     // Use the local variable as here.
-    LoDTensor batch_hidden, batch_proj, batch_cell;
+    LoDTensor batch_proj, batch_cell;
     auto* batch_cell_pre_act = ctx.Output<LoDTensor>("BatchCellPreAct");
-    batch_hidden.mutable_data<T>(dims, ctx.GetPlace());         // T x D
-    batch_cell_pre_act->mutable_data<T>(dims, ctx.GetPlace());
+    auto* batch_hidden = ctx.Output<LoDTensor>("BatchHidden");
+    batch_hidden->mutable_data<T>(dims, ctx.GetPlace());        // T x D
     batch_proj.mutable_data<T>(proj_dims, ctx.GetPlace());      // T x P
     batch_cell.mutable_data<T>(dims, ctx.GetPlace());           // T x D
+    batch_cell_pre_act->mutable_data<T>(dims, ctx.GetPlace());

     auto batch_starts = batch_gate->lod()[0];
     size_t num_batch = batch_starts.size() - 1;
...
...
@@ -112,13 +136,15 @@ class LSTMPKernel : public framework::OpKernel<T> {
         ctx.Attr<std::string>("cell_activation"));
     auto cand_act = math::detail::GetActivationType(
         ctx.Attr<std::string>("candidate_activation"));
+    auto share_cell_act = ctx.Attr<bool>("share_cell_act");
+    auto& place = *ctx.template device_context<DeviceContext>().eigen_device();

     for (size_t n = 0; n < num_batch; n++) {
       int bstart = static_cast<int>(batch_starts[n]);
       int bend = static_cast<int>(batch_starts[n + 1]);

       Tensor gate_t = batch_gate->Slice(bstart, bend);
-      Tensor hidden_t = batch_hidden.Slice(bstart, bend);
+      Tensor hidden_t = batch_hidden->Slice(bstart, bend);
       Tensor proj_t = batch_proj.Slice(bstart, bend);
       Tensor cell_t = batch_cell.Slice(bstart, bend);
       Tensor cell_pre_act_t = batch_cell_pre_act->Slice(bstart, bend);
...
...
@@ -140,15 +166,19 @@ class LSTMPKernel : public framework::OpKernel<T> {
         // Since the batch computing for LSTMP reorders the input sequence
         // according to their length. The initialized hidden state also needs
         // to reorder.
-        Tensor ordered_h0, ordered_proj0;
-        ordered_proj0.Resize({1, proj_weight->dims()[1]});
-        ordered_proj0.mutable_data<T>(ctx.GetPlace());
+        Tensor ordered_h0;
+        ordered_proj0->mutable_data<T>(ctx.GetPlace());
         ReorderInitState<DeviceContext, T>(device_ctx, *hidden_t0, order,
                                            &ordered_h0, true);
         math::matmul<DeviceContext, T>(device_ctx, ordered_h0, false,
                                        *proj_weight, false, static_cast<T>(1.0),
-                                       &ordered_proj0, static_cast<T>(0.0));
-        math::matmul<DeviceContext, T>(device_ctx, ordered_proj0, false,
+                                       ordered_proj0, static_cast<T>(0.0));
+        if (share_cell_act) {
+          auto proj0_dev = EigenMatrix<T>::From(*ordered_proj0);
+          ActCompute(cell_act, place, proj0_dev, proj0_dev);
+        }
+        math::matmul<DeviceContext, T>(device_ctx, *ordered_proj0, false,
                                        *weight, false, static_cast<T>(1.0),
                                        &gate_t, static_cast<T>(1.0));
       }
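In the same spirit, a hedged NumPy sketch of what the `OrderedP0` intermediate holds: the re-ordered initial hidden state pushed through the projection weight (and, when `share_cell_act` is on, through the cell activation) before it contributes to the first batch of gate pre-activations. The reordering index and variable names here are illustrative, not the kernel's actual data layout.

```python
import numpy as np

def initial_proj(h0, order, w_rh, weight, share_cell_act=True, act=np.tanh):
    # Reorder the initial hidden state into length-sorted batch order
    # (the role ReorderInitState plays in the hunk above).
    ordered_h0 = h0[order]                 # N x D
    ordered_p0 = np.dot(ordered_h0, w_rh)  # N x P, saved as "OrderedP0"
    if share_cell_act:
        ordered_p0 = act(ordered_p0)
    # Contribution of the initial projection to the first gate pre-activations.
    gate0 = np.dot(ordered_p0, weight)     # N x 4D
    return ordered_p0, gate0

h0 = np.random.randn(3, 4)       # N = 3 sequences, D = 4
order = np.array([2, 0, 1])      # hypothetical length-sorted order
w_rh = np.random.randn(4, 2)     # D x P
weight = np.random.randn(2, 16)  # P x 4D
p0, g0 = initial_proj(h0, order, w_rh, weight)
assert p0.shape == (3, 2) and g0.shape == (3, 16)
```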
...
...
@@ -164,6 +194,10 @@ class LSTMPKernel : public framework::OpKernel<T> {
       math::matmul<DeviceContext, T>(device_ctx, hidden_t, false, *proj_weight,
                                      false, static_cast<T>(1.0), &proj_t,
                                      static_cast<T>(0.0));
+      if (share_cell_act) {
+        auto proj_t_dev = EigenMatrix<T>::From(proj_t);
+        ActCompute(cell_act, place, proj_t_dev, proj_t_dev);
+      }
     }

     math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
...
...
@@ -180,9 +214,26 @@ class LSTMPKernel : public framework::OpKernel<T> {
 template <typename DeviceContext, typename T>
 class LSTMPGradKernel : public framework::OpKernel<T> {
  public:
+  template <typename Device, typename X, typename Y, typename DX, typename DY>
+  void ActGradCompute(const math::detail::ActivationType act_type,
+                      const Device& d, X x, Y y, DX dx, DY dy) const {
+    // x is dummy and won't be used even in Relu(use y instead)
+    if (act_type == math::detail::ActivationType::kIdentity)
+      dx.device(d) = dy;
+    else if (act_type == math::detail::ActivationType::kSigmoid)
+      SigmoidGradFunctor<T>()(d, x, y, dy, dx);
+    else if (act_type == math::detail::ActivationType::kTanh)
+      TanhGradFunctor<T>()(d, x, y, dy, dx);
+    else if (act_type == math::detail::ActivationType::kReLU)
+      ReluGradFunctor<T>()(d, x, y, dy, dx);
+    else
+      PADDLE_THROW("unsupported activation type");
+  }
+
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* input = ctx.Input<LoDTensor>("Input");
     auto* weight = ctx.Input<Tensor>("Weight");
     auto* proj_weight = ctx.Input<Tensor>("ProjWeight");
     auto* bias = ctx.Input<Tensor>("Bias");

     auto* proj_out = ctx.Input<LoDTensor>("Projection");
...
...
@@ -190,14 +241,19 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
     auto* batch_gate = ctx.Input<LoDTensor>("BatchGate");
     auto* batch_cell_pre_act = ctx.Input<LoDTensor>("BatchCellPreAct");
+    auto* batch_hidden = ctx.Input<LoDTensor>("BatchHidden");

-    auto* hidden_g = ctx.Input<LoDTensor>(framework::GradVarName("Projection"));
+    auto* projection_g =
+        ctx.Input<LoDTensor>(framework::GradVarName("Projection"));

     auto* in_g = ctx.Output<LoDTensor>(framework::GradVarName("Input"));
     auto* weight_g = ctx.Output<Tensor>(framework::GradVarName("Weight"));
+    auto* proj_weight_g = ctx.Output<Tensor>(framework::GradVarName("ProjWeight"));
     auto* bias_g = ctx.Output<Tensor>(framework::GradVarName("Bias"));

     auto* h0 = ctx.Input<Tensor>("H0");
+    auto* ordered_proj0 = ctx.Input<Tensor>("OrderedP0");
     auto* c0 = ctx.Input<Tensor>("C0");

     auto* h0_g = ctx.Output<Tensor>(framework::GradVarName("H0"));
...
...
@@ -209,6 +265,10 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
       weight_g->mutable_data<T>(ctx.GetPlace());
       zero(device_ctx, weight_g, static_cast<T>(0.0));
     }
+    if (proj_weight_g) {
+      proj_weight_g->mutable_data<T>(ctx.GetPlace());
+      zero(device_ctx, proj_weight_g, static_cast<T>(0.0));
+    }

     // ordered_h0/c0 is the reordered hidden/cell initialization.
     // ordered_h0_g/c0_g is the reordered gradient of hidden/cell
...
...
@@ -224,7 +284,8 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
     }

     auto in_dims = input->dims();
-    auto out_dims = hidden_g->dims();
+    auto out_dims = cell_out->dims();
+    framework::DDim proj_dims({in_dims[0], proj_weight->dims()[1]});
     int frame_size = static_cast<int>(in_dims[1] / 4);
     PADDLE_ENFORCE_EQ(frame_size, out_dims[1]);
...
...
@@ -267,10 +328,11 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
       to_batch(ctx, src, dst, false);
     };

-    LoDTensor batch_proj, batch_proj_g, batch_cell;
-    ToBatch(device_ctx, *proj_out, out_dims, batch_proj);
-    ToBatch(device_ctx, *hidden_g, out_dims, batch_proj_g);
-    ToBatch(device_ctx, *cell_out, out_dims, batch_cell);
+    LoDTensor batch_hidden_g, batch_proj, batch_proj_g, batch_cell;
+    batch_hidden_g.mutable_data<T>(out_dims, ctx.GetPlace());
+    ToBatch(device_ctx, *proj_out, proj_dims, batch_proj);        // T x P
+    ToBatch(device_ctx, *projection_g, proj_dims, batch_proj_g);  // T x P
+    ToBatch(device_ctx, *cell_out, out_dims, batch_cell);         // T x D

     LoDTensor batch_cell_g, batch_gate_g;
     batch_cell_g.mutable_data<T>(out_dims, ctx.GetPlace());
...
...
@@ -286,6 +348,8 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
         ctx.Attr<std::string>("cell_activation"));
     auto cand_act = math::detail::GetActivationType(
         ctx.Attr<std::string>("candidate_activation"));
+    auto share_cell_act = ctx.Attr<bool>("share_cell_act");
+    auto& place = *ctx.template device_context<DeviceContext>().eigen_device();

     auto batch_starts = batch_gate->lod()[0];
     size_t num_batch = batch_starts.size() - 1;
...
...
@@ -293,6 +357,19 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
       int bstart = static_cast<int>(batch_starts[n]);
       int bend = static_cast<int>(batch_starts[n + 1]);

+      Tensor cur_proj = batch_proj.Slice(bstart, bend);
+      Tensor proj_g = batch_proj_g.Slice(bstart, bend);
+      if (share_cell_act) {
+        auto cur_proj_dev = EigenMatrix<T>::From(cur_proj);
+        auto proj_g_dev = EigenMatrix<T>::From(proj_g);
+        ActGradCompute(cell_act, place, cur_proj_dev, cur_proj_dev, proj_g_dev,
+                       proj_g_dev);
+      }
+      Tensor out_g = batch_hidden_g.Slice(bstart, bend);
+      math::matmul<DeviceContext, T>(device_ctx, proj_g, false, *proj_weight,
+                                     true, static_cast<T>(1.0), &out_g,
+                                     static_cast<T>(0.0));
+
       Tensor gate = batch_gate->Slice(bstart, bend);
       Tensor cell = batch_cell.Slice(bstart, bend);
       Tensor cell_pre_act = batch_cell_pre_act->Slice(bstart, bend);
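A hedged NumPy sketch of the backward step this hunk implements: the incoming gradient on the projection r_t is first pushed back through the optional activation, then through W_rh to recover the gradient on the un-projected hidden state (the gradient of W_rh itself is accumulated further down in the diff). The names are illustrative, and the derivative assumes act_h = tanh, matching the TanhGradFunctor path of ActGradCompute.

```python
import numpy as np

def proj_backward(h_t, r_t, d_r, w_rh, share_cell_act=True):
    """Backward of r_t = act(h_t . w_rh) for one batch slice.

    h_t : (B x D) un-projected hidden state (a BatchHidden slice)
    r_t : (B x P) projected output after the optional activation
    d_r : (B x P) gradient flowing into the projection
    Returns (d_h, d_w_rh).
    """
    if share_cell_act:
        # tanh'(z) expressed through the activated output y: 1 - y^2.
        d_r = d_r * (1.0 - r_t * r_t)
    d_h = np.dot(d_r, w_rh.T)    # gradient on the hidden state
    d_w_rh = np.dot(h_t.T, d_r)  # gradient on the projection weight
    return d_h, d_w_rh

h_t = np.random.randn(5, 4)      # B = 5, D = 4
w_rh = np.random.randn(4, 2)     # D x P
r_t = np.tanh(np.dot(h_t, w_rh))
d_r = np.random.randn(5, 2)
d_h, d_w = proj_backward(h_t, r_t, d_r, w_rh)
assert d_h.shape == h_t.shape and d_w.shape == w_rh.shape
```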
...
...
@@ -300,7 +377,6 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
       lstmp_value.state_value = cell.data<T>();
       lstmp_value.state_active_value = cell_pre_act.data<T>();

-      Tensor out_g = batch_proj_g.Slice(bstart, bend);
       Tensor gate_g = batch_gate_g.Slice(bstart, bend);
       Tensor cell_g = batch_cell_g.Slice(bstart, bend);
       lstmp_grad.state_grad = cell_g.data<T>();
...
...
@@ -337,19 +413,48 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
                                        false, static_cast<T>(1.0), weight_g,
                                        static_cast<T>(1.0));
         }
+        if (proj_weight_g) {
+          /* backward proj weigh */
+          Tensor hidden_t = batch_hidden->Slice(bstart, bend);
+          math::matmul<DeviceContext, T>(device_ctx, hidden_t, true, proj_g,
+                                         false, static_cast<T>(1.0),
+                                         proj_weight_g, static_cast<T>(1.0));
+        }
       } else {
         if (h0 && weight_g) {
           ReorderInitState<DeviceContext, T>(device_ctx, *h0, order,
                                              &ordered_h0, true);
-          math::matmul<DeviceContext, T>(device_ctx, ordered_h0, true, gate_g,
-                                         false, static_cast<T>(1.0), weight_g,
-                                         static_cast<T>(1.0));
+          if (weight_g) {
+            math::matmul<DeviceContext, T>(device_ctx, *ordered_proj0, true,
+                                           gate_g, false, static_cast<T>(1.0),
+                                           weight_g, static_cast<T>(1.0));
+          }
         }
-        if (h0 && h0_g) {
+        if (h0 && (h0_g || proj_weight_g)) {
           ordered_h0_g.mutable_data<T>(h0_g->dims(), ctx.GetPlace());
+          Tensor proj0_g;
+          proj0_g.Resize({in_dims[0], proj_weight->dims()[1]});
+          proj0_g.mutable_data<T>(ctx.GetPlace());
           math::matmul<DeviceContext, T>(device_ctx, gate_g, false, *weight,
-                                         true, static_cast<T>(1.0),
-                                         &ordered_h0_g, static_cast<T>(0.0));
+                                         true, static_cast<T>(1.0), &proj0_g,
+                                         static_cast<T>(0.0));
+          if (share_cell_act) {
+            auto proj0_dev = EigenMatrix<T>::From(*ordered_proj0);
+            auto proj0_g_dev = EigenMatrix<T>::From(proj0_g);
+            ActGradCompute(cell_act, place, proj0_dev, proj0_dev, proj0_g_dev,
+                           proj0_g_dev);
+          }
+          // Tensor proj0_g = proj_g.Slice(bstart, bend);
+          if (h0_g) {
+            math::matmul<DeviceContext, T>(device_ctx, proj0_g, false,
+                                           *proj_weight, true,
+                                           static_cast<T>(1.0), &ordered_h0_g,
+                                           static_cast<T>(0.0));
+          }
+          if (proj_weight_g) {
+            math::matmul<DeviceContext, T>(device_ctx, ordered_h0, true,
+                                           proj0_g, false, static_cast<T>(1.0),
+                                           proj_weight_g, static_cast<T>(1.0));
+          }
         }
       }
     }
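For the t = 0 branch just above, a hedged NumPy sketch of the chain the kernel follows for the initial state: the gate gradient is taken back through `Weight` to a gradient on `OrderedP0` (`proj0_g`), through the optional activation, and from there to gradients on `H0` and `ProjWeight`. Variable names are illustrative only, and the derivative again assumes a tanh activation.

```python
import numpy as np

def initial_state_backward(ordered_h0, ordered_p0, gate_g, weight, w_rh,
                           share_cell_act=True):
    """Backward for gate0 = act(ordered_h0 . w_rh) . weight at t = 0."""
    # Gate gradient back through the input-to-gate weight -> proj0_g (N x P).
    proj0_g = np.dot(gate_g, weight.T)
    if share_cell_act:
        # Use the saved OrderedP0 output for the tanh gradient.
        proj0_g = proj0_g * (1.0 - ordered_p0 * ordered_p0)
    h0_g = np.dot(proj0_g, w_rh.T)          # gradient on the initial state H0
    w_rh_g = np.dot(ordered_h0.T, proj0_g)  # contribution to ProjWeight grad
    return h0_g, w_rh_g

ordered_h0 = np.random.randn(3, 4)          # N x D
w_rh = np.random.randn(4, 2)                # D x P
weight = np.random.randn(2, 16)             # P x 4D
ordered_p0 = np.tanh(np.dot(ordered_h0, w_rh))
gate_g = np.random.randn(3, 16)
h0_g, w_rh_g = initial_state_backward(ordered_h0, ordered_p0, gate_g, weight, w_rh)
assert h0_g.shape == ordered_h0.shape and w_rh_g.shape == w_rh.shape
```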
...
...
python/paddle/v2/fluid/tests/test_lstmp_op.py
...
...
@@ -62,7 +62,8 @@ def lstmp(
           is_reverse=False,
           act_gate=None,
           act_cell=None,
-          act_cand=None):
+          act_cand=None,
+          share_cell_act=True):
     def _step(x, w_r, w_rh, w_c, r_pre, c_pre, act_gate, act_cell, act_cand):
         g = np.dot(r_pre, w_r)  # 1 x 4D
         g = g + x
...
...
@@ -85,6 +86,8 @@ def lstmp(
         h = g_o * act_cell(c)
         # projection
         r = np.dot(h, w_rh)
+        if share_cell_act:
+            r = act_cell(r)
         return r, c

     def _reverse(x, lod):
...
...
@@ -107,6 +110,8 @@ def lstmp(
         seq_len = offset[i + 1] - offset[i]
         x = input[offset[i]:offset[i + 1], :]
         r_pre = np.dot(h0[i], w_rh)  # 1 x P
+        if share_cell_act:
+            r_pre = act_cell(r_pre)
         c_pre = c0[i]  # 1 x D
         for j in range(seq_len):
             # compute one step
...
...
@@ -138,6 +143,7 @@ class TestLstmOp(OpTest):
         self.act_cell = 'tanh'
         self.act_cand = 'tanh'
+        self.share_cell_act = True

         self.has_initial_state = False
         self.is_reverse = False
         self.use_peepholes = True
...
...
@@ -167,7 +173,7 @@ class TestLstmOp(OpTest):
         w_rh = np.random.normal(size=(self.D, self.P)).astype('float64')

         r, c = lstmp(x, self.lod, h0, c0, w, w_rh, w_b, w_c, self.is_reverse,
                      ACTVATION[self.act_gate], ACTVATION[self.act_cell],
-                     ACTVATION[self.act_cand])
+                     ACTVATION[self.act_cand], self.share_cell_act)

         self.inputs = {'Input': (x, self.lod), 'Weight': w, 'ProjWeight': w_rh}
...
...
@@ -192,28 +198,30 @@ class TestLstmOp(OpTest):
     def test_check_output(self):
         self.check_output(atol=1e-8)

     """
     def test_check_grad(self):
         # TODO(qingqing) remove folowing lines after the check_grad is refined.
         N = len(self.lod[0]) - 1
+        self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
         self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
+        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
         self.outputs['BatchCellPreAct'] = np.zeros(
             (N, self.D)).astype('float64')
         self.check_grad(
-            ['Input', 'Weight', 'Bias'], ['Hidden'], max_relative_error=5e-4)
+            ['Input', 'Weight', 'Bias'], ['Projection'], max_relative_error=5e-3)
     """


 class TestLstmOpHasInitial(TestLstmOp):
     def set_argument(self):
         self.lod = [[0, 2, 5, 7]]
         self.D = 16
         self.P = 5

         self.act_gate = 'sigmoid'
         self.act_cell = 'tanh'
         self.act_cand = 'tanh'
+        self.share_cell_act = True

         self.has_initial_state = True
         self.is_reverse = True
         self.use_peepholes = True
...
...
@@ -221,63 +229,74 @@ class TestLstmOpHasInitial(TestLstmOp):
     def test_check_grad(self):
         # TODO(qingqing) remove folowing lines after the check_grad is refined.
         N = len(self.lod[0]) - 1
+        self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
         self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
+        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
         self.outputs['BatchCellPreAct'] = np.zeros(
             (N, self.D)).astype('float64')
         self.check_grad(
-            ['Input', 'Weight', 'Bias', 'H0', 'C0'], ['Hidden'],
-            max_relative_error=5e-4)
+            ['Input', 'Weight', 'Bias', 'H0', 'C0'], ['Projection'],
+            max_relative_error=5e-3)

     def test_check_grad_ingore_bias(self):
         N = len(self.lod[0]) - 1
+        self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
         self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
+        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
         self.outputs['BatchCellPreAct'] = np.zeros(
             (N, self.D)).astype('float64')
         self.check_grad(
-            ['Input', 'Weight'], ['Hidden'],
-            max_relative_error=5e-4,
+            ['Input', 'Weight'], ['Projection'],
+            max_relative_error=5e-3,
             no_grad_set=set('Bias'))

     def test_check_grad_ingore_weight(self):
         N = len(self.lod[0]) - 1
+        self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
         self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
+        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
         self.outputs['BatchCellPreAct'] = np.zeros(
             (N, self.D)).astype('float64')
         self.check_grad(
-            ['Input', 'Bias'], ['Hidden'],
-            max_relative_error=5e-4,
+            ['Input', 'Bias'], ['Projection'],
+            max_relative_error=5e-3,
             no_grad_set=set('Weight'))

     def test_check_grad_ingore_input(self):
         N = len(self.lod[0]) - 1
+        self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
         self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
+        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
         self.outputs['BatchCellPreAct'] = np.zeros(
             (N, self.D)).astype('float64')
         self.check_grad(
-            ['Weight', 'Bias'], ['Hidden'],
-            max_relative_error=5e-4,
+            ['Weight', 'Bias'], ['Projection'],
+            max_relative_error=5e-3,
             no_grad_set=set('Input'))

     def test_check_grad_ingore_h0(self):
         N = len(self.lod[0]) - 1
+        self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
         self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
+        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
         self.outputs['BatchCellPreAct'] = np.zeros(
             (N, self.D)).astype('float64')
         self.check_grad(
-            ['Input', 'Weight', 'Bias', 'C0'], ['Hidden'],
-            max_relative_error=5e-4,
+            ['Input', 'Weight', 'Bias', 'C0'], ['Projection'],
+            max_relative_error=5e-3,
             no_grad_set=set('H0'))

     def test_check_grad_ingore_c0(self):
         N = len(self.lod[0]) - 1
+        self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
         self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
+        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
         self.outputs['BatchCellPreAct'] = np.zeros(
             (N, self.D)).astype('float64')
         self.check_grad(
-            ['Input', 'Weight', 'Bias', 'H0'], ['Hidden'],
-            max_relative_error=5e-4,
+            ['Input', 'Weight', 'Bias', 'H0'], ['Projection'],
+            max_relative_error=5e-3,
             no_grad_set=set('C0'))
     """


 class TestLstmOpRerverse(TestLstmOp):
...
...
@@ -290,6 +309,7 @@ class TestLstmOpRerverse(TestLstmOp):
         self.act_cell = 'tanh'
         self.act_cand = 'tanh'
+        self.share_cell_act = True

         self.has_initial_state = False
         self.is_reverse = True
         self.use_peepholes = True
...
...
@@ -305,6 +325,7 @@ class TestLstmOpNotUsePeepholes(TestLstmOp):
         self.act_cell = 'tanh'
         self.act_cand = 'tanh'
+        self.share_cell_act = True

         self.has_initial_state = False
         self.is_reverse = True
         self.use_peepholes = False
...
...