Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
552c9012
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
552c9012
编写于
1月 23, 2018
作者:
Y
Yibing Liu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Enable backward computation in lstmp_op
上级
f2c4bb67
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
206 addition
and
57 deletion
+206
-57
paddle/operators/lstmp_op.cc
paddle/operators/lstmp_op.cc
+38
-15
paddle/operators/lstmp_op.cu
paddle/operators/lstmp_op.cu
+0
-0
paddle/operators/lstmp_op.h
paddle/operators/lstmp_op.h
+128
-23
python/paddle/v2/fluid/tests/test_lstmp_op.py
python/paddle/v2/fluid/tests/test_lstmp_op.py
+40
-19
未找到文件。
paddle/operators/lstmp_op.cc
浏览文件 @
552c9012
...
...
@@ -39,21 +39,12 @@ class LSTMPOp : public framework::OperatorWithKernel {
"Output(BatchGate) of LSTMP should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"BatchCellPreAct"
),
"Output(BatchGate) of LSTMP should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"BatchHidden"
),
"Output(BatchHidden) of LSTMP should not be null."
);
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
2
,
"Input(X)'s rank must be 2."
);
if
(
ctx
->
HasInput
(
"H0"
))
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"C0"
),
"Input(C0) and Input(H0) of LSTMP should not "
"be null at the same time."
);
auto
h_dims
=
ctx
->
GetInputDim
(
"H0"
);
auto
c_dims
=
ctx
->
GetInputDim
(
"C0"
);
PADDLE_ENFORCE
(
h_dims
==
c_dims
,
"The dimension of Input(H0) and Input(C0) "
"should be the same."
);
}
int
frame_size
=
in_dims
[
1
]
/
4
;
auto
w_dims
=
ctx
->
GetInputDim
(
"Weight"
);
auto
proj_dims
=
ctx
->
GetInputDim
(
"ProjWeight"
);
...
...
@@ -75,6 +66,18 @@ class LSTMPOp : public framework::OperatorWithKernel {
"should be %d."
,
frame_size
);
if
(
ctx
->
HasInput
(
"H0"
))
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"C0"
),
"Input(C0) and Input(H0) of LSTMP should not "
"be null at the same time."
);
auto
h_dims
=
ctx
->
GetInputDim
(
"H0"
);
auto
c_dims
=
ctx
->
GetInputDim
(
"C0"
);
PADDLE_ENFORCE
(
h_dims
==
c_dims
,
"The dimension of Input(H0) and Input(C0) "
"should be the same."
);
ctx
->
SetOutputDim
(
"OrderedP0"
,
{
h_dims
[
0
],
proj_dims
[
1
]});
}
auto
b_dims
=
ctx
->
GetInputDim
(
"Bias"
);
PADDLE_ENFORCE_EQ
(
b_dims
.
size
(),
2
,
"The rank of Input(Bias) should be 2."
);
PADDLE_ENFORCE_EQ
(
b_dims
[
0
],
1
,
...
...
@@ -98,6 +101,7 @@ class LSTMPOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
"Cell"
,
out_dims
);
ctx
->
SetOutputDim
(
"BatchGate"
,
in_dims
);
ctx
->
SetOutputDim
(
"BatchCellPreAct"
,
out_dims
);
ctx
->
SetOutputDim
(
"BatchHidden"
,
out_dims
);
ctx
->
ShareLoD
(
"Input"
,
"Projection"
);
ctx
->
ShareLoD
(
"Input"
,
"Cell"
);
}
...
...
@@ -169,6 +173,15 @@ class LSTMPOpMaker : public framework::OpProtoAndCheckerMaker {
"(LoDTensor) This LoDTensor is obtained in the forward and used "
"in the backward."
)
.
AsIntermediate
();
AddOutput
(
"BatchHidden"
,
"(LoDTensor) This LoDTensor is obtained in the forward and used "
"in the backward."
)
.
AsIntermediate
();
AddOutput
(
"OrderedP0"
,
"(Tensor) the projection of the initial hidden state "
"H0. This is a tensor with shape (N x P), where N is the "
"batch size and P is the hidden size."
)
.
AsIntermediate
();
AddAttr
<
bool
>
(
"use_peepholes"
,
"(bool, defalut: True) "
"whether to enable diagonal/peephole connections."
)
...
...
@@ -177,6 +190,12 @@ class LSTMPOpMaker : public framework::OpProtoAndCheckerMaker {
"(bool, defalut: False) "
"whether to compute reversed LSTMP."
)
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"share_cell_act"
,
"(bool, defalut: True) "
"whether to share activation with cell output. "
"If false, the projection would be linear, else "
"through an activation same with the cell output."
)
.
SetDefault
(
true
);
AddAttr
<
std
::
string
>
(
"gate_activation"
,
"(string, default: sigmoid)"
...
...
@@ -213,7 +232,7 @@ o_t = \sigma(W_{ox}x_{t} + W_{oh}r_{t-1} + W_{oc}c_t + b_o) \\
h_t = o_t \odot act_h(c_t)
r_t =
W_{rh}h_t
r_t =
act_h'(W_{rh}h_t)
$$
where the W terms denote weight matrices (e.g. $W_{xi}$ is the matrix
...
...
@@ -229,7 +248,8 @@ layer.
The $\odot$ is the element-wise product of the vectors. $act_g$ and $act_h$
are the cell input and cell output activation functions and `tanh` is usually
used for them.
used for them. If `share_cell_act` is set to `False`, $act_h'$ will be linear;
otherwise it will be the same as $act_h$.
Note that these $W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}$
operations on the input $x_{t}$ are NOT included in this operator.
...
...
@@ -246,12 +266,14 @@ class LSTMPGradOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Input"
),
"Input(Input) of LSTMP should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
Hidde
n"
),
"Input(
Hidde
n) of LSTMP should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
Projectio
n"
),
"Input(
Projectio
n) of LSTMP should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Cell"
),
"Input(Cell) of LSTMP should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Weight"
),
"Input(Weight) of LSTMP should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"ProjWeight"
),
"Input(ProjWeight) of LSTMP should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Bias"
),
"Input(Bias) of LSTMP should not be null."
);
...
...
@@ -268,6 +290,7 @@ class LSTMPGradOp : public framework::OperatorWithKernel {
SetOutGradDim
(
"Input"
);
SetOutGradDim
(
"Weight"
);
SetOutGradDim
(
"ProjWeight"
);
SetOutGradDim
(
"Bias"
);
SetOutGradDim
(
"H0"
);
SetOutGradDim
(
"C0"
);
...
...
paddle/operators/lstmp_op.cu
.cc
→
paddle/operators/lstmp_op.cu
浏览文件 @
552c9012
文件已移动
paddle/operators/lstmp_op.h
浏览文件 @
552c9012
...
...
@@ -13,18 +13,25 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/
framework/op_registry
.h"
#include "paddle/
operators/activation_op
.h"
#include "paddle/operators/math/detail/activation_functions.h"
#include "paddle/operators/math/lstm_compute.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/sequence2batch.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
framework
::
LoDTensor
;
using
Tensor
=
framework
::
Tensor
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenMatrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
template
<
typename
DeviceContext
,
typename
T
>
inline
void
ReorderInitState
(
const
DeviceContext
&
ctx
,
const
framework
::
Tensor
&
src
,
const
size_t
*
index
,
...
...
@@ -37,6 +44,21 @@ inline void ReorderInitState(const DeviceContext& ctx,
template
<
typename
DeviceContext
,
typename
T
>
class
LSTMPKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
/// Applies the activation selected by `act_type` to `x`, writing into `y`
/// on device `d`.
///
/// - kIdentity: assigns `x` to `y` through `y.device(d)`.
/// - kSigmoid / kTanh / kReLU: forwards to SigmoidFunctor<T>,
///   TanhFunctor<T> and ReluFunctor<T> respectively, called as (d, x, y).
/// - Any other value: raises through PADDLE_THROW.
template <typename Device, typename X, typename Y>
void ActCompute(const math::detail::ActivationType act_type, const Device& d,
                X x, Y y) const {
  using ActKind = math::detail::ActivationType;

  if (act_type == ActKind::kIdentity) {
    // No transformation: plain device-side assignment.
    y.device(d) = x;
    return;
  }
  if (act_type == ActKind::kSigmoid) {
    SigmoidFunctor<T>()(d, x, y);
    return;
  }
  if (act_type == ActKind::kTanh) {
    TanhFunctor<T>()(d, x, y);
    return;
  }
  if (act_type == ActKind::kReLU) {
    ReluFunctor<T>()(d, x, y);
    return;
  }

  // None of the handled activation kinds matched.
  PADDLE_THROW("unsupported activation type");
}
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
input
=
ctx
.
Input
<
LoDTensor
>
(
"Input"
);
auto
*
weight
=
ctx
.
Input
<
Tensor
>
(
"Weight"
);
...
...
@@ -44,6 +66,7 @@ class LSTMPKernel : public framework::OpKernel<T> {
auto
*
bias
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
auto
*
hidden_t0
=
ctx
.
Input
<
Tensor
>
(
"H0"
);
auto
*
ordered_proj0
=
ctx
.
Output
<
Tensor
>
(
"OrderedP0"
);
auto
*
cell_t0
=
ctx
.
Input
<
Tensor
>
(
"C0"
);
auto
*
batch_gate
=
ctx
.
Output
<
LoDTensor
>
(
"BatchGate"
);
...
...
@@ -97,12 +120,13 @@ class LSTMPKernel : public framework::OpKernel<T> {
}
// Use the local variable as here.
LoDTensor
batch_
hidden
,
batch_
proj
,
batch_cell
;
LoDTensor
batch_proj
,
batch_cell
;
auto
*
batch_cell_pre_act
=
ctx
.
Output
<
LoDTensor
>
(
"BatchCellPreAct"
);
batch_hidden
.
mutable_data
<
T
>
(
dims
,
ctx
.
GetPlace
());
// T x D
batch_cell_pre_act
->
mutable_data
<
T
>
(
dims
,
ctx
.
GetPlace
());
auto
*
batch_hidden
=
ctx
.
Output
<
LoDTensor
>
(
"BatchHidden"
);
batch_hidden
->
mutable_data
<
T
>
(
dims
,
ctx
.
GetPlace
());
// T x D
batch_proj
.
mutable_data
<
T
>
(
proj_dims
,
ctx
.
GetPlace
());
// T x P
batch_cell
.
mutable_data
<
T
>
(
dims
,
ctx
.
GetPlace
());
// T x D
batch_cell_pre_act
->
mutable_data
<
T
>
(
dims
,
ctx
.
GetPlace
());
auto
batch_starts
=
batch_gate
->
lod
()[
0
];
size_t
num_batch
=
batch_starts
.
size
()
-
1
;
...
...
@@ -112,13 +136,15 @@ class LSTMPKernel : public framework::OpKernel<T> {
ctx
.
Attr
<
std
::
string
>
(
"cell_activation"
));
auto
cand_act
=
math
::
detail
::
GetActivationType
(
ctx
.
Attr
<
std
::
string
>
(
"candidate_activation"
));
auto
share_cell_act
=
ctx
.
Attr
<
bool
>
(
"share_cell_act"
);
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
for
(
size_t
n
=
0
;
n
<
num_batch
;
n
++
)
{
int
bstart
=
static_cast
<
int
>
(
batch_starts
[
n
]);
int
bend
=
static_cast
<
int
>
(
batch_starts
[
n
+
1
]);
Tensor
gate_t
=
batch_gate
->
Slice
(
bstart
,
bend
);
Tensor
hidden_t
=
batch_hidden
.
Slice
(
bstart
,
bend
);
Tensor
hidden_t
=
batch_hidden
->
Slice
(
bstart
,
bend
);
Tensor
proj_t
=
batch_proj
.
Slice
(
bstart
,
bend
);
Tensor
cell_t
=
batch_cell
.
Slice
(
bstart
,
bend
);
Tensor
cell_pre_act_t
=
batch_cell_pre_act
->
Slice
(
bstart
,
bend
);
...
...
@@ -140,15 +166,19 @@ class LSTMPKernel : public framework::OpKernel<T> {
// Since the batch computing for LSTMP reorders the input sequences
// according to their length, the initial hidden state also needs
// to be reordered.
Tensor
ordered_h0
,
ordered_proj0
;
ordered_proj0
.
Resize
({
1
,
proj_weight
->
dims
()[
1
]})
;
ordered_proj0
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
Tensor
ordered_h0
;
ordered_proj0
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
ReorderInitState
<
DeviceContext
,
T
>
(
device_ctx
,
*
hidden_t0
,
order
,
&
ordered_h0
,
true
);
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
ordered_h0
,
false
,
*
proj_weight
,
false
,
static_cast
<
T
>
(
1.0
),
&
ordered_proj0
,
static_cast
<
T
>
(
0.0
));
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
ordered_proj0
,
false
,
ordered_proj0
,
static_cast
<
T
>
(
0.0
));
if
(
share_cell_act
)
{
auto
proj0_dev
=
EigenMatrix
<
T
>::
From
(
*
ordered_proj0
);
ActCompute
(
cell_act
,
place
,
proj0_dev
,
proj0_dev
);
}
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
*
ordered_proj0
,
false
,
*
weight
,
false
,
static_cast
<
T
>
(
1.0
),
&
gate_t
,
static_cast
<
T
>
(
1.0
));
}
...
...
@@ -164,6 +194,10 @@ class LSTMPKernel : public framework::OpKernel<T> {
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
hidden_t
,
false
,
*
proj_weight
,
false
,
static_cast
<
T
>
(
1.0
),
&
proj_t
,
static_cast
<
T
>
(
0.0
));
if
(
share_cell_act
)
{
auto
proj_t_dev
=
EigenMatrix
<
T
>::
From
(
proj_t
);
ActCompute
(
cell_act
,
place
,
proj_t_dev
,
proj_t_dev
);
}
}
math
::
Batch2LoDTensorFunctor
<
DeviceContext
,
T
>
to_seq
;
...
...
@@ -180,9 +214,26 @@ class LSTMPKernel : public framework::OpKernel<T> {
template
<
typename
DeviceContext
,
typename
T
>
class
LSTMPGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
/// Computes the gradient of the activation selected by `act_type`, writing
/// the input gradient into `dx` on device `d`.
///
/// - kIdentity: assigns `dy` to `dx` through `dx.device(d)`.
/// - kSigmoid / kTanh / kReLU: forwards to SigmoidGradFunctor<T>,
///   TanhGradFunctor<T> and ReluGradFunctor<T> respectively, called as
///   (d, x, y, dy, dx). Note the functors take `dy` before `dx`, the
///   opposite of this method's own parameter order.
/// - Any other value: raises through PADDLE_THROW.
///
/// Per the original author's note, `x` is a placeholder that is not used,
/// even for ReLU, which relies on `y` instead.
template <typename Device, typename X, typename Y, typename DX, typename DY>
void ActGradCompute(const math::detail::ActivationType act_type,
                    const Device& d, X x, Y y, DX dx, DY dy) const {
  using ActKind = math::detail::ActivationType;

  if (act_type == ActKind::kIdentity) {
    // Identity passes the upstream gradient through unchanged.
    dx.device(d) = dy;
    return;
  }
  if (act_type == ActKind::kSigmoid) {
    SigmoidGradFunctor<T>()(d, x, y, dy, dx);
    return;
  }
  if (act_type == ActKind::kTanh) {
    TanhGradFunctor<T>()(d, x, y, dy, dx);
    return;
  }
  if (act_type == ActKind::kReLU) {
    ReluGradFunctor<T>()(d, x, y, dy, dx);
    return;
  }

  // None of the handled activation kinds matched.
  PADDLE_THROW("unsupported activation type");
}
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
input
=
ctx
.
Input
<
LoDTensor
>
(
"Input"
);
auto
*
weight
=
ctx
.
Input
<
Tensor
>
(
"Weight"
);
auto
*
proj_weight
=
ctx
.
Input
<
Tensor
>
(
"ProjWeight"
);
auto
*
bias
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
auto
*
proj_out
=
ctx
.
Input
<
LoDTensor
>
(
"Projection"
);
...
...
@@ -190,14 +241,19 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
auto
*
batch_gate
=
ctx
.
Input
<
LoDTensor
>
(
"BatchGate"
);
auto
*
batch_cell_pre_act
=
ctx
.
Input
<
LoDTensor
>
(
"BatchCellPreAct"
);
auto
*
batch_hidden
=
ctx
.
Input
<
LoDTensor
>
(
"BatchHidden"
);
auto
*
hidden_g
=
ctx
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Projection"
));
auto
*
projection_g
=
ctx
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Projection"
));
auto
*
in_g
=
ctx
.
Output
<
LoDTensor
>
(
framework
::
GradVarName
(
"Input"
));
auto
*
weight_g
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Weight"
));
auto
*
proj_weight_g
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"ProjWeight"
));
auto
*
bias_g
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
auto
*
h0
=
ctx
.
Input
<
Tensor
>
(
"H0"
);
auto
*
ordered_proj0
=
ctx
.
Input
<
Tensor
>
(
"OrderedP0"
);
auto
*
c0
=
ctx
.
Input
<
Tensor
>
(
"C0"
);
auto
*
h0_g
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"H0"
));
...
...
@@ -209,6 +265,10 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
weight_g
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
zero
(
device_ctx
,
weight_g
,
static_cast
<
T
>
(
0.0
));
}
if
(
proj_weight_g
)
{
proj_weight_g
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
zero
(
device_ctx
,
proj_weight_g
,
static_cast
<
T
>
(
0.0
));
}
// ordered_h0/c0 is the reordered hidden/cell initialization.
// ordered_h0_g/c0_g is the reordered gradient of hidden/cell
...
...
@@ -224,7 +284,8 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
}
auto
in_dims
=
input
->
dims
();
auto
out_dims
=
hidden_g
->
dims
();
auto
out_dims
=
cell_out
->
dims
();
framework
::
DDim
proj_dims
({
in_dims
[
0
],
proj_weight
->
dims
()[
1
]});
int
frame_size
=
static_cast
<
int
>
(
in_dims
[
1
]
/
4
);
PADDLE_ENFORCE_EQ
(
frame_size
,
out_dims
[
1
]);
...
...
@@ -267,10 +328,11 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
to_batch
(
ctx
,
src
,
dst
,
false
);
};
LoDTensor
batch_proj
,
batch_proj_g
,
batch_cell
;
ToBatch
(
device_ctx
,
*
proj_out
,
out_dims
,
batch_proj
);
ToBatch
(
device_ctx
,
*
hidden_g
,
out_dims
,
batch_proj_g
);
ToBatch
(
device_ctx
,
*
cell_out
,
out_dims
,
batch_cell
);
LoDTensor
batch_hidden_g
,
batch_proj
,
batch_proj_g
,
batch_cell
;
batch_hidden_g
.
mutable_data
<
T
>
(
out_dims
,
ctx
.
GetPlace
());
ToBatch
(
device_ctx
,
*
proj_out
,
proj_dims
,
batch_proj
);
// T x P
ToBatch
(
device_ctx
,
*
projection_g
,
proj_dims
,
batch_proj_g
);
// T x P
ToBatch
(
device_ctx
,
*
cell_out
,
out_dims
,
batch_cell
);
// T x D
LoDTensor
batch_cell_g
,
batch_gate_g
;
batch_cell_g
.
mutable_data
<
T
>
(
out_dims
,
ctx
.
GetPlace
());
...
...
@@ -286,6 +348,8 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
ctx
.
Attr
<
std
::
string
>
(
"cell_activation"
));
auto
cand_act
=
math
::
detail
::
GetActivationType
(
ctx
.
Attr
<
std
::
string
>
(
"candidate_activation"
));
auto
share_cell_act
=
ctx
.
Attr
<
bool
>
(
"share_cell_act"
);
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
batch_starts
=
batch_gate
->
lod
()[
0
];
size_t
num_batch
=
batch_starts
.
size
()
-
1
;
...
...
@@ -293,6 +357,19 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
int
bstart
=
static_cast
<
int
>
(
batch_starts
[
n
]);
int
bend
=
static_cast
<
int
>
(
batch_starts
[
n
+
1
]);
Tensor
cur_proj
=
batch_proj
.
Slice
(
bstart
,
bend
);
Tensor
proj_g
=
batch_proj_g
.
Slice
(
bstart
,
bend
);
if
(
share_cell_act
)
{
auto
cur_proj_dev
=
EigenMatrix
<
T
>::
From
(
cur_proj
);
auto
proj_g_dev
=
EigenMatrix
<
T
>::
From
(
proj_g
);
ActGradCompute
(
cell_act
,
place
,
cur_proj_dev
,
cur_proj_dev
,
proj_g_dev
,
proj_g_dev
);
}
Tensor
out_g
=
batch_hidden_g
.
Slice
(
bstart
,
bend
);
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
proj_g
,
false
,
*
proj_weight
,
true
,
static_cast
<
T
>
(
1.0
),
&
out_g
,
static_cast
<
T
>
(
0.0
));
Tensor
gate
=
batch_gate
->
Slice
(
bstart
,
bend
);
Tensor
cell
=
batch_cell
.
Slice
(
bstart
,
bend
);
Tensor
cell_pre_act
=
batch_cell_pre_act
->
Slice
(
bstart
,
bend
);
...
...
@@ -300,7 +377,6 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
lstmp_value
.
state_value
=
cell
.
data
<
T
>
();
lstmp_value
.
state_active_value
=
cell_pre_act
.
data
<
T
>
();
Tensor
out_g
=
batch_proj_g
.
Slice
(
bstart
,
bend
);
Tensor
gate_g
=
batch_gate_g
.
Slice
(
bstart
,
bend
);
Tensor
cell_g
=
batch_cell_g
.
Slice
(
bstart
,
bend
);
lstmp_grad
.
state_grad
=
cell_g
.
data
<
T
>
();
...
...
@@ -337,19 +413,48 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
false
,
static_cast
<
T
>
(
1.0
),
weight_g
,
static_cast
<
T
>
(
1.0
));
}
if
(
proj_weight_g
)
{
/* backward proj weight */
Tensor
hidden_t
=
batch_hidden
->
Slice
(
bstart
,
bend
);
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
hidden_t
,
true
,
proj_g
,
false
,
static_cast
<
T
>
(
1.0
),
proj_weight_g
,
static_cast
<
T
>
(
1.0
));
}
}
else
{
if
(
h0
&&
weight_g
)
{
ReorderInitState
<
DeviceContext
,
T
>
(
device_ctx
,
*
h0
,
order
,
&
ordered_h0
,
true
);
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
ordered_h0
,
true
,
gate_g
,
false
,
static_cast
<
T
>
(
1.0
),
weight_g
,
static_cast
<
T
>
(
1.0
));
if
(
weight_g
)
{
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
*
ordered_proj0
,
true
,
gate_g
,
false
,
static_cast
<
T
>
(
1.0
),
weight_g
,
static_cast
<
T
>
(
1.0
));
}
if
(
h0
&&
h0_g
)
{
}
if
(
h0
&&
(
h0_g
||
proj_weight_g
))
{
ordered_h0_g
.
mutable_data
<
T
>
(
h0_g
->
dims
(),
ctx
.
GetPlace
());
Tensor
proj0_g
;
proj0_g
.
Resize
({
in_dims
[
0
],
proj_weight
->
dims
()[
1
]});
proj0_g
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
gate_g
,
false
,
*
weight
,
true
,
static_cast
<
T
>
(
1.0
),
&
ordered_h0_g
,
static_cast
<
T
>
(
0.0
));
true
,
static_cast
<
T
>
(
1.0
),
&
proj0_g
,
static_cast
<
T
>
(
0.0
));
if
(
share_cell_act
)
{
auto
proj0_dev
=
EigenMatrix
<
T
>::
From
(
*
ordered_proj0
);
auto
proj0_g_dev
=
EigenMatrix
<
T
>::
From
(
proj0_g
);
ActGradCompute
(
cell_act
,
place
,
proj0_dev
,
proj0_dev
,
proj0_g_dev
,
proj0_g_dev
);
}
// Tensor proj0_g = proj_g.Slice(bstart, bend);
if
(
h0_g
)
{
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
proj0_g
,
false
,
*
proj_weight
,
true
,
static_cast
<
T
>
(
1.0
),
&
ordered_h0_g
,
static_cast
<
T
>
(
0.0
));
}
if
(
proj_weight_g
)
{
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
ordered_h0
,
true
,
proj0_g
,
false
,
static_cast
<
T
>
(
1.0
),
proj_weight_g
,
static_cast
<
T
>
(
1.0
));
}
}
}
}
...
...
python/paddle/v2/fluid/tests/test_lstmp_op.py
浏览文件 @
552c9012
...
...
@@ -62,7 +62,8 @@ def lstmp(
is_reverse
=
False
,
act_gate
=
None
,
act_cell
=
None
,
act_cand
=
None
):
act_cand
=
None
,
share_cell_act
=
True
):
def
_step
(
x
,
w_r
,
w_rh
,
w_c
,
r_pre
,
c_pre
,
act_gate
,
act_cell
,
act_cand
):
g
=
np
.
dot
(
r_pre
,
w_r
)
# 1 x 4D
g
=
g
+
x
...
...
@@ -85,6 +86,8 @@ def lstmp(
h
=
g_o
*
act_cell
(
c
)
# projection
r
=
np
.
dot
(
h
,
w_rh
)
if
share_cell_act
:
r
=
act_cell
(
r
)
return
r
,
c
def
_reverse
(
x
,
lod
):
...
...
@@ -107,6 +110,8 @@ def lstmp(
seq_len
=
offset
[
i
+
1
]
-
offset
[
i
]
x
=
input
[
offset
[
i
]:
offset
[
i
+
1
],
:]
r_pre
=
np
.
dot
(
h0
[
i
],
w_rh
)
# 1 x P
if
share_cell_act
:
r_pre
=
act_cell
(
r_pre
)
c_pre
=
c0
[
i
]
# 1 x D
for
j
in
range
(
seq_len
):
# compute one step
...
...
@@ -138,6 +143,7 @@ class TestLstmOp(OpTest):
self
.
act_cell
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
share_cell_act
=
True
self
.
has_initial_state
=
False
self
.
is_reverse
=
False
self
.
use_peepholes
=
True
...
...
@@ -167,7 +173,7 @@ class TestLstmOp(OpTest):
w_rh
=
np
.
random
.
normal
(
size
=
(
self
.
D
,
self
.
P
)).
astype
(
'float64'
)
r
,
c
=
lstmp
(
x
,
self
.
lod
,
h0
,
c0
,
w
,
w_rh
,
w_b
,
w_c
,
self
.
is_reverse
,
ACTVATION
[
self
.
act_gate
],
ACTVATION
[
self
.
act_cell
],
ACTVATION
[
self
.
act_cand
])
ACTVATION
[
self
.
act_cand
]
,
self
.
share_cell_act
)
self
.
inputs
=
{
'Input'
:
(
x
,
self
.
lod
),
'Weight'
:
w
,
'ProjWeight'
:
w_rh
}
...
...
@@ -192,28 +198,30 @@ class TestLstmOp(OpTest):
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
1e-8
)
"""
def
test_check_grad
(
self
):
# TODO(qingqing) remove following lines after the check_grad is refined.
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'OrderedP0'
]
=
np
.
zeros
((
N
,
self
.
P
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchHidden'
]
=
np
.
zeros
((
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
['Input', 'Weight', 'Bias'], ['
Hidden'], max_relative_error=5e-4)
"""
[
'Input'
,
'Weight'
,
'Bias'
],
[
'
Projection'
],
max_relative_error
=
5e-3
)
"""
class
TestLstmOpHasInitial
(
TestLstmOp
):
def
set_argument
(
self
):
self
.
lod
=
[[
0
,
2
,
5
,
7
]]
self
.
D
=
16
self
.
P
=
5
self
.
act_gate
=
'sigmoid'
self
.
act_cell
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
share_cell_act
=
True
self
.
has_initial_state
=
True
self
.
is_reverse
=
True
self
.
use_peepholes
=
True
...
...
@@ -221,63 +229,74 @@ class TestLstmOpHasInitial(TestLstmOp):
def
test_check_grad
(
self
):
# TODO(qingqing) remove following lines after the check_grad is refined.
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'OrderedP0'
]
=
np
.
zeros
((
N
,
self
.
P
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchHidden'
]
=
np
.
zeros
((
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
['Input', 'Weight', 'Bias', 'H0', 'C0'], ['
Hidde
n'],
max_relative_error=5e-
4
)
[
'Input'
,
'Weight'
,
'Bias'
,
'H0'
,
'C0'
],
[
'
Projectio
n'
],
max_relative_error
=
5e-
3
)
def
test_check_grad_ingore_bias
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'OrderedP0'
]
=
np
.
zeros
((
N
,
self
.
P
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchHidden'
]
=
np
.
zeros
((
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
['Input', 'Weight'], ['
Hidde
n'],
max_relative_error=5e-
4
,
[
'Input'
,
'Weight'
],
[
'
Projectio
n'
],
max_relative_error
=
5e-
3
,
no_grad_set
=
set
(
'Bias'
))
def
test_check_grad_ingore_weight
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'OrderedP0'
]
=
np
.
zeros
((
N
,
self
.
P
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchHidden'
]
=
np
.
zeros
((
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
['Input', 'Bias'], ['
Hidde
n'],
max_relative_error=5e-
4
,
[
'Input'
,
'Bias'
],
[
'
Projectio
n'
],
max_relative_error
=
5e-
3
,
no_grad_set
=
set
(
'Weight'
))
def
test_check_grad_ingore_input
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'OrderedP0'
]
=
np
.
zeros
((
N
,
self
.
P
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchHidden'
]
=
np
.
zeros
((
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
['Weight', 'Bias'], ['
Hidde
n'],
max_relative_error=5e-
4
,
[
'Weight'
,
'Bias'
],
[
'
Projectio
n'
],
max_relative_error
=
5e-
3
,
no_grad_set
=
set
(
'Input'
))
def
test_check_grad_ingore_h0
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'OrderedP0'
]
=
np
.
zeros
((
N
,
self
.
P
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchHidden'
]
=
np
.
zeros
((
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
['Input', 'Weight', 'Bias', 'C0'], ['
Hidde
n'],
max_relative_error=5e-
4
,
[
'Input'
,
'Weight'
,
'Bias'
,
'C0'
],
[
'
Projectio
n'
],
max_relative_error
=
5e-
3
,
no_grad_set
=
set
(
'H0'
))
def
test_check_grad_ingore_c0
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'OrderedP0'
]
=
np
.
zeros
((
N
,
self
.
P
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchHidden'
]
=
np
.
zeros
((
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
['Input', 'Weight', 'Bias', 'H0'], ['
Hidde
n'],
max_relative_error=5e-
4
,
[
'Input'
,
'Weight'
,
'Bias'
,
'H0'
],
[
'
Projectio
n'
],
max_relative_error
=
5e-
3
,
no_grad_set
=
set
(
'C0'
))
"""
class
TestLstmOpRerverse
(
TestLstmOp
):
...
...
@@ -290,6 +309,7 @@ class TestLstmOpRerverse(TestLstmOp):
self
.
act_cell
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
share_cell_act
=
True
self
.
has_initial_state
=
False
self
.
is_reverse
=
True
self
.
use_peepholes
=
True
...
...
@@ -305,6 +325,7 @@ class TestLstmOpNotUsePeepholes(TestLstmOp):
self
.
act_cell
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
share_cell_act
=
True
self
.
has_initial_state
=
False
self
.
is_reverse
=
True
self
.
use_peepholes
=
False
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录