Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
1cabdb87
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
1cabdb87
编写于
10月 11, 2017
作者:
G
guosheng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine gru_unit_op according to comments to support multiple activation types
上级
0922fca4
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
194 addition
and
121 deletion
+194
-121
paddle/operators/gru_unit_op.cc
paddle/operators/gru_unit_op.cc
+80
-70
paddle/operators/gru_unit_op.h
paddle/operators/gru_unit_op.h
+64
-31
python/paddle/v2/framework/tests/test_gru_unit_op.py
python/paddle/v2/framework/tests/test_gru_unit_op.py
+50
-20
未找到文件。
paddle/operators/gru_unit_op.cc
浏览文件 @
1cabdb87
...
...
@@ -24,26 +24,26 @@ class GRUUnitOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
Base
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
i
nput"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
i
nput"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
hidden_p
rev"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
hidden_p
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
w
eight"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
w
eight"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
b
ias"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
b
ias"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
g
ate"
),
"Output(%s) of GRUUnitOp should not be null."
,
"
g
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
reset_hidden_p
rev"
),
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
I
nput"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
I
nput"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
HiddenP
rev"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
HiddenP
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
W
eight"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
W
eight"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
B
ias"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
B
ias"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
G
ate"
),
"Output(%s) of GRUUnitOp should not be null."
,
"
G
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
ResetHiddenP
rev"
),
"Output(%s) of GRUUnitOp should not be null."
,
"
reset_hidden_p
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
h
idden"
),
"Output(%s) of GRUUnitOp should not be null."
,
"
h
idden"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"
i
nput"
);
auto
hidden_prev_dims
=
ctx
->
GetInputDim
(
"
hidden_p
rev"
);
auto
weight_dims
=
ctx
->
GetInputDim
(
"
w
eight"
);
auto
bias_dims
=
ctx
->
GetInputDim
(
"
b
ias"
);
"
ResetHiddenP
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
H
idden"
),
"Output(%s) of GRUUnitOp should not be null."
,
"
H
idden"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"
I
nput"
);
auto
hidden_prev_dims
=
ctx
->
GetInputDim
(
"
HiddenP
rev"
);
auto
weight_dims
=
ctx
->
GetInputDim
(
"
W
eight"
);
auto
bias_dims
=
ctx
->
GetInputDim
(
"
B
ias"
);
int
batch_size
=
input_dims
[
0
];
int
input_size
=
input_dims
[
1
];
int
frame_size
=
hidden_prev_dims
[
1
];
...
...
@@ -53,54 +53,64 @@ class GRUUnitOp : public framework::OperatorWithKernel {
int
bias_width
=
bias_dims
[
1
];
PADDLE_ENFORCE_EQ
(
input_size
,
frame_size
*
3
,
"The in
n
put_size must be 3 times of frame_size in GRUUnitOp."
);
"The input_size must be 3 times of frame_size in GRUUnitOp."
);
PADDLE_ENFORCE_EQ
(
weight_height
,
frame_size
,
"The shape of
w
eight matrix must be [frame_size, frame_size * 3]."
);
"The shape of
W
eight matrix must be [frame_size, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
weight_width
,
frame_size
*
3
,
"The shape of
w
eight matrix must be [frame_size, frame_size * 3]."
);
"The shape of
W
eight matrix must be [frame_size, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
bias_height
,
1
,
"The shape of
b
ias must be [1, frame_size * 3]."
);
"The shape of
B
ias must be [1, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
bias_width
,
frame_size
*
3
,
"The shape of
b
ias must be [1, frame_size * 3]."
);
ctx
->
SetOutputDim
(
"
g
ate"
,
{
batch_size
,
frame_size
*
3
});
ctx
->
SetOutputDim
(
"
reset_hidden_p
rev"
,
{
batch_size
,
frame_size
});
ctx
->
SetOutputDim
(
"
h
idden"
,
{
batch_size
,
frame_size
});
"The shape of
B
ias must be [1, frame_size * 3]."
);
ctx
->
SetOutputDim
(
"
G
ate"
,
{
batch_size
,
frame_size
*
3
});
ctx
->
SetOutputDim
(
"
ResetHiddenP
rev"
,
{
batch_size
,
frame_size
});
ctx
->
SetOutputDim
(
"
H
idden"
,
{
batch_size
,
frame_size
});
}
};
class
GRUUnitOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
GRUUnitOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
GRUUnitOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"
i
nput"
,
AddInput
(
"
I
nput"
,
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"input."
);
AddInput
(
"
hidden_p
rev"
,
AddInput
(
"
HiddenP
rev"
,
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"states of previous time step."
);
AddInput
(
"
w
eight"
,
AddInput
(
"
W
eight"
,
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
"The elements continuous in memory can be divided into two parts. "
"The first part are weights of the update gate and reset gate "
"with shape [frame_size, frame_size * 2], and the second part are "
"weights of output candidate with shape [frame_size, frame_size]"
);
AddInput
(
"
b
ias"
,
AddInput
(
"
B
ias"
,
"(Tensor) Bias vector with shape [1, frame_size * 3] concating "
"bias of the update gate, reset gate and output candidate."
);
AddOutput
(
"
g
ate"
,
AddOutput
(
"
G
ate"
,
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"output of update gate, reset gate and output candidate"
)
.
AsIntermediate
();
AddOutput
(
"
reset_hidden_p
rev"
,
AddOutput
(
"
ResetHiddenP
rev"
,
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"reseted hidden state of previous time step."
)
.
AsIntermediate
();
AddOutput
(
"
h
idden"
,
AddOutput
(
"
H
idden"
,
"(Tensor) The GRU hidden state of the current time step "
"with shape [batch_size, frame_size]."
);
AddAttr
<
int
>
(
"activation"
,
"(enum int, default tanh) "
"The activation type used for output candidate {h}_t."
)
.
SetDefault
(
tanh
)
.
InEnum
({
identity
,
sigmoid
,
tanh
,
relu
});
AddAttr
<
int
>
(
"gate_activation"
,
"(enum int, default sigmoid) "
"The activation type used in update gate and reset gate."
)
.
SetDefault
(
sigmoid
)
.
InEnum
({
identity
,
sigmoid
,
tanh
,
relu
});
AddComment
(
R"DOC(
GRUUnitOp implements part calculations of the GRU unit as following:
...
...
@@ -121,36 +131,36 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
Base
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
i
nput"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
i
nput"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
hidden_p
rev"
),
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
I
nput"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
I
nput"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
HiddenP
rev"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
hidden_p
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
w
eight"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
w
eight"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
b
ias"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
b
ias"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
g
ate"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
g
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
reset_hidden_p
rev"
),
"
HiddenP
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
W
eight"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
W
eight"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
B
ias"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
B
ias"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
G
ate"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
G
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
ResetHiddenP
rev"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
reset_hidden_p
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
h
idden"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
h
idden"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
g
ate"
)),
"
ResetHiddenP
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
H
idden"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
H
idden"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
G
ate"
)),
"Input(%s@GRAD) of GRUUnitGradOp should not be null."
,
"
g
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
reset_hidden_p
rev"
)),
"
G
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
ResetHiddenP
rev"
)),
"Input(%s@GRAD) of GRUUnitGradOp should not be null."
,
"
reset_hidden_p
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
h
idden"
)),
"
ResetHiddenP
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
H
idden"
)),
"Input(%s@GRAD) of GRUUnitGradOp should not be null."
,
"
h
idden"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"
i
nput"
);
auto
hidden_prev_dims
=
ctx
->
GetInputDim
(
"
hidden_p
rev"
);
auto
weight_dims
=
ctx
->
GetInputDim
(
"
w
eight"
);
auto
bias_dims
=
ctx
->
GetInputDim
(
"
b
ias"
);
"
H
idden"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"
I
nput"
);
auto
hidden_prev_dims
=
ctx
->
GetInputDim
(
"
HiddenP
rev"
);
auto
weight_dims
=
ctx
->
GetInputDim
(
"
W
eight"
);
auto
bias_dims
=
ctx
->
GetInputDim
(
"
B
ias"
);
// int batch_size = input_dims[0];
int
input_size
=
input_dims
[
1
];
int
frame_size
=
hidden_prev_dims
[
1
];
...
...
@@ -160,27 +170,27 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
int
bias_width
=
bias_dims
[
1
];
PADDLE_ENFORCE_EQ
(
input_size
,
frame_size
*
3
,
"The in
n
put_size must be 3 times of frame_size in GRUUnitOp."
);
"The input_size must be 3 times of frame_size in GRUUnitOp."
);
PADDLE_ENFORCE_EQ
(
weight_height
,
frame_size
,
"The shape of
w
eight matrix must be [frame_size, frame_size * 3]."
);
"The shape of
W
eight matrix must be [frame_size, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
weight_width
,
frame_size
*
3
,
"The shape of
w
eight matrix must be [frame_size, frame_size * 3]."
);
"The shape of
W
eight matrix must be [frame_size, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
bias_height
,
1
,
"The shape of
b
ias must be [1, frame_size * 3]."
);
"The shape of
B
ias must be [1, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
bias_width
,
frame_size
*
3
,
"The shape of
b
ias must be [1, frame_size * 3]."
);
auto
input_grad_name
=
framework
::
GradVarName
(
"
i
nput"
);
"The shape of
B
ias must be [1, frame_size * 3]."
);
auto
input_grad_name
=
framework
::
GradVarName
(
"
I
nput"
);
if
(
ctx
->
HasOutput
(
input_grad_name
))
ctx
->
SetOutputDim
(
input_grad_name
,
input_dims
);
auto
hidden_prev_grad_name
=
framework
::
GradVarName
(
"
hidden_p
rev"
);
auto
hidden_prev_grad_name
=
framework
::
GradVarName
(
"
HiddenP
rev"
);
if
(
ctx
->
HasOutput
(
hidden_prev_grad_name
))
ctx
->
SetOutputDim
(
hidden_prev_grad_name
,
hidden_prev_dims
);
auto
weight_grad_name
=
framework
::
GradVarName
(
"
w
eight"
);
auto
weight_grad_name
=
framework
::
GradVarName
(
"
W
eight"
);
if
(
ctx
->
HasOutput
(
weight_grad_name
))
ctx
->
SetOutputDim
(
weight_grad_name
,
weight_dims
);
auto
bias_grad_name
=
framework
::
GradVarName
(
"
b
ias"
);
auto
bias_grad_name
=
framework
::
GradVarName
(
"
B
ias"
);
if
(
ctx
->
HasOutput
(
bias_grad_name
))
ctx
->
SetOutputDim
(
bias_grad_name
,
bias_dims
);
}
...
...
paddle/operators/gru_unit_op.h
浏览文件 @
1cabdb87
...
...
@@ -14,6 +14,7 @@
#pragma once
#include "paddle/operators/activation_op.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/framework/eigen.h"
...
...
@@ -27,19 +28,35 @@ template <typename T, int MajorType = Eigen::RowMajor,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenMatrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
enum
GRUActivationType
{
identity
=
0
,
sigmoid
=
1
,
tanh
=
2
,
relu
=
3
};
template
<
typename
Place
,
typename
T
>
class
GRUUnitKernel
:
public
framework
::
OpKernel
{
class
GRUUnitKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
ActCompute
(
const
int
act_type
,
const
Device
&
d
,
X
x
,
Y
y
)
const
{
if
(
act_type
==
identity
)
y
.
device
(
d
)
=
x
;
else
if
(
act_type
==
sigmoid
)
SigmoidFunctor
<
T
>
()(
d
,
x
,
y
);
else
if
(
act_type
==
tanh
)
TanhFunctor
<
T
>
()(
d
,
x
,
y
);
else
if
(
act_type
==
relu
)
ReluFunctor
<
T
>
()(
d
,
x
,
y
);
else
PADDLE_THROW
(
"unsupported activation type"
);
}
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
input
=
context
.
Input
<
Tensor
>
(
"
i
nput"
);
auto
*
hidden_prev
=
context
.
Input
<
Tensor
>
(
"
hidden_p
rev"
);
auto
*
weight
=
context
.
Input
<
Tensor
>
(
"
w
eight"
);
auto
*
bias
=
context
.
Input
<
Tensor
>
(
"
b
ias"
);
auto
*
gate
=
context
.
Output
<
Tensor
>
(
"
g
ate"
);
auto
*
input
=
context
.
Input
<
Tensor
>
(
"
I
nput"
);
auto
*
hidden_prev
=
context
.
Input
<
Tensor
>
(
"
HiddenP
rev"
);
auto
*
weight
=
context
.
Input
<
Tensor
>
(
"
W
eight"
);
auto
*
bias
=
context
.
Input
<
Tensor
>
(
"
B
ias"
);
auto
*
gate
=
context
.
Output
<
Tensor
>
(
"
G
ate"
);
gate
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
*
reset_hidden_prev
=
context
.
Output
<
Tensor
>
(
"
reset_hidden_p
rev"
);
auto
*
reset_hidden_prev
=
context
.
Output
<
Tensor
>
(
"
ResetHiddenP
rev"
);
reset_hidden_prev
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
*
hidden
=
context
.
Output
<
Tensor
>
(
"
h
idden"
);
auto
*
hidden
=
context
.
Output
<
Tensor
>
(
"
H
idden"
);
hidden
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
batch_size
=
input
->
dims
()[
0
];
...
...
@@ -69,12 +86,12 @@ class GRUUnitKernel : public framework::OpKernel {
// calculate activited gate
Eigen
::
array
<
int
,
2
>
extents
({{
batch_size
,
frame_size
}});
Eigen
::
array
<
int
,
2
>
u_offsets
({{
0
,
0
}});
g
.
slice
(
u_offsets
,
extents
).
device
(
place
)
=
g
.
slice
(
u_offsets
,
extents
).
sigmoid
(
);
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
g
.
slice
(
u_offsets
,
extents
),
g
.
slice
(
u_offsets
,
extents
)
);
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
Eigen
::
array
<
int
,
2
>
r_offsets
({{
0
,
frame_size
}});
g
.
slice
(
r_offsets
,
extents
).
device
(
place
)
=
g
.
slice
(
r_offsets
,
extents
).
sigmoid
(
);
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
g
.
slice
(
r_offsets
,
extents
),
g
.
slice
(
r_offsets
,
extents
)
);
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
r_h_p
.
device
(
place
)
=
r
*
h_p
;
// reset previous hidden state
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
false
,
false
,
batch_size
,
...
...
@@ -84,8 +101,8 @@ class GRUUnitKernel : public framework::OpKernel {
frame_size
*
3
);
Eigen
::
array
<
int
,
2
>
c_offsets
({{
0
,
frame_size
*
2
}});
g
.
slice
(
c_offsets
,
extents
).
device
(
place
)
=
g
.
slice
(
c_offsets
,
extents
).
tanh
(
);
ActCompute
(
context
.
Attr
<
int
>
(
"activation"
),
place
,
g
.
slice
(
c_offsets
,
extents
),
g
.
slice
(
c_offsets
,
extents
)
);
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
// calculate final output
...
...
@@ -94,21 +111,37 @@ class GRUUnitKernel : public framework::OpKernel {
};
template
<
typename
Place
,
typename
T
>
class
GRUUnitGradKernel
:
public
framework
::
OpKernel
{
class
GRUUnitGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
DX
,
typename
DY
>
void
ActGradCompute
(
const
int
act_type
,
const
Device
&
d
,
X
x
,
Y
y
,
DX
dx
,
DY
dy
)
const
{
// x is dummy and won't be used even in Relu(use y instead)
if
(
act_type
==
identity
)
dx
.
device
(
d
)
=
dy
;
else
if
(
act_type
==
sigmoid
)
SigmoidGradFunctor
<
T
>
()(
d
,
x
,
y
,
dy
,
dx
);
else
if
(
act_type
==
tanh
)
TanhGradFunctor
<
T
>
()(
d
,
x
,
y
,
dy
,
dx
);
else
if
(
act_type
==
relu
)
ReluGradFunctor
<
T
>
()(
d
,
x
,
y
,
dy
,
dx
);
else
PADDLE_THROW
(
"unsupported activation type"
);
}
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
input
=
context
.
Input
<
Tensor
>
(
"
i
nput"
);
auto
*
hidden_prev
=
context
.
Input
<
Tensor
>
(
"
hidden_p
rev"
);
auto
*
weight
=
context
.
Input
<
Tensor
>
(
"
w
eight"
);
auto
*
gate
=
context
.
Input
<
Tensor
>
(
"
g
ate"
);
auto
*
reset_hidden_prev
=
context
.
Input
<
Tensor
>
(
"
reset_hidden_p
rev"
);
auto
*
hidden_grad
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"
h
idden"
));
auto
*
input_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
i
nput"
));
auto
*
input
=
context
.
Input
<
Tensor
>
(
"
I
nput"
);
auto
*
hidden_prev
=
context
.
Input
<
Tensor
>
(
"
HiddenP
rev"
);
auto
*
weight
=
context
.
Input
<
Tensor
>
(
"
W
eight"
);
auto
*
gate
=
context
.
Input
<
Tensor
>
(
"
G
ate"
);
auto
*
reset_hidden_prev
=
context
.
Input
<
Tensor
>
(
"
ResetHiddenP
rev"
);
auto
*
hidden_grad
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"
H
idden"
));
auto
*
input_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
I
nput"
));
auto
*
hidden_prev_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
hidden_p
rev"
));
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
HiddenP
rev"
));
auto
*
weight_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
w
eight"
));
auto
*
bias_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
b
ias"
));
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
W
eight"
));
auto
*
bias_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
B
ias"
));
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
hidden_prev_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
weight_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
...
...
@@ -149,11 +182,11 @@ class GRUUnitGradKernel : public framework::OpKernel {
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
// backward for unactivated update gate
d_g
.
slice
(
u_offsets
,
extents
).
device
(
place
)
=
d_h
*
(
h_p
-
c
)
*
u
*
(
u
.
constant
(
T
(
1
))
-
u
);
ActGradCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
u
,
u
,
d_g
.
slice
(
u_offsets
,
extents
),
d_h
*
(
h_p
-
c
)
);
// backward for unactivated output candidate
d_g
.
slice
(
c_offsets
,
extents
).
device
(
place
)
=
d_h
*
(
u
.
constant
(
T
(
1
))
-
u
)
*
(
c
.
constant
(
T
(
1
))
-
c
*
c
);
ActGradCompute
(
context
.
Attr
<
int
>
(
"activation"
),
place
,
c
,
c
,
d_g
.
slice
(
c_offsets
,
extents
),
d_h
*
(
u
.
constant
(
T
(
1
))
-
u
)
);
// backward for reset_hidden_prev
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
false
,
true
,
batch_size
,
frame_size
,
frame_size
,
1
,
...
...
@@ -167,8 +200,8 @@ class GRUUnitGradKernel : public framework::OpKernel {
gate_grad_data
+
frame_size
*
2
,
frame_size
*
3
,
0
,
weight_grad_data
+
frame_size
*
frame_size
*
2
,
frame_size
);
// backward for unactivated reset gate
d_g
.
slice
(
r_offsets
,
extents
).
device
(
place
)
=
d_r_h_p
*
h_p
*
r
*
(
r
.
constant
(
T
(
1
))
-
r
);
ActGradCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
r
,
r
,
d_g
.
slice
(
r_offsets
,
extents
),
d_r_h_p
*
h_p
);
// backward for update_gate_weight and reset_gate_weight
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
true
,
false
,
frame_size
,
frame_size
*
2
,
batch_size
,
1
,
hidden_prev_data
,
...
...
python/paddle/v2/framework/tests/test_gru_unit_op.py
浏览文件 @
1cabdb87
...
...
@@ -4,54 +4,84 @@ import numpy as np
from
op_test
import
OpTest
def
sigmoid_np
(
x
):
class
GRUActivationType
(
OpTest
):
identity
=
0
sigmoid
=
1
tanh
=
2
relu
=
3
def
identity
(
x
):
return
x
def
sigmoid
(
x
):
return
1.
/
(
1.
+
np
.
exp
(
-
x
))
def
tanh_np
(
x
):
return
2.
*
sigmoid_np
(
2.
*
x
)
-
1.
def
tanh
(
x
):
return
2.
*
sigmoid
(
2.
*
x
)
-
1.
def
relu
(
x
):
return
np
.
maximum
(
x
,
0
)
class
TestGRUUnitOp
(
OpTest
):
activate
=
{
GRUActivationType
.
identity
:
identity
,
GRUActivationType
.
sigmoid
:
sigmoid
,
GRUActivationType
.
tanh
:
tanh
,
GRUActivationType
.
relu
:
relu
,
}
def
setUp
(
self
):
batch_size
=
3
frame_size
=
5
self
.
op_type
=
"gru_unit"
self
.
op_type
=
'gru_unit'
self
.
inputs
=
{
'
i
nput'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
batch_size
,
frame_size
*
3
)).
astype
(
"float32"
),
'
hidden_p
rev'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
batch_size
,
frame_size
)).
astype
(
"float32"
),
'
w
eight'
:
np
.
random
.
uniform
(
'
I
nput'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
batch_size
,
frame_size
*
3
)).
astype
(
'float32'
),
'
HiddenP
rev'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
batch_size
,
frame_size
)).
astype
(
'float32'
),
'
W
eight'
:
np
.
random
.
uniform
(
-
1.
/
math
.
sqrt
(
frame_size
),
1.
/
math
.
sqrt
(
frame_size
),
(
frame_size
,
frame_size
*
3
)).
astype
(
"float32"
),
'
b
ias'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
1
,
frame_size
*
3
)).
astype
(
"float32"
)
(
frame_size
,
frame_size
*
3
)).
astype
(
'float32'
),
'
B
ias'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
1
,
frame_size
*
3
)).
astype
(
'float32'
)
}
x
=
self
.
inputs
[
'input'
]
h_p
=
self
.
inputs
[
'hidden_prev'
]
w
=
self
.
inputs
[
'weight'
]
b
=
self
.
inputs
[
'bias'
]
self
.
attrs
=
{
'activation'
:
GRUActivationType
.
tanh
,
'gate_activation'
:
GRUActivationType
.
sigmoid
}
# GRU calculations
x
=
self
.
inputs
[
'Input'
]
h_p
=
self
.
inputs
[
'HiddenPrev'
]
w
=
self
.
inputs
[
'Weight'
]
b
=
self
.
inputs
[
'Bias'
]
g
=
x
+
np
.
tile
(
b
,
(
batch_size
,
1
))
w_u_r
=
w
.
flatten
()[:
frame_size
*
frame_size
*
2
].
reshape
(
(
frame_size
,
frame_size
*
2
))
u_r
=
sigmoid_np
(
np
.
dot
(
h_p
,
w_u_r
)
+
g
[:,
:
frame_size
*
2
])
u_r
=
self
.
activate
[
self
.
attrs
[
'gate_activation'
]](
np
.
dot
(
h_p
,
w_u_r
)
+
g
[:,
:
frame_size
*
2
])
u
=
u_r
[:,
:
frame_size
]
r
=
u_r
[:,
frame_size
:
frame_size
*
2
]
r_h_p
=
r
*
h_p
w_c
=
w
.
flatten
()[
frame_size
*
frame_size
*
2
:].
reshape
(
(
frame_size
,
frame_size
))
c
=
tanh_np
(
np
.
dot
(
r_h_p
,
w_c
)
+
g
[:,
frame_size
*
2
:])
c
=
self
.
activate
[
self
.
attrs
[
'activation'
]](
np
.
dot
(
r_h_p
,
w_c
)
+
g
[:,
frame_size
*
2
:])
g
=
np
.
hstack
((
u_r
,
c
))
h
=
u
*
h_p
+
(
1
-
u
)
*
c
self
.
outputs
=
{
'gate'
:
g
,
'reset_hidden_prev'
:
r_h_p
,
'hidden'
:
h
}
self
.
outputs
=
{
'Gate'
:
g
,
'ResetHiddenPrev'
:
r_h_p
,
'Hidden'
:
h
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
(
[
'
input'
,
'hidden_prev'
,
'weight'
,
'bias'
],
[
'h
idden'
],
[
'
Input'
,
'HiddenPrev'
,
'Weight'
,
'Bias'
],
[
'H
idden'
],
max_relative_error
=
0.007
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录