Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
1cabdb87
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1cabdb87
编写于
10月 11, 2017
作者:
G
guosheng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine gru_unit_op according to comments to support multiple activation types
上级
0922fca4
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
194 addition
and
121 deletion
+194
-121
paddle/operators/gru_unit_op.cc
paddle/operators/gru_unit_op.cc
+80
-70
paddle/operators/gru_unit_op.h
paddle/operators/gru_unit_op.h
+64
-31
python/paddle/v2/framework/tests/test_gru_unit_op.py
python/paddle/v2/framework/tests/test_gru_unit_op.py
+50
-20
未找到文件。
paddle/operators/gru_unit_op.cc
浏览文件 @
1cabdb87
...
...
@@ -24,26 +24,26 @@ class GRUUnitOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
Base
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
i
nput"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
i
nput"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
hidden_p
rev"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
hidden_p
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
w
eight"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
w
eight"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
b
ias"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
b
ias"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
g
ate"
),
"Output(%s) of GRUUnitOp should not be null."
,
"
g
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
reset_hidden_p
rev"
),
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
I
nput"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
I
nput"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
HiddenP
rev"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
HiddenP
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
W
eight"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
W
eight"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
B
ias"
),
"Input(%s) of GRUUnitOp should not be null."
,
"
B
ias"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
G
ate"
),
"Output(%s) of GRUUnitOp should not be null."
,
"
G
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
ResetHiddenP
rev"
),
"Output(%s) of GRUUnitOp should not be null."
,
"
reset_hidden_p
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
h
idden"
),
"Output(%s) of GRUUnitOp should not be null."
,
"
h
idden"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"
i
nput"
);
auto
hidden_prev_dims
=
ctx
->
GetInputDim
(
"
hidden_p
rev"
);
auto
weight_dims
=
ctx
->
GetInputDim
(
"
w
eight"
);
auto
bias_dims
=
ctx
->
GetInputDim
(
"
b
ias"
);
"
ResetHiddenP
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
H
idden"
),
"Output(%s) of GRUUnitOp should not be null."
,
"
H
idden"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"
I
nput"
);
auto
hidden_prev_dims
=
ctx
->
GetInputDim
(
"
HiddenP
rev"
);
auto
weight_dims
=
ctx
->
GetInputDim
(
"
W
eight"
);
auto
bias_dims
=
ctx
->
GetInputDim
(
"
B
ias"
);
int
batch_size
=
input_dims
[
0
];
int
input_size
=
input_dims
[
1
];
int
frame_size
=
hidden_prev_dims
[
1
];
...
...
@@ -53,54 +53,64 @@ class GRUUnitOp : public framework::OperatorWithKernel {
int
bias_width
=
bias_dims
[
1
];
PADDLE_ENFORCE_EQ
(
input_size
,
frame_size
*
3
,
"The in
n
put_size must be 3 times of frame_size in GRUUnitOp."
);
"The input_size must be 3 times of frame_size in GRUUnitOp."
);
PADDLE_ENFORCE_EQ
(
weight_height
,
frame_size
,
"The shape of
w
eight matrix must be [frame_size, frame_size * 3]."
);
"The shape of
W
eight matrix must be [frame_size, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
weight_width
,
frame_size
*
3
,
"The shape of
w
eight matrix must be [frame_size, frame_size * 3]."
);
"The shape of
W
eight matrix must be [frame_size, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
bias_height
,
1
,
"The shape of
b
ias must be [1, frame_size * 3]."
);
"The shape of
B
ias must be [1, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
bias_width
,
frame_size
*
3
,
"The shape of
b
ias must be [1, frame_size * 3]."
);
ctx
->
SetOutputDim
(
"
g
ate"
,
{
batch_size
,
frame_size
*
3
});
ctx
->
SetOutputDim
(
"
reset_hidden_p
rev"
,
{
batch_size
,
frame_size
});
ctx
->
SetOutputDim
(
"
h
idden"
,
{
batch_size
,
frame_size
});
"The shape of
B
ias must be [1, frame_size * 3]."
);
ctx
->
SetOutputDim
(
"
G
ate"
,
{
batch_size
,
frame_size
*
3
});
ctx
->
SetOutputDim
(
"
ResetHiddenP
rev"
,
{
batch_size
,
frame_size
});
ctx
->
SetOutputDim
(
"
H
idden"
,
{
batch_size
,
frame_size
});
}
};
class
GRUUnitOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
GRUUnitOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
GRUUnitOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"
i
nput"
,
AddInput
(
"
I
nput"
,
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"input."
);
AddInput
(
"
hidden_p
rev"
,
AddInput
(
"
HiddenP
rev"
,
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"states of previous time step."
);
AddInput
(
"
w
eight"
,
AddInput
(
"
W
eight"
,
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
"The elements continuous in memory can be divided into two parts. "
"The first part are weights of the update gate and reset gate "
"with shape [frame_size, frame_size * 2], and the second part are "
"weights of output candidate with shape [frame_size, frame_size]"
);
AddInput
(
"
b
ias"
,
AddInput
(
"
B
ias"
,
"(Tensor) Bias vector with shape [1, frame_size * 3] concating "
"bias of the update gate, reset gate and output candidate."
);
AddOutput
(
"
g
ate"
,
AddOutput
(
"
G
ate"
,
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"output of update gate, reset gate and output candidate"
)
.
AsIntermediate
();
AddOutput
(
"
reset_hidden_p
rev"
,
AddOutput
(
"
ResetHiddenP
rev"
,
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"reseted hidden state of previous time step."
)
.
AsIntermediate
();
AddOutput
(
"
h
idden"
,
AddOutput
(
"
H
idden"
,
"(Tensor) The GRU hidden state of the current time step "
"with shape [batch_size, frame_size]."
);
AddAttr
<
int
>
(
"activation"
,
"(enum int, default tanh) "
"The activation type used for output candidate {h}_t."
)
.
SetDefault
(
tanh
)
.
InEnum
({
identity
,
sigmoid
,
tanh
,
relu
});
AddAttr
<
int
>
(
"gate_activation"
,
"(enum int, default sigmoid) "
"The activation type used in update gate and reset gate."
)
.
SetDefault
(
sigmoid
)
.
InEnum
({
identity
,
sigmoid
,
tanh
,
relu
});
AddComment
(
R"DOC(
GRUUnitOp implements part calculations of the GRU unit as following:
...
...
@@ -121,36 +131,36 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
Base
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
i
nput"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
i
nput"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
hidden_p
rev"
),
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
I
nput"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
I
nput"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
HiddenP
rev"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
hidden_p
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
w
eight"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
w
eight"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
b
ias"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
b
ias"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
g
ate"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
g
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
reset_hidden_p
rev"
),
"
HiddenP
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
W
eight"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
W
eight"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
B
ias"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
B
ias"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
G
ate"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
G
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
ResetHiddenP
rev"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
reset_hidden_p
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
h
idden"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
h
idden"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
g
ate"
)),
"
ResetHiddenP
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
H
idden"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"
H
idden"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
G
ate"
)),
"Input(%s@GRAD) of GRUUnitGradOp should not be null."
,
"
g
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
reset_hidden_p
rev"
)),
"
G
ate"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
ResetHiddenP
rev"
)),
"Input(%s@GRAD) of GRUUnitGradOp should not be null."
,
"
reset_hidden_p
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
h
idden"
)),
"
ResetHiddenP
rev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
H
idden"
)),
"Input(%s@GRAD) of GRUUnitGradOp should not be null."
,
"
h
idden"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"
i
nput"
);
auto
hidden_prev_dims
=
ctx
->
GetInputDim
(
"
hidden_p
rev"
);
auto
weight_dims
=
ctx
->
GetInputDim
(
"
w
eight"
);
auto
bias_dims
=
ctx
->
GetInputDim
(
"
b
ias"
);
"
H
idden"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"
I
nput"
);
auto
hidden_prev_dims
=
ctx
->
GetInputDim
(
"
HiddenP
rev"
);
auto
weight_dims
=
ctx
->
GetInputDim
(
"
W
eight"
);
auto
bias_dims
=
ctx
->
GetInputDim
(
"
B
ias"
);
// int batch_size = input_dims[0];
int
input_size
=
input_dims
[
1
];
int
frame_size
=
hidden_prev_dims
[
1
];
...
...
@@ -160,27 +170,27 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
int
bias_width
=
bias_dims
[
1
];
PADDLE_ENFORCE_EQ
(
input_size
,
frame_size
*
3
,
"The in
n
put_size must be 3 times of frame_size in GRUUnitOp."
);
"The input_size must be 3 times of frame_size in GRUUnitOp."
);
PADDLE_ENFORCE_EQ
(
weight_height
,
frame_size
,
"The shape of
w
eight matrix must be [frame_size, frame_size * 3]."
);
"The shape of
W
eight matrix must be [frame_size, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
weight_width
,
frame_size
*
3
,
"The shape of
w
eight matrix must be [frame_size, frame_size * 3]."
);
"The shape of
W
eight matrix must be [frame_size, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
bias_height
,
1
,
"The shape of
b
ias must be [1, frame_size * 3]."
);
"The shape of
B
ias must be [1, frame_size * 3]."
);
PADDLE_ENFORCE_EQ
(
bias_width
,
frame_size
*
3
,
"The shape of
b
ias must be [1, frame_size * 3]."
);
auto
input_grad_name
=
framework
::
GradVarName
(
"
i
nput"
);
"The shape of
B
ias must be [1, frame_size * 3]."
);
auto
input_grad_name
=
framework
::
GradVarName
(
"
I
nput"
);
if
(
ctx
->
HasOutput
(
input_grad_name
))
ctx
->
SetOutputDim
(
input_grad_name
,
input_dims
);
auto
hidden_prev_grad_name
=
framework
::
GradVarName
(
"
hidden_p
rev"
);
auto
hidden_prev_grad_name
=
framework
::
GradVarName
(
"
HiddenP
rev"
);
if
(
ctx
->
HasOutput
(
hidden_prev_grad_name
))
ctx
->
SetOutputDim
(
hidden_prev_grad_name
,
hidden_prev_dims
);
auto
weight_grad_name
=
framework
::
GradVarName
(
"
w
eight"
);
auto
weight_grad_name
=
framework
::
GradVarName
(
"
W
eight"
);
if
(
ctx
->
HasOutput
(
weight_grad_name
))
ctx
->
SetOutputDim
(
weight_grad_name
,
weight_dims
);
auto
bias_grad_name
=
framework
::
GradVarName
(
"
b
ias"
);
auto
bias_grad_name
=
framework
::
GradVarName
(
"
B
ias"
);
if
(
ctx
->
HasOutput
(
bias_grad_name
))
ctx
->
SetOutputDim
(
bias_grad_name
,
bias_dims
);
}
...
...
paddle/operators/gru_unit_op.h
浏览文件 @
1cabdb87
...
...
@@ -14,6 +14,7 @@
#pragma once
#include "paddle/operators/activation_op.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/framework/eigen.h"
...
...
@@ -27,19 +28,35 @@ template <typename T, int MajorType = Eigen::RowMajor,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenMatrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
enum
GRUActivationType
{
identity
=
0
,
sigmoid
=
1
,
tanh
=
2
,
relu
=
3
};
template
<
typename
Place
,
typename
T
>
class
GRUUnitKernel
:
public
framework
::
OpKernel
{
class
GRUUnitKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
ActCompute
(
const
int
act_type
,
const
Device
&
d
,
X
x
,
Y
y
)
const
{
if
(
act_type
==
identity
)
y
.
device
(
d
)
=
x
;
else
if
(
act_type
==
sigmoid
)
SigmoidFunctor
<
T
>
()(
d
,
x
,
y
);
else
if
(
act_type
==
tanh
)
TanhFunctor
<
T
>
()(
d
,
x
,
y
);
else
if
(
act_type
==
relu
)
ReluFunctor
<
T
>
()(
d
,
x
,
y
);
else
PADDLE_THROW
(
"unsupported activation type"
);
}
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
input
=
context
.
Input
<
Tensor
>
(
"
i
nput"
);
auto
*
hidden_prev
=
context
.
Input
<
Tensor
>
(
"
hidden_p
rev"
);
auto
*
weight
=
context
.
Input
<
Tensor
>
(
"
w
eight"
);
auto
*
bias
=
context
.
Input
<
Tensor
>
(
"
b
ias"
);
auto
*
gate
=
context
.
Output
<
Tensor
>
(
"
g
ate"
);
auto
*
input
=
context
.
Input
<
Tensor
>
(
"
I
nput"
);
auto
*
hidden_prev
=
context
.
Input
<
Tensor
>
(
"
HiddenP
rev"
);
auto
*
weight
=
context
.
Input
<
Tensor
>
(
"
W
eight"
);
auto
*
bias
=
context
.
Input
<
Tensor
>
(
"
B
ias"
);
auto
*
gate
=
context
.
Output
<
Tensor
>
(
"
G
ate"
);
gate
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
*
reset_hidden_prev
=
context
.
Output
<
Tensor
>
(
"
reset_hidden_p
rev"
);
auto
*
reset_hidden_prev
=
context
.
Output
<
Tensor
>
(
"
ResetHiddenP
rev"
);
reset_hidden_prev
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
*
hidden
=
context
.
Output
<
Tensor
>
(
"
h
idden"
);
auto
*
hidden
=
context
.
Output
<
Tensor
>
(
"
H
idden"
);
hidden
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
batch_size
=
input
->
dims
()[
0
];
...
...
@@ -69,12 +86,12 @@ class GRUUnitKernel : public framework::OpKernel {
// calculate activited gate
Eigen
::
array
<
int
,
2
>
extents
({{
batch_size
,
frame_size
}});
Eigen
::
array
<
int
,
2
>
u_offsets
({{
0
,
0
}});
g
.
slice
(
u_offsets
,
extents
).
device
(
place
)
=
g
.
slice
(
u_offsets
,
extents
).
sigmoid
(
);
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
g
.
slice
(
u_offsets
,
extents
),
g
.
slice
(
u_offsets
,
extents
)
);
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
Eigen
::
array
<
int
,
2
>
r_offsets
({{
0
,
frame_size
}});
g
.
slice
(
r_offsets
,
extents
).
device
(
place
)
=
g
.
slice
(
r_offsets
,
extents
).
sigmoid
(
);
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
g
.
slice
(
r_offsets
,
extents
),
g
.
slice
(
r_offsets
,
extents
)
);
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
r_h_p
.
device
(
place
)
=
r
*
h_p
;
// reset previous hidden state
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
false
,
false
,
batch_size
,
...
...
@@ -84,8 +101,8 @@ class GRUUnitKernel : public framework::OpKernel {
frame_size
*
3
);
Eigen
::
array
<
int
,
2
>
c_offsets
({{
0
,
frame_size
*
2
}});
g
.
slice
(
c_offsets
,
extents
).
device
(
place
)
=
g
.
slice
(
c_offsets
,
extents
).
tanh
(
);
ActCompute
(
context
.
Attr
<
int
>
(
"activation"
),
place
,
g
.
slice
(
c_offsets
,
extents
),
g
.
slice
(
c_offsets
,
extents
)
);
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
// calculate final output
...
...
@@ -94,21 +111,37 @@ class GRUUnitKernel : public framework::OpKernel {
};
template
<
typename
Place
,
typename
T
>
class
GRUUnitGradKernel
:
public
framework
::
OpKernel
{
class
GRUUnitGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
DX
,
typename
DY
>
void
ActGradCompute
(
const
int
act_type
,
const
Device
&
d
,
X
x
,
Y
y
,
DX
dx
,
DY
dy
)
const
{
// x is dummy and won't be used even in Relu(use y instead)
if
(
act_type
==
identity
)
dx
.
device
(
d
)
=
dy
;
else
if
(
act_type
==
sigmoid
)
SigmoidGradFunctor
<
T
>
()(
d
,
x
,
y
,
dy
,
dx
);
else
if
(
act_type
==
tanh
)
TanhGradFunctor
<
T
>
()(
d
,
x
,
y
,
dy
,
dx
);
else
if
(
act_type
==
relu
)
ReluGradFunctor
<
T
>
()(
d
,
x
,
y
,
dy
,
dx
);
else
PADDLE_THROW
(
"unsupported activation type"
);
}
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
input
=
context
.
Input
<
Tensor
>
(
"
i
nput"
);
auto
*
hidden_prev
=
context
.
Input
<
Tensor
>
(
"
hidden_p
rev"
);
auto
*
weight
=
context
.
Input
<
Tensor
>
(
"
w
eight"
);
auto
*
gate
=
context
.
Input
<
Tensor
>
(
"
g
ate"
);
auto
*
reset_hidden_prev
=
context
.
Input
<
Tensor
>
(
"
reset_hidden_p
rev"
);
auto
*
hidden_grad
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"
h
idden"
));
auto
*
input_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
i
nput"
));
auto
*
input
=
context
.
Input
<
Tensor
>
(
"
I
nput"
);
auto
*
hidden_prev
=
context
.
Input
<
Tensor
>
(
"
HiddenP
rev"
);
auto
*
weight
=
context
.
Input
<
Tensor
>
(
"
W
eight"
);
auto
*
gate
=
context
.
Input
<
Tensor
>
(
"
G
ate"
);
auto
*
reset_hidden_prev
=
context
.
Input
<
Tensor
>
(
"
ResetHiddenP
rev"
);
auto
*
hidden_grad
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"
H
idden"
));
auto
*
input_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
I
nput"
));
auto
*
hidden_prev_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
hidden_p
rev"
));
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
HiddenP
rev"
));
auto
*
weight_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
w
eight"
));
auto
*
bias_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
b
ias"
));
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
W
eight"
));
auto
*
bias_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"
B
ias"
));
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
hidden_prev_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
weight_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
...
...
@@ -149,11 +182,11 @@ class GRUUnitGradKernel : public framework::OpKernel {
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
// backward for unactivated update gate
d_g
.
slice
(
u_offsets
,
extents
).
device
(
place
)
=
d_h
*
(
h_p
-
c
)
*
u
*
(
u
.
constant
(
T
(
1
))
-
u
);
ActGradCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
u
,
u
,
d_g
.
slice
(
u_offsets
,
extents
),
d_h
*
(
h_p
-
c
)
);
// backward for unactivated output candidate
d_g
.
slice
(
c_offsets
,
extents
).
device
(
place
)
=
d_h
*
(
u
.
constant
(
T
(
1
))
-
u
)
*
(
c
.
constant
(
T
(
1
))
-
c
*
c
);
ActGradCompute
(
context
.
Attr
<
int
>
(
"activation"
),
place
,
c
,
c
,
d_g
.
slice
(
c_offsets
,
extents
),
d_h
*
(
u
.
constant
(
T
(
1
))
-
u
)
);
// backward for reset_hidden_prev
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
false
,
true
,
batch_size
,
frame_size
,
frame_size
,
1
,
...
...
@@ -167,8 +200,8 @@ class GRUUnitGradKernel : public framework::OpKernel {
gate_grad_data
+
frame_size
*
2
,
frame_size
*
3
,
0
,
weight_grad_data
+
frame_size
*
frame_size
*
2
,
frame_size
);
// backward for unactivated reset gate
d_g
.
slice
(
r_offsets
,
extents
).
device
(
place
)
=
d_r_h_p
*
h_p
*
r
*
(
r
.
constant
(
T
(
1
))
-
r
);
ActGradCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
r
,
r
,
d_g
.
slice
(
r_offsets
,
extents
),
d_r_h_p
*
h_p
);
// backward for update_gate_weight and reset_gate_weight
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
true
,
false
,
frame_size
,
frame_size
*
2
,
batch_size
,
1
,
hidden_prev_data
,
...
...
python/paddle/v2/framework/tests/test_gru_unit_op.py
浏览文件 @
1cabdb87
...
...
@@ -4,54 +4,84 @@ import numpy as np
from
op_test
import
OpTest
def
sigmoid_np
(
x
):
class
GRUActivationType
(
OpTest
):
identity
=
0
sigmoid
=
1
tanh
=
2
relu
=
3
def
identity
(
x
):
return
x
def
sigmoid
(
x
):
return
1.
/
(
1.
+
np
.
exp
(
-
x
))
def
tanh_np
(
x
):
return
2.
*
sigmoid_np
(
2.
*
x
)
-
1.
def
tanh
(
x
):
return
2.
*
sigmoid
(
2.
*
x
)
-
1.
def
relu
(
x
):
return
np
.
maximum
(
x
,
0
)
class
TestGRUUnitOp
(
OpTest
):
activate
=
{
GRUActivationType
.
identity
:
identity
,
GRUActivationType
.
sigmoid
:
sigmoid
,
GRUActivationType
.
tanh
:
tanh
,
GRUActivationType
.
relu
:
relu
,
}
def
setUp
(
self
):
batch_size
=
3
frame_size
=
5
self
.
op_type
=
"gru_unit"
self
.
op_type
=
'gru_unit'
self
.
inputs
=
{
'
i
nput'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
batch_size
,
frame_size
*
3
)).
astype
(
"float32"
),
'
hidden_p
rev'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
batch_size
,
frame_size
)).
astype
(
"float32"
),
'
w
eight'
:
np
.
random
.
uniform
(
'
I
nput'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
batch_size
,
frame_size
*
3
)).
astype
(
'float32'
),
'
HiddenP
rev'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
batch_size
,
frame_size
)).
astype
(
'float32'
),
'
W
eight'
:
np
.
random
.
uniform
(
-
1.
/
math
.
sqrt
(
frame_size
),
1.
/
math
.
sqrt
(
frame_size
),
(
frame_size
,
frame_size
*
3
)).
astype
(
"float32"
),
'
b
ias'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
1
,
frame_size
*
3
)).
astype
(
"float32"
)
(
frame_size
,
frame_size
*
3
)).
astype
(
'float32'
),
'
B
ias'
:
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
1
,
frame_size
*
3
)).
astype
(
'float32'
)
}
x
=
self
.
inputs
[
'input'
]
h_p
=
self
.
inputs
[
'hidden_prev'
]
w
=
self
.
inputs
[
'weight'
]
b
=
self
.
inputs
[
'bias'
]
self
.
attrs
=
{
'activation'
:
GRUActivationType
.
tanh
,
'gate_activation'
:
GRUActivationType
.
sigmoid
}
# GRU calculations
x
=
self
.
inputs
[
'Input'
]
h_p
=
self
.
inputs
[
'HiddenPrev'
]
w
=
self
.
inputs
[
'Weight'
]
b
=
self
.
inputs
[
'Bias'
]
g
=
x
+
np
.
tile
(
b
,
(
batch_size
,
1
))
w_u_r
=
w
.
flatten
()[:
frame_size
*
frame_size
*
2
].
reshape
(
(
frame_size
,
frame_size
*
2
))
u_r
=
sigmoid_np
(
np
.
dot
(
h_p
,
w_u_r
)
+
g
[:,
:
frame_size
*
2
])
u_r
=
self
.
activate
[
self
.
attrs
[
'gate_activation'
]](
np
.
dot
(
h_p
,
w_u_r
)
+
g
[:,
:
frame_size
*
2
])
u
=
u_r
[:,
:
frame_size
]
r
=
u_r
[:,
frame_size
:
frame_size
*
2
]
r_h_p
=
r
*
h_p
w_c
=
w
.
flatten
()[
frame_size
*
frame_size
*
2
:].
reshape
(
(
frame_size
,
frame_size
))
c
=
tanh_np
(
np
.
dot
(
r_h_p
,
w_c
)
+
g
[:,
frame_size
*
2
:])
c
=
self
.
activate
[
self
.
attrs
[
'activation'
]](
np
.
dot
(
r_h_p
,
w_c
)
+
g
[:,
frame_size
*
2
:])
g
=
np
.
hstack
((
u_r
,
c
))
h
=
u
*
h_p
+
(
1
-
u
)
*
c
self
.
outputs
=
{
'gate'
:
g
,
'reset_hidden_prev'
:
r_h_p
,
'hidden'
:
h
}
self
.
outputs
=
{
'Gate'
:
g
,
'ResetHiddenPrev'
:
r_h_p
,
'Hidden'
:
h
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
(
[
'
input'
,
'hidden_prev'
,
'weight'
,
'bias'
],
[
'h
idden'
],
[
'
Input'
,
'HiddenPrev'
,
'Weight'
,
'Bias'
],
[
'H
idden'
],
max_relative_error
=
0.007
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录