Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
085260f3
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
085260f3
编写于
11月 27, 2020
作者:
J
Jack Zhou
提交者:
GitHub
11月 27, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add eigen gru and fix the dropout bug in the rnn
Add eigen gru and fix the dropout bug in the rnn
上级
545df287
变更
6
展开全部
隐藏空白更改
内联
并排
Showing
6 changed file
with
346 addition
and
267 deletion
+346
-267
paddle/fluid/operators/math/detail/gru_cpu_kernel.h
paddle/fluid/operators/math/detail/gru_cpu_kernel.h
+140
-38
paddle/fluid/operators/math/gru_compute.cc
paddle/fluid/operators/math/gru_compute.cc
+49
-5
paddle/fluid/operators/math/gru_compute.h
paddle/fluid/operators/math/gru_compute.h
+1
-1
paddle/fluid/operators/rnn_op.h
paddle/fluid/operators/rnn_op.h
+148
-217
python/paddle/fluid/tests/unittests/rnn/rnn_numpy.py
python/paddle/fluid/tests/unittests/rnn/rnn_numpy.py
+4
-4
python/paddle/fluid/tests/unittests/test_rnn_op.py
python/paddle/fluid/tests/unittests/test_rnn_op.py
+4
-2
未找到文件。
paddle/fluid/operators/math/detail/gru_cpu_kernel.h
浏览文件 @
085260f3
...
...
@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once
#include <type_traits>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/gru_compute.h"
...
...
@@ -21,6 +23,10 @@ namespace paddle {
namespace
operators
{
namespace
math
{
namespace
detail
{
// One-dimensional Eigen dimension descriptor whose single extent is stored
// as int64_t. The helpers in this header build it from a frame size to
// describe the length of a flat vector view.
using
Array1
=
Eigen
::
DSizes
<
int64_t
,
1
>
;
// Namespace-local shorthand that forwards to framework::EigenVector with the
// same template parameters. MajorType defaults to Eigen::RowMajor and
// IndexType defaults to Eigen::DenseIndex.
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenVector
=
framework
::
EigenVector
<
T
,
MajorType
,
IndexType
>
;
#ifndef __NVCC__
...
...
@@ -242,23 +248,46 @@ void hl_avx_gru_forward_final_output(OpFinalOutput op_final_output,
#endif
}
/**
 * @brief Eigen-based reset-output step for a single frame.
 *
 * Applies SigmoidFunctor in place to the first two frame_size-long segments
 * of value.gate_value (named reset gate and update gate here), then
 * overwrites value.reset_output_value with
 *   (reset_output_value + reset_bias) * reset_gate
 * evaluated on the context's Eigen device.
 *
 * @param context     CPU device context; its eigen_device() is used for all
 *                    Eigen evaluations.
 * @param value       Bundle of raw pointers. This function reads/writes
 *                    gate_value and reset_output_value, and reads reset_bias.
 * @param frame_size  Number of elements in each vector view.
 */
template <typename T>
inline void forward_reset_outputV2(const platform::CPUDeviceContext &context,
                                   GRUMetaValue<T> value, int frame_size) {
  using Vec = typename EigenVector<T>::Type;
  using ConstVec = typename EigenVector<T>::ConstType;

  const Array1 dims(frame_size);
  auto &place = *context.eigen_device();

  // Views over the caller's buffers; no data is copied.
  Vec reset_gate(value.gate_value, dims);
  Vec update_gate(value.gate_value + frame_size, dims);
  Vec reset_output(value.reset_output_value, dims);
  ConstVec reset_bias(value.reset_bias, dims);

  // Both gates are activated in place before the reset output is formed,
  // because the product below consumes the activated reset gate.
  SigmoidFunctor<T>()(place, reset_gate, reset_gate);
  SigmoidFunctor<T>()(place, update_gate, update_gate);

  reset_output.device(place) = (reset_output + reset_bias) * reset_gate;
}
template
<
class
OpResetOutput
,
typename
T
>
inline
void
forward_reset_output
(
OpResetOutput
op_reset_output
,
GRUMetaValue
<
T
>
value
,
int
frame_size
,
int
batch_size
,
ActivationType
active_gat
e
,
bool
old_version
=
true
)
{
inline
void
forward_reset_output
(
OpResetOutput
op_reset_output
,
GRUMetaValue
<
T
>
value
,
int
frame_size
,
int
batch_size
,
ActivationType
active_gate
,
bool
old_version
=
tru
e
,
const
platform
::
CPUDeviceContext
*
context
=
nullptr
)
{
for
(
int
b
=
0
;
b
<
batch_size
;
b
++
)
{
if
(
OpResetOutput
::
avx
&&
(
frame_size
>
static_cast
<
int
>
(
8
-
1
))
&&
(
sizeof
(
T
)
==
4
))
{
hl_avx_gru_forward_reset_output
(
op_reset_output
,
value
.
gate_value
,
value
.
reset_output_value
,
value
.
prev_out_value
,
frame_size
,
active_gate
,
old_version
,
value
.
reset_bias
);
if
(
!
old_version
)
{
// use eigen
forward_reset_outputV2
(
*
context
,
value
,
frame_size
);
}
else
{
hl_naive_gru_forward_reset_output
(
op_reset_output
,
value
.
gate_value
,
value
.
reset_output_value
,
value
.
prev_out_value
,
frame_size
,
active_gate
,
old_version
,
value
.
reset_bias
);
if
(
OpResetOutput
::
avx
&&
(
frame_size
&
static_cast
<
int
>
(
8
-
1
))
&&
(
sizeof
(
T
)
==
4
))
{
hl_avx_gru_forward_reset_output
(
op_reset_output
,
value
.
gate_value
,
value
.
reset_output_value
,
value
.
prev_out_value
,
frame_size
,
active_gate
,
old_version
,
value
.
reset_bias
);
}
else
{
hl_naive_gru_forward_reset_output
(
op_reset_output
,
value
.
gate_value
,
value
.
reset_output_value
,
value
.
prev_out_value
,
frame_size
,
active_gate
,
old_version
,
value
.
reset_bias
);
}
}
value
.
gate_value
+=
frame_size
*
3
;
value
.
reset_output_value
+=
frame_size
;
...
...
@@ -268,25 +297,51 @@ inline void forward_reset_output(OpResetOutput op_reset_output,
}
}
/**
 * @brief Eigen-based final-output step for a single frame.
 *
 * Applies TanhFunctor in place to the third frame_size-long segment of
 * value.gate_value (the frame state), then writes
 *   output = (1 - update_gate) * frame_state
 * into value.output_value. When value.prev_out_value is non-null the output
 * is additionally incremented by update_gate * prev_out.
 *
 * @param context     CPU device context; its eigen_device() is used for all
 *                    Eigen evaluations.
 * @param value       Bundle of raw pointers. This function reads/writes
 *                    gate_value and output_value, and reads prev_out_value.
 * @param frame_size  Number of elements in each vector view.
 */
template <typename T>
inline void forward_final_outputV2(const platform::CPUDeviceContext &context,
                                   GRUMetaValue<T> value, int frame_size) {
  using Vec = typename EigenVector<T>::Type;
  using ConstVec = typename EigenVector<T>::ConstType;

  const Array1 dims(frame_size);
  auto &place = *context.eigen_device();

  Vec update_gate(value.gate_value + frame_size, dims);
  Vec frame_state(value.gate_value + 2 * frame_size, dims);
  Vec output(value.output_value, dims);

  // Activate the frame state in place, then form the part of the output
  // that does not depend on the previous output.
  TanhFunctor<T>()(place, frame_state, frame_state);
  output.device(place) = (static_cast<T>(1.0) - update_gate) * frame_state;

  // Without a previous output there is nothing more to add.
  if (!value.prev_out_value) {
    return;
  }

  ConstVec prev_out(value.prev_out_value, dims);
  output.device(place) = output + update_gate * prev_out;
}
template
<
class
OpFinalOutput
,
typename
T
>
inline
void
forward_final_output
(
OpFinalOutput
op_final_output
,
GRUMetaValue
<
T
>
value
,
int
frame_size
,
int
batch_size
,
ActivationType
active_node
,
bool
origin_mode
,
bool
old_version
=
true
)
{
inline
void
forward_final_output
(
OpFinalOutput
op_final_output
,
GRUMetaValue
<
T
>
value
,
int
frame_size
,
int
batch_size
,
ActivationType
active_node
,
bool
origin_mode
,
bool
old_version
=
true
,
const
platform
::
CPUDeviceContext
*
context
=
nullptr
)
{
for
(
int
b
=
0
;
b
<
batch_size
;
b
++
)
{
if
(
OpFinalOutput
::
avx
&&
(
frame_size
>
static_cast
<
int
>
(
8
-
1
))
&&
(
sizeof
(
T
)
==
4
))
{
hl_avx_gru_forward_final_output
(
op_final_output
,
value
.
gate_value
,
value
.
prev_out_value
,
value
.
output_value
,
frame_size
,
active_node
,
origin_mode
,
old_version
);
if
(
!
old_version
)
{
// eigen
forward_final_outputV2
(
*
context
,
value
,
frame_size
);
}
else
{
hl_naive_gru_forward_final_output
(
op_final_output
,
value
.
gate_value
,
if
(
OpFinalOutput
::
avx
&&
(
frame_size
&
static_cast
<
int
>
(
8
-
1
))
&&
(
sizeof
(
T
)
==
4
))
{
hl_avx_gru_forward_final_output
(
op_final_output
,
value
.
gate_value
,
value
.
prev_out_value
,
value
.
output_value
,
frame_size
,
active_node
,
origin_mode
,
old_version
);
}
else
{
hl_naive_gru_forward_final_output
(
op_final_output
,
value
.
gate_value
,
value
.
prev_out_value
,
value
.
output_value
,
frame_size
,
active_node
,
origin_mode
,
old_version
);
}
}
value
.
gate_value
+=
frame_size
*
3
;
value
.
output_value
+=
frame_size
;
if
(
value
.
prev_out_value
)
{
...
...
@@ -664,23 +719,70 @@ inline void backward_reset_grad(OpResetGrad op_reset_grad,
}
}
/**
 * @brief Eigen-based backward step for a single frame.
 *
 * Builds frame_size-long views over three consecutive segments of
 * value.gate_value / grad.gate_grad (named reset gate, update gate and frame
 * state here) and over the reset-output, output and previous-output buffers,
 * then fills the gradient buffers in this order:
 *   1. grad of the update gate (via SigmoidGradFunctor),
 *   2. grad.prev_out_grad is incremented by output_grad * update_gate
 *      (only when grad.prev_out_grad is non-null),
 *   3. grad of the frame state (via TanhGradFunctor),
 *   4. grad of the reset gate (via SigmoidGradFunctor),
 *   5. grad.reset_output_grad = reset_gate * frame_state_grad (only when both
 *      value.prev_out_value and grad.prev_out_grad are non-null).
 *
 * @param context     CPU device context; its eigen_device() is used for all
 *                    Eigen evaluations.
 * @param value       Forward-pass buffers (read only here).
 * @param grad        Gradient buffers (output_grad is read, the rest written).
 * @param frame_size  Number of elements in each vector view.
 */
template <typename T>
inline void gru_backward(const platform::CPUDeviceContext &context,
                         GRUMetaValue<T> value, GRUMetaGrad<T> grad,
                         int frame_size) {
  using Vec = typename EigenVector<T>::Type;
  using ConstVec = typename EigenVector<T>::ConstType;

  const Array1 dims(frame_size);
  auto &place = *context.eigen_device();

  // Forward-pass values.
  Vec reset_gate(value.gate_value, dims);
  Vec update_gate(value.gate_value + frame_size, dims);
  Vec frame_state(value.gate_value + frame_size * 2, dims);
  Vec reset_output(value.reset_output_value, dims);

  // Gradient buffers.
  Vec d_reset_gate(grad.gate_grad, dims);
  Vec d_update_gate(grad.gate_grad + frame_size, dims);
  Vec d_frame_state(grad.gate_grad + frame_size * 2, dims);
  Vec d_output(grad.output_grad, dims);
  Vec d_reset_output(grad.reset_output_grad, dims);

  // Update-gate gradient: the incoming gradient term depends on whether a
  // previous output exists.
  if (!value.prev_out_value) {
    SigmoidGradFunctor<T>()(place, 1 /*useless*/, update_gate,
                            static_cast<T>(-1) * frame_state * d_output,
                            d_update_gate);
  } else {
    ConstVec prev_out(value.prev_out_value, dims);
    SigmoidGradFunctor<T>()(place, 1 /*useless*/, update_gate,
                            (prev_out - frame_state) * d_output,
                            d_update_gate);
  }

  // Accumulate into the previous-output gradient when the caller asked for it.
  if (grad.prev_out_grad) {
    Vec d_prev_out(grad.prev_out_grad, dims);
    d_prev_out.device(place) = d_prev_out + d_output * update_gate;
  }

  // Frame-state gradient; must precede the reset-gate gradient, which reads it.
  TanhGradFunctor<T>()(place, 1 /*useless*/, frame_state,
                       d_output * (static_cast<T>(1.0) - update_gate),
                       d_frame_state);

  // NOTE(review): this divides by the reset gate value; an element that is
  // exactly zero would yield inf/NaN here -- confirm callers rule that out.
  SigmoidGradFunctor<T>()(place, 1 /*useless*/, reset_gate,
                          reset_output / reset_gate * d_frame_state,
                          d_reset_gate);

  if (value.prev_out_value && grad.prev_out_grad) {
    d_reset_output.device(place) = reset_gate * d_frame_state;
  }
}
template
<
class
OpGruGrad
,
typename
T
>
inline
void
cpu_gru_backward
(
OpGruGrad
op_gru_grad
,
GRUMetaValue
<
T
>
value
,
inline
void
cpu_gru_backward
(
const
platform
::
CPUDeviceContext
&
context
,
OpGruGrad
op_gru_grad
,
GRUMetaValue
<
T
>
value
,
GRUMetaGrad
<
T
>
grad
,
int
frame_size
,
int
batch_size
,
ActivationType
active_node
,
ActivationType
active_gate
)
{
for
(
int
b
=
0
;
b
<
batch_size
;
++
b
)
{
if
(
OpGruGrad
::
avx
&&
!
(
frame_size
&
(
8
-
1
))
&&
(
sizeof
(
T
)
==
4
))
{
hl_avx_gru_backward
(
op_gru_grad
,
value
.
gate_value
,
grad
.
gate_grad
,
value
.
prev_out_value
,
grad
.
prev_out_grad
,
value
.
reset_output_value
,
grad
.
reset_output_grad
,
grad
.
output_grad
,
frame_size
,
active_node
,
active_gate
);
}
else
{
hl_naive_gru_backward
(
op_gru_grad
,
value
.
gate_value
,
grad
.
gate_grad
,
value
.
prev_out_value
,
grad
.
prev_out_grad
,
value
.
reset_output_value
,
grad
.
reset_output_grad
,
grad
.
output_grad
,
frame_size
,
active_node
,
active_gate
);
}
// eigen
gru_backward
(
context
,
value
,
grad
,
frame_size
);
value
.
gate_value
+=
frame_size
*
3
;
value
.
reset_output_value
+=
frame_size
;
...
...
paddle/fluid/operators/math/gru_compute.cc
浏览文件 @
085260f3
...
...
@@ -42,7 +42,8 @@ struct GRUUnitFunctor<platform::CPUDeviceContext, T> {
}
detail
::
forward_reset_output
(
detail
::
forward
::
gru_resetOutput
<
T
>
(),
value
,
frame_size
,
batch_size
,
active_gate
);
frame_size
,
batch_size
,
active_gate
,
true
,
&
context
);
if
(
value
.
prev_out_value
)
{
blas
.
GEMM
(
false
,
false
,
batch_size
,
frame_size
,
frame_size
,
1
,
...
...
@@ -53,7 +54,7 @@ struct GRUUnitFunctor<platform::CPUDeviceContext, T> {
detail
::
forward_final_output
(
detail
::
forward
::
gru_finalOutput
<
T
>
(),
value
,
frame_size
,
batch_size
,
active_node
,
origin_mode
);
origin_mode
,
&
context
);
#endif
}
};
...
...
@@ -116,7 +117,8 @@ struct GRUUnitFunctorV2<platform::CPUDeviceContext, T> {
value
.
reset_output_value
);
}
detail
::
forward_reset_output
(
detail
::
forward
::
gru_resetOutput
<
T
>
(),
value
,
frame_size
,
batch_size
,
active_gate
,
false
);
frame_size
,
batch_size
,
active_gate
,
false
,
&
context
);
T
*
cell_state_value
=
value
.
gate_value
+
2
*
frame_size
;
T
*
reset_output_value
=
value
.
reset_output_value
;
...
...
@@ -129,7 +131,7 @@ struct GRUUnitFunctorV2<platform::CPUDeviceContext, T> {
detail
::
forward_final_output
(
detail
::
forward
::
gru_finalOutput
<
T
>
(),
value
,
frame_size
,
batch_size
,
active_node
,
true
,
false
);
false
,
&
context
);
#endif
}
};
...
...
@@ -144,8 +146,50 @@ struct GRUUnitGradFunctorV2<platform::CPUDeviceContext, T> {
#ifndef __NVCC__
// calculate grad_update_gate, grad_frame_state,
// grad_reset_output, grad_reset_gate
detail
::
cpu_gru_backward
(
detail
::
backward
::
gru
<
T
>
(),
value
,
grad
,
detail
::
cpu_gru_backward
(
context
,
detail
::
backward
::
gru
<
T
>
(),
value
,
grad
,
frame_size
,
batch_size
,
active_node
,
active_gate
);
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
context
);
if
(
grad
.
prev_out_grad
&&
value
.
prev_out_value
)
{
// update prev_out_grad
blas
.
GEMM
(
false
,
false
,
batch_size
,
frame_size
,
frame_size
,
1
,
grad
.
gate_grad
,
frame_size
*
3
,
value
.
gate_weight
,
frame_size
,
1
,
grad
.
prev_out_grad
,
frame_size
);
blas
.
GEMM
(
false
,
false
,
batch_size
,
frame_size
,
frame_size
,
1
,
grad
.
gate_grad
+
frame_size
,
frame_size
*
3
,
value
.
gate_weight
+
frame_size
*
frame_size
,
frame_size
,
1
,
grad
.
prev_out_grad
,
frame_size
);
blas
.
GEMM
(
false
,
false
,
batch_size
,
frame_size
,
frame_size
,
1
,
grad
.
reset_output_grad
,
frame_size
,
value
.
state_weight
,
frame_size
,
1
,
grad
.
prev_out_grad
,
frame_size
);
// update weight_hh_grad
if
(
grad
.
gate_weight_grad
)
{
// reset gate
blas
.
GEMM
(
true
,
false
,
frame_size
,
frame_size
,
batch_size
,
1
,
grad
.
gate_grad
,
frame_size
*
3
,
value
.
prev_out_value
,
frame_size
,
1
,
grad
.
gate_weight_grad
,
frame_size
);
// update gate
blas
.
GEMM
(
true
,
false
,
frame_size
,
frame_size
,
batch_size
,
1
,
grad
.
gate_grad
+
frame_size
,
frame_size
*
3
,
value
.
prev_out_value
,
frame_size
,
1
,
grad
.
gate_weight_grad
+
frame_size
*
frame_size
,
frame_size
);
// cell state
blas
.
GEMM
(
true
,
false
,
frame_size
,
frame_size
,
batch_size
,
1
,
grad
.
reset_output_grad
,
frame_size
,
value
.
prev_out_value
,
frame_size
,
1
,
grad
.
state_weight_grad
,
frame_size
);
}
}
// update bias_hh_grad
T
*
gate_grad
=
grad
.
gate_grad
;
T
*
bias_hh_grad
=
grad
.
bias_hh_grad
;
T
*
state_bias_grad
=
grad
.
bias_hh_grad
+
2
*
frame_size
;
T
*
reset_output_grad
=
grad
.
reset_output_grad
;
for
(
int
b
=
0
;
b
<
batch_size
;
++
b
)
{
blas
.
VADD
(
2
*
frame_size
,
bias_hh_grad
,
gate_grad
,
bias_hh_grad
);
blas
.
VADD
(
frame_size
,
state_bias_grad
,
reset_output_grad
,
state_bias_grad
);
gate_grad
+=
3
*
frame_size
;
reset_output_grad
+=
frame_size
;
}
#endif
}
};
...
...
paddle/fluid/operators/math/gru_compute.h
浏览文件 @
085260f3
...
...
@@ -38,7 +38,7 @@ struct GRUMetaGrad {
T
*
reset_output_grad
;
T
*
output_grad
;
T
*
prev_out_grad
;
T
*
state_bias
_grad
;
T
*
bias_hh
_grad
;
};
template
<
typename
DeviceContext
,
typename
T
>
...
...
paddle/fluid/operators/rnn_op.h
浏览文件 @
085260f3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/rnn/rnn_numpy.py
浏览文件 @
085260f3
...
...
@@ -294,7 +294,6 @@ def unstack(array, axis=0):
def dropout(array, p=0.5):
    """Apply dropout with rescaling to a NumPy array.

    Each element is kept with probability ``1 - p`` and scaled by
    ``1 / (1 - p)``; dropped elements become zero.

    Args:
        array: NumPy array to mask. Its shape and dtype are used for the mask.
        p: Drop probability. ``0.0`` returns ``array`` itself unchanged;
            values ``>= 1.0`` drop every element.

    Returns:
        ``array`` itself when ``p == 0.0``, otherwise a new array of the same
        shape as ``array``.
    """
    if p == 0.0:
        return array
    if p >= 1.0:
        # Everything is dropped. Computing mask / (1 - p) here would be 0 / 0
        # and turn the whole result into NaN, so return zeros directly.
        return np.zeros_like(array)
    keep_prob = 1 - p
    mask = (np.random.uniform(size=array.shape) < keep_prob).astype(array.dtype)
    return array * (mask / keep_prob)
...
...
@@ -390,11 +389,12 @@ class RNNMixin(LayerListMixin):
states
=
split_states
(
initial_states
,
self
.
num_directions
==
2
,
self
.
state_components
)
final_states
=
[]
input_temp
=
inputs
for
i
,
rnn_layer
in
enumerate
(
self
):
if
i
>
0
:
inputs
=
dropout
(
inputs
,
self
.
dropout
)
outputs
,
final_state
=
rnn_layer
(
inputs
,
states
[
i
],
sequence_length
)
input_temp
=
dropout
(
inputs
,
self
.
dropout
)
outputs
,
final_state
=
rnn_layer
(
input_temp
,
states
[
i
],
sequence_length
)
final_states
.
append
(
final_state
)
inputs
=
outputs
...
...
python/paddle/fluid/tests/unittests/test_rnn_op.py
浏览文件 @
085260f3
...
...
@@ -53,6 +53,7 @@ class TestRNNOp(OpTest):
self
.
is_bidirec
=
False
self
.
mode
=
"LSTM"
self
.
is_test
=
False
self
.
dropout
=
0.0
self
.
set_attrs
()
self
.
direction_num
=
2
if
self
.
is_bidirec
else
1
...
...
@@ -76,7 +77,8 @@ class TestRNNOp(OpTest):
hidden_size
,
num_layers
=
self
.
num_layers
,
time_major
=
True
,
direction
=
direction
)
direction
=
direction
,
dropout
=
self
.
dropout
)
flat_w
=
get_params_for_net
(
rnn1
)
output
,
(
last_hidden
,
last_cell
)
=
rnn1
(
...
...
@@ -101,7 +103,7 @@ class TestRNNOp(OpTest):
'PreState'
:
[(
'init_h'
,
init_h
),
(
'init_c'
,
init_c
)],
}
self
.
attrs
=
{
'dropout_prob'
:
0.0
,
'dropout_prob'
:
self
.
dropout
,
'is_bidirec'
:
self
.
is_bidirec
,
'input_size'
:
input_size
,
'hidden_size'
:
hidden_size
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录