PaddlePaddle / Paddle
Commit 93acef3b (unverified)
Authored Dec 08, 2022 by lugimzzz; committed by GitHub on Dec 08, 2022.
【fluid clean】remove fluid.dygraph.rnn.lstmcell and fluid.dygraph.rnn.grucell (#48719)
Parent: 2bba3e18

Showing 5 changed files with 0 additions and 1051 deletions (+0, −1051).
python/paddle/fluid/dygraph/__init__.py (+0, −4)
python/paddle/fluid/dygraph/rnn.py (+0, −509)
python/paddle/fluid/tests/unittests/test_cudnn_grucell.py (+0, −256)
python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py (+0, −278)
tools/parallel_UT_rule.py (+0, −4)
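For downstream code that still used these cells, the closest equivalents are `paddle.nn.LSTMCell` and `paddle.nn.GRUCell`. A minimal migration sketch, assuming Paddle 2.x APIs (note the `paddle.nn` constructors take `input_size, hidden_size`, the reverse of the removed classes' `hidden_size, input_size`, and the LSTM state travels as a `(hidden, cell)` tuple):

```python
# Minimal migration sketch (assumes Paddle 2.x; not part of this commit).
import paddle

batch_size, input_size, hidden_size = 64, 128, 256

x = paddle.uniform([batch_size, input_size], min=-0.1, max=0.1)
prev_h = paddle.uniform([batch_size, hidden_size], min=-0.1, max=0.1)
prev_c = paddle.uniform([batch_size, hidden_size], min=-0.1, max=0.1)

# Constructor order is (input_size, hidden_size), unlike the removed cells.
lstm_cell = paddle.nn.LSTMCell(input_size, hidden_size)
out, (new_h, new_c) = lstm_cell(x, (prev_h, prev_c))

gru_cell = paddle.nn.GRUCell(input_size, hidden_size)
out, new_h = gru_cell(x, prev_h)

print(out.shape, new_h.shape, new_c.shape)  # [64, 256] each
```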
python/paddle/fluid/dygraph/__init__.py — view file @ 93acef3b

@@ -43,9 +43,6 @@ from .io import *
 from . import static_runner
 from .static_runner import StaticModelRunner
-from . import rnn
-from .rnn import *
 from . import amp
 from .amp import *
@@ -60,5 +57,4 @@ __all__ += parallel.__all__
 __all__ += checkpoint.__all__
 __all__ += learning_rate_scheduler.__all__
 __all__ += io.__all__
-__all__ += rnn.__all__
 __all__ += amp.__all__
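With the `rnn` re-exports gone from this `__init__.py`, the old import path stops working. A small sketch of the expected failure mode, assuming a build that includes this commit:

```python
# Sketch: after this commit, the old import path raises ImportError.
try:
    from paddle.fluid.dygraph import LSTMCell, GRUCell
except ImportError:
    from paddle.nn import LSTMCell, GRUCell  # migration target (see above)
```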
python/paddle/fluid/dygraph/rnn.py — deleted (100644 → 0); view file @ 2bba3e18
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy

import paddle

from . import Layer
from ..layers import (
    concat,
    fill_constant,
    elementwise_mul,
    split,
)

__all__ = ['LSTMCell', 'GRUCell']


class LSTMCell(Layer):
    r"""
    LSTMCell implementation using basic operators.
    There are two LSTMCell versions; the default one is compatible with the
    CUDNN LSTM implementation, and its algorithm can be described by the
    equations below.

        .. math::

            i_t &= sigmoid(W_{ix}x_{t} + W_{ih}h_{t-1} + bx_i + bh_i)

            f_t &= sigmoid(W_{fx}x_{t} + W_{fh}h_{t-1} + bx_f + bh_f)

            o_t &= sigmoid(W_{ox}x_{t} + W_{oh}h_{t-1} + bx_o + bh_o)

            \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + bx_c + bh_c)

            c_t &= f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t}

            h_t &= o_t \\odot tanh(c_t)

    The other LSTMCell version is compatible with the BasicLSTMUnit used in
    static graph, and its algorithm can be described by the equations below.

        .. math::

            i_t &= sigmoid(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)

            f_t &= sigmoid(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget\\_bias)

            o_t &= sigmoid(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o)

            \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c)

            c_t &= f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t}

            h_t &= o_t \\odot tanh(c_t)

    Args:
        hidden_size (integer): The hidden size used in the Cell.
        input_size (integer): The input size used in the Cell.
        param_attr (ParamAttr|None): The parameter attribute for the learnable
            weight matrix. If set to None or one attribute of ParamAttr,
            LSTMCell will create a ParamAttr as param_attr. If the Initializer
            of param_attr is not set, the parameter is initialized with
            Xavier. Default: None.
        bias_attr (ParamAttr|None): The parameter attribute for the bias of
            LSTMCell. If set to None or one attribute of ParamAttr, LSTMCell
            will create a ParamAttr as bias_attr. If the Initializer of
            bias_attr is not set, the bias is initialized as zero.
            Default: None.
        gate_activation (function|None): The activation function for gates
            (actGate). Default: 'fluid.layers.sigmoid'.
        activation (function|None): The activation function for cells
            (actNode). Default: 'fluid.layers.tanh'.
        forget_bias (float|1.0): Forget bias used when computing the forget
            gate. It is not used in the default (CUDNN-compatible)
            implementation.
        use_cudnn_impl (bool|True): Whether to use the CUDNN-compatible
            LSTMCell.
        dtype (string): Data type used in this cell.

    Returns:
        None

    Examples:

        .. code-block:: python

            from paddle import fluid
            import paddle.fluid.core as core
            from paddle.fluid.dygraph import LSTMCell
            import numpy as np

            batch_size = 64
            input_size = 128
            hidden_size = 256

            step_input_np = np.random.uniform(-0.1, 0.1, (
                batch_size, input_size)).astype('float64')
            pre_hidden_np = np.random.uniform(-0.1, 0.1, (
                batch_size, hidden_size)).astype('float64')
            pre_cell_np = np.random.uniform(-0.1, 0.1, (
                batch_size, hidden_size)).astype('float64')

            if core.is_compiled_with_cuda():
                place = core.CUDAPlace(0)
            else:
                place = core.CPUPlace()

            with fluid.dygraph.guard(place):
                cudnn_lstm = LSTMCell(hidden_size, input_size)

                step_input_var = fluid.dygraph.to_variable(step_input_np)
                pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
                pre_cell_var = fluid.dygraph.to_variable(pre_cell_np)

                new_hidden, new_cell = cudnn_lstm(
                    step_input_var, pre_hidden_var, pre_cell_var)
    """

    def __init__(
        self,
        hidden_size,
        input_size,
        param_attr=None,
        bias_attr=None,
        gate_activation=None,
        activation=None,
        forget_bias=1.0,
        use_cudnn_impl=True,
        dtype='float64',
    ):
        super().__init__(dtype)

        self._hidden_size = hidden_size
        self._input_size = input_size
        self._param_attr = param_attr
        self._bias_attr = bias_attr
        self._dtype = dtype
        self._gate_activation = gate_activation or paddle.nn.functional.sigmoid
        self._activation = activation or paddle.tanh
        self._use_cudnn_impl = use_cudnn_impl

        if self._use_cudnn_impl:
            # Deep-copy named attrs so the per-parameter suffixes below do not
            # mutate the caller's ParamAttr.
            if (
                self._param_attr is not None
                and self._param_attr.name is not None
            ):
                weight_ih_param_attr = copy.deepcopy(self._param_attr)
                weight_hh_param_attr = copy.deepcopy(self._param_attr)
                weight_ih_param_attr.name += "_weight_ih"
                weight_hh_param_attr.name += "_weight_hh"
            else:
                weight_ih_param_attr = self._param_attr
                weight_hh_param_attr = self._param_attr

            if (
                self._bias_attr is not None
                and self._bias_attr.name is not None
            ):
                bias_ih_param_attr = copy.deepcopy(self._bias_attr)
                bias_hh_param_attr = copy.deepcopy(self._bias_attr)
                bias_ih_param_attr.name += "_bias_ih"
                bias_hh_param_attr.name += "_bias_hh"
            else:
                bias_ih_param_attr = self._bias_attr
                bias_hh_param_attr = self._bias_attr

            # cuDNN layout: separate input/hidden projections, 4 stacked gates.
            self._weight_ih = self.create_parameter(
                attr=weight_ih_param_attr,
                shape=[4 * self._hidden_size, self._input_size],
                dtype=self._dtype,
            )

            self._weight_hh = self.create_parameter(
                attr=weight_hh_param_attr,
                shape=[4 * self._hidden_size, self._hidden_size],
                dtype=self._dtype,
            )

            self._bias_ih = self.create_parameter(
                attr=bias_ih_param_attr,
                shape=[4 * self._hidden_size],
                dtype=self._dtype,
                is_bias=True,
            )
            self._bias_hh = self.create_parameter(
                attr=bias_hh_param_attr,
                shape=[4 * self._hidden_size],
                dtype=self._dtype,
                is_bias=True,
            )
        else:
            # BasicLSTMUnit layout: one weight over the [input, hidden] concat.
            self._forget_bias = fill_constant(
                [1], dtype=dtype, value=forget_bias
            )
            self._forget_bias.stop_gradient = False

            self._weight = self.create_parameter(
                attr=self._param_attr,
                shape=[
                    self._input_size + self._hidden_size,
                    4 * self._hidden_size,
                ],
                dtype=dtype,
            )

            self._bias = self.create_parameter(
                attr=self._bias_attr,
                shape=[4 * self._hidden_size],
                dtype=dtype,
                is_bias=True,
            )

    def forward(self, input, pre_hidden, pre_cell):
        if self._use_cudnn_impl:
            igates = paddle.matmul(input, y=self._weight_ih, transpose_y=True)
            igates = paddle.add(igates, self._bias_ih)
            hgates = paddle.matmul(
                pre_hidden, self._weight_hh, transpose_y=True
            )
            hgates = paddle.add(hgates, self._bias_hh)

            # Gate order in the cuDNN layout: input, forget, cell, output.
            chunked_igates = split(igates, num_or_sections=4, dim=1)
            chunked_hgates = split(hgates, num_or_sections=4, dim=1)

            ingate = paddle.add(chunked_igates[0], chunked_hgates[0])
            ingate = self._gate_activation(ingate)

            forgetgate = paddle.add(chunked_igates[1], chunked_hgates[1])
            forgetgate = self._gate_activation(forgetgate)

            cellgate = paddle.add(chunked_igates[2], chunked_hgates[2])
            cellgate = self._activation(cellgate)

            outgate = paddle.add(chunked_igates[3], chunked_hgates[3])
            outgate = self._gate_activation(outgate)

            new_cell = (forgetgate * pre_cell) + (ingate * cellgate)
            new_hidden = outgate * self._activation(new_cell)
        else:
            # BasicLSTMUnit gate order: i, j (cell candidate), f, o.
            concat_input_hidden = concat([input, pre_hidden], 1)
            gate_input = paddle.matmul(x=concat_input_hidden, y=self._weight)

            gate_input = paddle.add(gate_input, self._bias)
            i, j, f, o = split(gate_input, num_or_sections=4, dim=-1)
            new_cell = paddle.add(
                paddle.multiply(
                    pre_cell,
                    self._gate_activation(paddle.add(f, self._forget_bias)),
                ),
                paddle.multiply(
                    paddle.nn.functional.sigmoid(i), paddle.tanh(j)
                ),
            )
            new_hidden = self._activation(new_cell) * self._gate_activation(o)

        return new_hidden, new_cell


class GRUCell(Layer):
    r"""
    GRU implementation using basic operators.
    There are two GRUCell versions; the default one is compatible with the
    CUDNN GRU implementation, and its algorithm can be described by the
    equations below.

        .. math::

            u_t & = sigmoid(W_{ux} x_{t} + b_ux + W_{uh} h_{t-1} + b_uh)

            r_t & = sigmoid(W_{rx} x_{t} + b_rx + W_{rh} h_{t-1} + b_rh)

            \\tilde{h_{t}} & = tanh(W_{cx} x_{t} + b_cx + r_t \\odot (W_{ch} h_{t-1} + b_ch))

            h_t & = u_t h_{t-1} + (1-u_t) \\tilde{h_{t}}

    The other GRUCell version is compatible with the BasicGRUUnit used in
    static graph, and its algorithm can be described by the equations below.

        .. math::

            u_t & = sigmoid(W_{ux} x_{t} + W_{uh} h_{t-1} + b_u)

            r_t & = sigmoid(W_{rx} x_{t} + W_{rh} h_{t-1} + b_r)

            \\tilde{h_{t}} & = tanh(W_{cx} x_{t} + W_{ch} \\odot(r_t, h_{t-1}) + b_m)

            h_t & = u_t h_{t-1} + (1-u_t) \\tilde{h_{t}}

    Args:
        hidden_size (integer): The hidden size used in the Cell.
        input_size (integer): The input size used in the Cell.
        param_attr (ParamAttr|None): The parameter attribute for the learnable
            weight matrix. If set to None or one attribute of ParamAttr,
            GRUCell will create a ParamAttr as param_attr. If the Initializer
            of param_attr is not set, the parameter is initialized with
            Xavier. Default: None.
        bias_attr (ParamAttr|None): The parameter attribute for the bias of
            GRUCell. If set to None or one attribute of ParamAttr, GRUCell
            will create a ParamAttr as bias_attr. If the Initializer of
            bias_attr is not set, the bias is initialized as zero.
            Default: None.
        gate_activation (function|None): The activation function for gates
            (actGate). Default: 'fluid.layers.sigmoid'.
        activation (function|None): The activation function for the cell
            (actNode). Default: 'fluid.layers.tanh'.
        use_cudnn_impl (bool|True): Whether to use the CUDNN-compatible
            GRUCell.
        dtype (string): Data type used in this cell.

    Returns:
        None

    Examples:

        .. code-block:: python

            from paddle import fluid
            import paddle.fluid.core as core
            from paddle.fluid.dygraph import GRUCell
            import numpy as np

            batch_size = 64
            input_size = 128
            hidden_size = 256

            step_input_np = np.random.uniform(-0.1, 0.1, (
                batch_size, input_size)).astype('float64')
            pre_hidden_np = np.random.uniform(-0.1, 0.1, (
                batch_size, hidden_size)).astype('float64')

            if core.is_compiled_with_cuda():
                place = core.CUDAPlace(0)
            else:
                place = core.CPUPlace()

            with fluid.dygraph.guard(place):
                cudnn_gru = GRUCell(hidden_size, input_size)

                step_input_var = fluid.dygraph.to_variable(step_input_np)
                pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)

                new_hidden = cudnn_gru(step_input_var, pre_hidden_var)
    """

    def __init__(
        self,
        hidden_size,
        input_size,
        param_attr=None,
        bias_attr=None,
        gate_activation=None,
        activation=None,
        use_cudnn_impl=True,
        dtype='float64',
    ):
        super().__init__()

        self._hidden_size = hidden_size
        self._input_size = input_size
        self._param_attr = param_attr
        self._bias_attr = bias_attr
        self._dtype = dtype
        self._gate_activation = gate_activation or paddle.nn.functional.sigmoid
        self._activation = activation or paddle.tanh
        self._use_cudnn_impl = use_cudnn_impl

        if self._use_cudnn_impl:
            if (
                self._param_attr is not None
                and self._param_attr.name is not None
            ):
                weight_ih_param_attr = copy.deepcopy(self._param_attr)
                weight_hh_param_attr = copy.deepcopy(self._param_attr)
                weight_ih_param_attr.name += "_weight_ih"
                weight_hh_param_attr.name += "_weight_hh"
            else:
                weight_ih_param_attr = self._param_attr
                weight_hh_param_attr = self._param_attr

            if (
                self._bias_attr is not None
                and self._bias_attr.name is not None
            ):
                bias_ih_param_attr = copy.deepcopy(self._bias_attr)
                bias_hh_param_attr = copy.deepcopy(self._bias_attr)
                bias_ih_param_attr.name += "_bias_ih"
                bias_hh_param_attr.name += "_bias_hh"
            else:
                bias_ih_param_attr = self._bias_attr
                bias_hh_param_attr = self._bias_attr

            # cuDNN layout: 3 stacked gates (reset, update, new).
            self._weight_ih = self.create_parameter(
                attr=weight_ih_param_attr,
                shape=[3 * self._hidden_size, self._input_size],
                dtype=self._dtype,
            )

            self._weight_hh = self.create_parameter(
                attr=weight_hh_param_attr,
                shape=[3 * self._hidden_size, self._hidden_size],
                dtype=self._dtype,
            )

            self._bias_ih = self.create_parameter(
                attr=bias_ih_param_attr,
                shape=[3 * self._hidden_size],
                dtype=self._dtype,
                is_bias=True,
            )
            self._bias_hh = self.create_parameter(
                attr=bias_hh_param_attr,
                shape=[3 * self._hidden_size],
                dtype=self._dtype,
                is_bias=True,
            )
        else:
            if (
                self._param_attr is not None
                and self._param_attr.name is not None
            ):
                gate_weight_param_attr = copy.deepcopy(self._param_attr)
                candidate_weight_param_attr = copy.deepcopy(self._param_attr)
                gate_weight_param_attr.name += "_gate_weight"
                candidate_weight_param_attr.name += "_candidate_weight"
            else:
                gate_weight_param_attr = self._param_attr
                candidate_weight_param_attr = self._param_attr

            if (
                self._bias_attr is not None
                and self._bias_attr.name is not None
            ):
                gate_bias_param_attr = copy.deepcopy(self._bias_attr)
                candidate_bias_param_attr = copy.deepcopy(self._bias_attr)
                gate_bias_param_attr.name += "_gate_bias"
                candidate_bias_param_attr.name += "_candidate_bias"
            else:
                gate_bias_param_attr = self._bias_attr
                candidate_bias_param_attr = self._bias_attr

            # BasicGRUUnit layout: gate weight over the [input, hidden] concat
            # plus a separate candidate projection.
            self._gate_weight = self.create_parameter(
                attr=gate_weight_param_attr,
                shape=[
                    self._input_size + self._hidden_size,
                    2 * self._hidden_size,
                ],
                dtype=dtype,
            )

            self._candidate_weight = self.create_parameter(
                attr=candidate_weight_param_attr,
                shape=[
                    self._input_size + self._hidden_size,
                    self._hidden_size,
                ],
                dtype=dtype,
            )

            self._gate_bias = self.create_parameter(
                attr=gate_bias_param_attr,
                shape=[2 * self._hidden_size],
                dtype=dtype,
                is_bias=True,
            )
            self._candidate_bias = self.create_parameter(
                attr=candidate_bias_param_attr,
                shape=[self._hidden_size],
                dtype=dtype,
                is_bias=True,
            )

    def forward(self, input, pre_hidden):
        if self._use_cudnn_impl:
            igates = paddle.matmul(input, y=self._weight_ih, transpose_y=True)
            igates = paddle.add(igates, self._bias_ih)
            hgates = paddle.matmul(
                pre_hidden, self._weight_hh, transpose_y=True
            )
            hgates = paddle.add(hgates, self._bias_hh)

            chunked_igates = split(igates, num_or_sections=3, dim=1)
            chunked_hgates = split(hgates, num_or_sections=3, dim=1)

            reset_gate = paddle.add(chunked_igates[0], chunked_hgates[0])
            reset_gate = self._gate_activation(reset_gate)

            input_gate = paddle.add(chunked_igates[1], chunked_hgates[1])
            input_gate = self._gate_activation(input_gate)

            _temp = reset_gate * chunked_hgates[2]
            new_gate = paddle.add(chunked_igates[2], _temp)
            new_gate = self._activation(new_gate)

            new_hidden = (pre_hidden - new_gate) * input_gate + new_gate
        else:
            concat_input_hidden = concat([input, pre_hidden], 1)

            gate_input = paddle.matmul(
                x=concat_input_hidden, y=self._gate_weight
            )

            gate_input = paddle.add(gate_input, self._gate_bias)
            gate_input = self._gate_activation(gate_input)
            r, u = split(gate_input, num_or_sections=2, dim=1)

            r_hidden = r * pre_hidden

            candidate = paddle.matmul(
                concat([input, r_hidden], 1), self._candidate_weight
            )
            candidate = paddle.add(candidate, self._candidate_bias)

            c = self._activation(candidate)
            new_hidden = u * pre_hidden + (1 - u) * c

        return new_hidden
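A side note on the deleted `__init__` logic: when a named `param_attr`/`bias_attr` is passed, the code deep-copies it before appending the per-parameter suffixes. The copy matters because both weights would otherwise share, and repeatedly mutate, the caller's single attribute object. A self-contained sketch of the hazard, using a hypothetical `Attr` stand-in rather than the real `ParamAttr`:

```python
import copy


class Attr:  # hypothetical stand-in for fluid.ParamAttr
    def __init__(self, name):
        self.name = name


shared = Attr("param_attr")

# Buggy: both names alias one object, so the suffixes stack up.
a = b = shared
a.name += "_weight_ih"
b.name += "_weight_hh"
print(shared.name)  # param_attr_weight_ih_weight_hh -- caller's attr corrupted

# What the deleted code did: copy first, then rename each copy independently.
shared = Attr("param_attr")
a, b = copy.deepcopy(shared), copy.deepcopy(shared)
a.name += "_weight_ih"
b.name += "_weight_hh"
print(shared.name, a.name, b.name)
# param_attr param_attr_weight_ih param_attr_weight_hh
```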
python/paddle/fluid/tests/unittests/test_cudnn_grucell.py — deleted (100644 → 0); view file @ 2bba3e18
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np

import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph import GRUCell

np.random.seed(123)


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def tanh(x):
    # tanh(x) = 2 * sigmoid(2x) - 1
    return 2.0 * sigmoid(2.0 * x) - 1.0


def cudnn_step(
    step_input_np, pre_hidden_np, weight_ih, bias_ih, weight_hh, bias_hh
):
    # NumPy reference for one step of the cuDNN-compatible GRU.
    igates = np.matmul(step_input_np, weight_ih.transpose(1, 0))
    igates += bias_ih
    hgates = np.matmul(pre_hidden_np, weight_hh.transpose(1, 0))
    hgates += bias_hh

    chunked_igates = np.split(igates, indices_or_sections=3, axis=1)
    chunked_hgates = np.split(hgates, indices_or_sections=3, axis=1)

    reset_gate = chunked_igates[0] + chunked_hgates[0]
    reset_gate = sigmoid(reset_gate)

    input_gate = chunked_igates[1] + chunked_hgates[1]
    input_gate = sigmoid(input_gate)

    _temp = reset_gate * chunked_hgates[2]
    new_gate = chunked_igates[2] + _temp
    new_gate = tanh(new_gate)

    new_hidden = (pre_hidden_np - new_gate) * input_gate + new_gate

    return new_hidden


def non_cudnn_step(
    step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b
):
    # NumPy reference for one step of the BasicGRUUnit-compatible GRU.
    concat_1 = np.concatenate([step_in, pre_hidden], 1)

    gate_input = np.matmul(concat_1, gate_w)
    gate_input += gate_b
    gate_input = sigmoid(gate_input)
    r, u = np.split(gate_input, indices_or_sections=2, axis=1)

    r_hidden = r * pre_hidden

    candidate = np.matmul(
        np.concatenate([step_in, r_hidden], 1), candidate_w
    )
    candidate += candidate_b
    c = tanh(candidate)

    new_hidden = u * pre_hidden + (1 - u) * c

    return new_hidden


class TestCudnnGRU(unittest.TestCase):
    def setUp(self):
        self.input_size = 100
        self.hidden_size = 200
        self.batch_size = 64

    def test_run(self):
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        with fluid.dygraph.guard(place):
            param_attr = fluid.ParamAttr(name="param_attr")
            bias_attr = fluid.ParamAttr(name="bias_attr")
            named_cudnn_gru = GRUCell(
                self.hidden_size, self.input_size, param_attr, bias_attr
            )
            cudnn_gru = GRUCell(self.hidden_size, self.input_size)
            param_list = cudnn_gru.state_dict()
            named_param_list = named_cudnn_gru.state_dict()

            # process weight and bias
            weight_ih_name = "_weight_ih"
            bias_ih_name = "_bias_ih"
            weight_hh_name = "_weight_hh"
            bias_hh_name = "_bias_hh"

            weight_ih = param_list[weight_ih_name].numpy()
            weight_ih = np.random.uniform(
                -0.1, 0.1, size=weight_ih.shape
            ).astype('float64')
            param_list[weight_ih_name].set_value(weight_ih)
            named_param_list[weight_ih_name].set_value(weight_ih)

            bias_ih = param_list[bias_ih_name].numpy()
            bias_ih = np.random.uniform(
                -0.1, 0.1, size=bias_ih.shape
            ).astype('float64')
            param_list[bias_ih_name].set_value(bias_ih)
            named_param_list[bias_ih_name].set_value(bias_ih)

            weight_hh = param_list[weight_hh_name].numpy()
            weight_hh = np.random.uniform(
                -0.1, 0.1, size=weight_hh.shape
            ).astype('float64')
            param_list[weight_hh_name].set_value(weight_hh)
            named_param_list[weight_hh_name].set_value(weight_hh)

            bias_hh = param_list[bias_hh_name].numpy()
            bias_hh = np.random.uniform(
                -0.1, 0.1, size=bias_hh.shape
            ).astype('float64')
            param_list[bias_hh_name].set_value(bias_hh)
            named_param_list[bias_hh_name].set_value(bias_hh)

            step_input_np = np.random.uniform(
                -0.1, 0.1, (self.batch_size, self.input_size)
            ).astype('float64')
            pre_hidden_np = np.random.uniform(
                -0.1, 0.1, (self.batch_size, self.hidden_size)
            ).astype('float64')

            step_input_var = fluid.dygraph.to_variable(step_input_np)
            pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
            api_out = cudnn_gru(step_input_var, pre_hidden_var)
            named_api_out = named_cudnn_gru(step_input_var, pre_hidden_var)

            np_out = cudnn_step(
                step_input_np,
                pre_hidden_np,
                weight_ih,
                bias_ih,
                weight_hh,
                bias_hh,
            )

            np.testing.assert_allclose(
                api_out.numpy(), np_out, rtol=1e-05, atol=0
            )
            np.testing.assert_allclose(
                named_api_out.numpy(), np_out, rtol=1e-05, atol=0
            )


class TestNonCudnnGRU(unittest.TestCase):
    def setUp(self):
        self.input_size = 100
        self.hidden_size = 200
        self.batch_size = 64

    def test_run(self):
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        with fluid.dygraph.guard(place):
            param_attr = fluid.ParamAttr(name="param_attr")
            bias_attr = fluid.ParamAttr(name="bias_attr")
            named_non_cudnn_gru = GRUCell(
                self.hidden_size,
                self.input_size,
                param_attr,
                bias_attr,
                use_cudnn_impl=False,
            )
            non_cudnn_gru = GRUCell(
                self.hidden_size, self.input_size, use_cudnn_impl=False
            )
            param_list = non_cudnn_gru.state_dict()
            named_param_list = named_non_cudnn_gru.state_dict()

            # process weight and bias
            gate_w_name = "_gate_weight"
            gate_b_name = "_gate_bias"
            candidate_w_name = "_candidate_weight"
            candidate_b_name = "_candidate_bias"

            gate_w = param_list[gate_w_name].numpy()
            gate_w = np.random.uniform(
                -0.1, 0.1, size=gate_w.shape
            ).astype('float64')
            param_list[gate_w_name].set_value(gate_w)
            named_param_list[gate_w_name].set_value(gate_w)

            gate_b = param_list[gate_b_name].numpy()
            gate_b = np.random.uniform(
                -0.1, 0.1, size=gate_b.shape
            ).astype('float64')
            param_list[gate_b_name].set_value(gate_b)
            named_param_list[gate_b_name].set_value(gate_b)

            candidate_w = param_list[candidate_w_name].numpy()
            candidate_w = np.random.uniform(
                -0.1, 0.1, size=candidate_w.shape
            ).astype('float64')
            param_list[candidate_w_name].set_value(candidate_w)
            named_param_list[candidate_w_name].set_value(candidate_w)

            candidate_b = param_list[candidate_b_name].numpy()
            candidate_b = np.random.uniform(
                -0.1, 0.1, size=candidate_b.shape
            ).astype('float64')
            param_list[candidate_b_name].set_value(candidate_b)
            named_param_list[candidate_b_name].set_value(candidate_b)

            step_input_np = np.random.uniform(
                -0.1, 0.1, (self.batch_size, self.input_size)
            ).astype('float64')
            pre_hidden_np = np.random.uniform(
                -0.1, 0.1, (self.batch_size, self.hidden_size)
            ).astype('float64')

            step_input_var = fluid.dygraph.to_variable(step_input_np)
            pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
            api_out = non_cudnn_gru(step_input_var, pre_hidden_var)
            named_api_out = named_non_cudnn_gru(
                step_input_var, pre_hidden_var
            )

            np_out = non_cudnn_step(
                step_input_np,
                pre_hidden_np,
                gate_w,
                gate_b,
                candidate_w,
                candidate_b,
            )

            np.testing.assert_allclose(
                api_out.numpy(), np_out, rtol=1e-05, atol=0
            )
            np.testing.assert_allclose(
                named_api_out.numpy(), np_out, rtol=1e-05, atol=0
            )


if __name__ == '__main__':
    unittest.main()
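The `tanh` helper above is written in terms of `sigmoid` rather than calling `np.tanh` directly; the two agree because

$$\tanh(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}} = \frac{1 - e^{-2x}}{1 + e^{-2x}} = \frac{2}{1 + e^{-2x}} - 1 = 2\,\sigma(2x) - 1, \qquad \sigma(z) = \frac{1}{1 + e^{-z}},$$

so `2.0 * sigmoid(2.0 * x) - 1.0` matches `np.tanh(x)` up to floating-point rounding.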
python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py — deleted (100644 → 0); view file @ 2bba3e18
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np

import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph import LSTMCell

np.random.seed(123)


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def tanh(x):
    # tanh(x) = 2 * sigmoid(2x) - 1
    return 2.0 * sigmoid(2.0 * x) - 1.0


def non_cudnn_step(
    step_in, pre_hidden, pre_cell, gate_w, gate_b, forget_bias=1.0
):
    # NumPy reference for one step of the BasicLSTMUnit-compatible LSTM.
    concat_1 = np.concatenate([step_in, pre_hidden], 1)
    gate_input = np.matmul(concat_1, gate_w)
    gate_input += gate_b
    i, j, f, o = np.split(gate_input, indices_or_sections=4, axis=1)

    new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * tanh(j)
    new_hidden = tanh(new_cell) * sigmoid(o)

    return new_hidden, new_cell


def cudnn_step(
    step_input_np,
    pre_hidden_np,
    pre_cell_np,
    weight_ih,
    bias_ih,
    weight_hh,
    bias_hh,
):
    # NumPy reference for one step of the cuDNN-compatible LSTM.
    igates = np.matmul(step_input_np, weight_ih.transpose(1, 0))
    igates = igates + bias_ih
    hgates = np.matmul(pre_hidden_np, weight_hh.transpose(1, 0))
    hgates = hgates + bias_hh

    chunked_igates = np.split(igates, indices_or_sections=4, axis=1)
    chunked_hgates = np.split(hgates, indices_or_sections=4, axis=1)

    ingate = chunked_igates[0] + chunked_hgates[0]
    ingate = sigmoid(ingate)

    forgetgate = chunked_igates[1] + chunked_hgates[1]
    forgetgate = sigmoid(forgetgate)

    cellgate = chunked_igates[2] + chunked_hgates[2]
    cellgate = tanh(cellgate)

    outgate = chunked_igates[3] + chunked_hgates[3]
    outgate = sigmoid(outgate)

    new_cell = (forgetgate * pre_cell_np) + (ingate * cellgate)
    new_hidden = outgate * tanh(new_cell)

    return new_hidden, new_cell


class TestCudnnLSTM(unittest.TestCase):
    def setUp(self):
        self.input_size = 100
        self.hidden_size = 200
        self.batch_size = 128

    def test_run(self):
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        with fluid.dygraph.guard(place):
            param_attr = fluid.ParamAttr(name="param_attr")
            bias_attr = fluid.ParamAttr(name="bias_attr")
            named_cudnn_lstm = LSTMCell(
                self.hidden_size, self.input_size, param_attr, bias_attr
            )
            cudnn_lstm = LSTMCell(self.hidden_size, self.input_size)
            param_list = cudnn_lstm.state_dict()
            named_param_list = named_cudnn_lstm.state_dict()

            # process weight and bias
            weight_ih_name = "_weight_ih"
            bias_ih_name = "_bias_ih"
            weight_hh_name = "_weight_hh"
            bias_hh_name = "_bias_hh"

            weight_ih = param_list[weight_ih_name].numpy()
            weight_ih = np.random.uniform(
                -0.1, 0.1, size=weight_ih.shape
            ).astype('float64')
            param_list[weight_ih_name].set_value(weight_ih)
            named_param_list[weight_ih_name].set_value(weight_ih)

            bias_ih = param_list[bias_ih_name].numpy()
            bias_ih = np.random.uniform(
                -0.1, 0.1, size=bias_ih.shape
            ).astype('float64')
            param_list[bias_ih_name].set_value(bias_ih)
            named_param_list[bias_ih_name].set_value(bias_ih)

            weight_hh = param_list[weight_hh_name].numpy()
            weight_hh = np.random.uniform(
                -0.1, 0.1, size=weight_hh.shape
            ).astype('float64')
            param_list[weight_hh_name].set_value(weight_hh)
            named_param_list[weight_hh_name].set_value(weight_hh)

            bias_hh = param_list[bias_hh_name].numpy()
            bias_hh = np.random.uniform(
                -0.1, 0.1, size=bias_hh.shape
            ).astype('float64')
            param_list[bias_hh_name].set_value(bias_hh)
            named_param_list[bias_hh_name].set_value(bias_hh)

            step_input_np = np.random.uniform(
                -0.1, 0.1, (self.batch_size, self.input_size)
            ).astype('float64')
            pre_hidden_np = np.random.uniform(
                -0.1, 0.1, (self.batch_size, self.hidden_size)
            ).astype('float64')
            pre_cell_np = np.random.uniform(
                -0.1, 0.1, (self.batch_size, self.hidden_size)
            ).astype('float64')

            step_input_var = fluid.dygraph.to_variable(step_input_np)
            pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
            pre_cell_var = fluid.dygraph.to_variable(pre_cell_np)
            api_out = cudnn_lstm(step_input_var, pre_hidden_var, pre_cell_var)
            named_api_out = named_cudnn_lstm(
                step_input_var, pre_hidden_var, pre_cell_var
            )

            api_hidden_out = api_out[0]
            api_cell_out = api_out[1]
            named_api_hidden_out = named_api_out[0]
            named_api_cell_out = named_api_out[1]

            np_hidden_out, np_cell_out = cudnn_step(
                step_input_np,
                pre_hidden_np,
                pre_cell_np,
                weight_ih,
                bias_ih,
                weight_hh,
                bias_hh,
            )

            np.testing.assert_allclose(
                api_hidden_out.numpy(), np_hidden_out, rtol=1e-05, atol=0
            )
            np.testing.assert_allclose(
                api_cell_out.numpy(), np_cell_out, rtol=1e-05, atol=0
            )
            np.testing.assert_allclose(
                named_api_hidden_out.numpy(), np_hidden_out, rtol=1e-05, atol=0
            )
            np.testing.assert_allclose(
                named_api_cell_out.numpy(), np_cell_out, rtol=1e-05, atol=0
            )


class TestNonCudnnLSTM(unittest.TestCase):
    def setUp(self):
        self.input_size = 100
        self.hidden_size = 200
        self.batch_size = 128

    def test_run(self):
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        with fluid.dygraph.guard(place):
            param_attr = fluid.ParamAttr(name="param_attr")
            bias_attr = fluid.ParamAttr(name="bias_attr")
            named_cudnn_lstm = LSTMCell(
                self.hidden_size,
                self.input_size,
                param_attr,
                bias_attr,
                use_cudnn_impl=False,
            )
            cudnn_lstm = LSTMCell(
                self.hidden_size, self.input_size, use_cudnn_impl=False
            )
            param_list = cudnn_lstm.state_dict()
            named_param_list = named_cudnn_lstm.state_dict()

            # process weight and bias
            gate_w_name = "_weight"
            gate_b_name = "_bias"

            gate_w = param_list[gate_w_name].numpy()
            gate_w = np.random.uniform(
                -0.1, 0.1, size=gate_w.shape
            ).astype('float64')
            param_list[gate_w_name].set_value(gate_w)
            named_param_list[gate_w_name].set_value(gate_w)

            gate_b = param_list[gate_b_name].numpy()
            gate_b = np.random.uniform(
                -0.1, 0.1, size=gate_b.shape
            ).astype('float64')
            param_list[gate_b_name].set_value(gate_b)
            named_param_list[gate_b_name].set_value(gate_b)

            step_input_np = np.random.uniform(
                -0.1, 0.1, (self.batch_size, self.input_size)
            ).astype('float64')
            pre_hidden_np = np.random.uniform(
                -0.1, 0.1, (self.batch_size, self.hidden_size)
            ).astype('float64')
            pre_cell_np = np.random.uniform(
                -0.1, 0.1, (self.batch_size, self.hidden_size)
            ).astype('float64')

            step_input_var = fluid.dygraph.to_variable(step_input_np)
            pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
            pre_cell_var = fluid.dygraph.to_variable(pre_cell_np)
            api_out = cudnn_lstm(step_input_var, pre_hidden_var, pre_cell_var)
            named_api_out = named_cudnn_lstm(
                step_input_var, pre_hidden_var, pre_cell_var
            )

            api_hidden_out = api_out[0]
            api_cell_out = api_out[1]
            named_api_hidden_out = named_api_out[0]
            named_api_cell_out = named_api_out[1]

            np_hidden_out, np_cell_out = non_cudnn_step(
                step_input_np, pre_hidden_np, pre_cell_np, gate_w, gate_b
            )

            np.testing.assert_allclose(
                api_hidden_out.numpy(), np_hidden_out, rtol=1e-05, atol=0
            )
            np.testing.assert_allclose(
                api_cell_out.numpy(), np_cell_out, rtol=1e-05, atol=0
            )
            np.testing.assert_allclose(
                named_api_hidden_out.numpy(), np_hidden_out, rtol=1e-05, atol=0
            )
            np.testing.assert_allclose(
                named_api_cell_out.numpy(), np_cell_out, rtol=1e-05, atol=0
            )


if __name__ == '__main__':
    unittest.main()
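Both test files pin the comparison to `rtol=1e-05, atol=0`. Since `assert_allclose` checks `|actual - desired| <= atol + rtol * |desired|`, setting `atol=0` makes the test purely relative, the stricter choice when outputs are small in magnitude. A short sketch of that semantics:

```python
import numpy as np

a = np.array([1.000001, 2.0])
b = np.array([1.0, 2.0])

# Passes: each |a - b| is within 1e-05 * |b|.
np.testing.assert_allclose(a, b, rtol=1e-05, atol=0)

# With atol=0, a zero in `desired` tolerates no absolute error at all.
try:
    np.testing.assert_allclose(
        np.array([1e-12]), np.array([0.0]), rtol=1e-05, atol=0
    )
except AssertionError:
    print("purely relative check fails on exact zeros")
```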
tools/parallel_UT_rule.py — view file @ 93acef3b

@@ -1103,7 +1103,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
     'test_trt_reduce_sum_op',
     'data_type_transform_test',
     'test_gru_rnn_op',
-    'test_cudnn_grucell',
     'test_argsort_op',
     'test_batch_norm_op',
     'test_inplace',
@@ -1190,7 +1189,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
     'test_nn_functional_embedding_dygraph',
     'test_reshape_op',
     'test_maxout_op',
-    'test_cudnn_lstmcell',
     'test_sigmoid_focal_loss',
     'test_manual_seed',
     'test_lrn_op',
@@ -2727,7 +2725,6 @@ TWO_PARALLEL_JOB = [
     'test_polygon_box_transform',
     'test_sequence_pad_op',
     'test_sequence_expand',
-    'test_cudnn_grucell',
     'test_pool2d_bf16_mkldnn_op',
     'test_bilinear_api',
     'test_parallel_executor_inference_feed_partial_data',
@@ -2783,7 +2780,6 @@ TWO_PARALLEL_JOB = [
     'test_cos_sim_op',
     'test_sequence_enumerate_op',
     'test_sequence_concat',
-    'test_cudnn_lstmcell',
     'test_data_norm_op',
     'test_decoupled_py_reader_data_check',
     'test_deformable_conv_v1_op',