diff --git a/python/paddle/fluid/dygraph/__init__.py b/python/paddle/fluid/dygraph/__init__.py
index 1132ef393d552c878c8db27752c1f3e3280500d0..397c305c23e040955a12ddfc9683dc1daad0ab32 100644
--- a/python/paddle/fluid/dygraph/__init__.py
+++ b/python/paddle/fluid/dygraph/__init__.py
@@ -43,9 +43,6 @@ from .io import *
 from . import static_runner
 from .static_runner import StaticModelRunner
 
-from . import rnn
-from .rnn import *
-
 from . import amp
 from .amp import *
 
@@ -60,5 +57,4 @@ __all__ += parallel.__all__
 __all__ += checkpoint.__all__
 __all__ += learning_rate_scheduler.__all__
 __all__ += io.__all__
-__all__ += rnn.__all__
 __all__ += amp.__all__
diff --git a/python/paddle/fluid/dygraph/rnn.py b/python/paddle/fluid/dygraph/rnn.py
deleted file mode 100644
index d74e0b1bfee70e790fe195839028917fb5d015aa..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/dygraph/rnn.py
+++ /dev/null
@@ -1,509 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle
-from . import Layer
-from ..layers import (
-    concat,
-    fill_constant,
-    elementwise_mul,
-    split,
-)
-import copy
-import paddle
-
-__all__ = ['LSTMCell', 'GRUCell']
-
-
-class LSTMCell(Layer):
-    r"""
-    LSTMCell implementation using basic operators.
-    There are two LSTMCell version, the default one is compatible with CUDNN LSTM implementation.
-    The algorithm can be described as the equations below.
-
-    .. math::
-
-        i_t &= sigmoid(W_{ix}x_{t} + W_{ih}h_{t-1} + bx_i + bh_i)
-
-        f_t &= sigmoid(W_{fx}x_{t} + W_{fh}h_{t-1} + bx_f + bh_f)
-
-        o_t &= sigmoid(W_{ox}x_{t} + W_{oh}h_{t-1} + bx_o + bh_o)
-
-        \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + bx_c + bh_c)
-
-        c_t &= f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t}
-
-        h_t &= o_t \\odot tanh(c_t)
-
-    The other LSTMCell version is compatible with the BasicLSTMUnit used in static graph.
-    The algorithm can be described as the equations below.
-
-    .. math::
-
-        i_t &= sigmoid(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)
-
-        f_t &= sigmoid(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget_bias )
-
-        o_t &= sigmoid(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o)
-
-        \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c)
-
-        c_t &= f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t}
-
-        h_t &= o_t \\odot tanh(c_t)
-
-    Args:
-        hidden_size (integer): The hidden size used in the Cell.
-        input_size (integer): The input size used in the Cell.
-        param_attr(ParamAttr|None): The parameter attribute for the learnable
-            weight matrix. Note:
-            If it is set to None or one attribute of ParamAttr, LSTMCell will
-            create ParamAttr as param_attr. If the Initializer of the param_attr
-            is not set, the parameter is initialized with Xavier. Default: None.
-        bias_attr (ParamAttr|None): The parameter attribute for the bias
-            of LSTMCell.
-            If it is set to None or one attribute of ParamAttr, LSTMCell will
-            create ParamAttr as bias_attr. If the Initializer of the bias_attr
-            is not set, the bias is initialized as zero. Default: None.
-        gate_activation (function|None): The activation function for gates (actGate).
-            Default: 'fluid.layers.sigmoid'
-        activation (function|None): The activation function for cells (actNode).
-            Default: 'fluid.layers.tanh'
-        forget_bias(float|1.0): forget bias used when computing forget gate. This
-            is not used in default LSTMCell implementation (CUDNN compatiable)
-        use_cudnn_impl(bool|True): whether to use CUDNN compatible LSTMCell
-        dtype(string): data type used in this cell
-
-    Returns:
-        None
-
-    Examples:
-
-        .. code-block:: python
-
-            from paddle import fluid
-            import paddle.fluid.core as core
-            from paddle.fluid.dygraph import LSTMCell
-            import numpy as np
-            batch_size = 64
-            input_size = 128
-            hidden_size = 256
-            step_input_np = np.random.uniform(-0.1, 0.1, (
-                batch_size, input_size)).astype('float64')
-            pre_hidden_np = np.random.uniform(-0.1, 0.1, (
-                batch_size, hidden_size)).astype('float64')
-            pre_cell_np = np.random.uniform(-0.1, 0.1, (
-                batch_size, hidden_size)).astype('float64')
-            if core.is_compiled_with_cuda():
-                place = core.CUDAPlace(0)
-            else:
-                place = core.CPUPlace()
-            with fluid.dygraph.guard(place):
-                cudnn_lstm = LSTMCell(hidden_size, input_size)
-                step_input_var = fluid.dygraph.to_variable(step_input_np)
-                pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
-                pre_cell_var = fluid.dygraph.to_variable(pre_cell_np)
-                new_hidden, new_cell = cudnn_lstm(step_input_var, pre_hidden_var, pre_cell_var)
-
-    """
-
-    def __init__(
-        self,
-        hidden_size,
-        input_size,
-        param_attr=None,
-        bias_attr=None,
-        gate_activation=None,
-        activation=None,
-        forget_bias=1.0,
-        use_cudnn_impl=True,
-        dtype='float64',
-    ):
-        super().__init__(dtype)
-
-        self._hidden_size = hidden_size
-        self._input_size = input_size
-        self._param_attr = param_attr
-        self._bias_attr = bias_attr
-        self._dtype = dtype
-        self._gate_activation = gate_activation or paddle.nn.functional.sigmoid
-        self._activation = activation or paddle.tanh
-        self._use_cudnn_impl = use_cudnn_impl
-
-        if self._use_cudnn_impl:
-
-            if (
-                self._param_attr is not None
-                and self._param_attr.name is not None
-            ):
-                weight_ih_param_attr = copy.deepcopy(self._param_attr)
-                weight_hh_param_attr = copy.deepcopy(self._param_attr)
-                weight_ih_param_attr.name += "_weight_ih"
-                weight_hh_param_attr.name += "_weight_hh"
-            else:
-                weight_ih_param_attr = self._param_attr
-                weight_hh_param_attr = self._param_attr
-
-            if self._bias_attr is not None and self._bias_attr.name is not None:
-                bias_ih_param_attr = copy.deepcopy(self._bias_attr)
-                bias_hh_param_attr = copy.deepcopy(self._bias_attr)
-                bias_ih_param_attr.name += "_bias_ih"
-                bias_hh_param_attr.name += "_bias_hh"
-            else:
-                bias_ih_param_attr = self._bias_attr
-                bias_hh_param_attr = self._bias_attr
-
-            self._weight_ih = self.create_parameter(
-                attr=weight_ih_param_attr,
-                shape=[4 * self._hidden_size, self._input_size],
-                dtype=self._dtype,
-            )
-
-            self._weight_hh = self.create_parameter(
-                attr=weight_hh_param_attr,
-                shape=[4 * self._hidden_size, self._hidden_size],
-                dtype=self._dtype,
-            )
-
-            self._bias_ih = self.create_parameter(
-                attr=bias_ih_param_attr,
-                shape=[4 * self._hidden_size],
-                dtype=self._dtype,
-                is_bias=True,
-            )
-            self._bias_hh = self.create_parameter(
-                attr=bias_hh_param_attr,
-                shape=[4 * self._hidden_size],
-                dtype=self._dtype,
-                is_bias=True,
-            )
-
-        else:
-
-            self._forget_bias = fill_constant(
-                [1], dtype=dtype, value=forget_bias
-            )
-            self._forget_bias.stop_gradient = False
-
-            self._weight = self.create_parameter(
-                attr=self._param_attr,
-                shape=[
-                    self._input_size + self._hidden_size,
-                    4 * self._hidden_size,
-                ],
-                dtype=dtype,
-            )
-
-            self._bias = self.create_parameter(
-                attr=self._bias_attr,
-                shape=[4 * self._hidden_size],
-                dtype=dtype,
-                is_bias=True,
-            )
-
-    def forward(self, input, pre_hidden, pre_cell):
-
-        if self._use_cudnn_impl:
-            igates = paddle.matmul(input, y=self._weight_ih, transpose_y=True)
-            igates = paddle.add(igates, self._bias_ih)
-            hgates = paddle.matmul(
-                pre_hidden, self._weight_hh, transpose_y=True
-            )
-            hgates = paddle.add(hgates, self._bias_hh)
-            chunked_igates = split(igates, num_or_sections=4, dim=1)
-            chunked_hgates = split(hgates, num_or_sections=4, dim=1)
-
-            ingate = paddle.add(chunked_igates[0], chunked_hgates[0])
-            ingate = self._gate_activation(ingate)
-
-            forgetgate = paddle.add(chunked_igates[1], chunked_hgates[1])
-            forgetgate = self._gate_activation(forgetgate)
-
-            cellgate = paddle.add(chunked_igates[2], chunked_hgates[2])
-            cellgate = self._activation(cellgate)
-
-            outgate = paddle.add(chunked_igates[3], chunked_hgates[3])
-            outgate = self._gate_activation(outgate)
-
-            new_cell = (forgetgate * pre_cell) + (ingate * cellgate)
-            new_hidden = outgate * self._activation(new_cell)
-
-        else:
-
-            concat_input_hidden = concat([input, pre_hidden], 1)
-            gate_input = paddle.matmul(x=concat_input_hidden, y=self._weight)
-
-            gate_input = paddle.add(gate_input, self._bias)
-            i, j, f, o = split(gate_input, num_or_sections=4, dim=-1)
-            new_cell = paddle.add(
-                paddle.multiply(
-                    pre_cell,
-                    self._gate_activation(paddle.add(f, self._forget_bias)),
-                ),
-                paddle.multiply(
-                    paddle.nn.functional.sigmoid(i), paddle.tanh(j)
-                ),
-            )
-            new_hidden = self._activation(new_cell) * self._gate_activation(o)
-
-        return new_hidden, new_cell
-
-
-class GRUCell(Layer):
-    r"""
-    GRU implementation using basic operators.
-    There are two GRUCell version, the default one is compatible with CUDNN GRU implementation.
-    The algorithm can be described as the equations below.
-
-    .. math::
-
-        u_t & = sigmoid(W_{ux} x_{t} + b_ux + W_{uh} h_{t-1} + b_uh)
-
-        r_t & = sigmoid(W_{rx} x_{t} + b_rx + W_{rh} h_{t-1} + b_rh)
-
-        \\tilde{h_{t}} & = tanh(W_{cx} x_{t} + b_cx + r_t \\odot (W_{ch} h_{t-1} + b_ch))
-
-        h_t & = u_t h_{t-1} + (1-u_t) \\tilde{h_{t}}
-
-    The other LSTMCell version is compatible with the BasicGRUUnit used in static graph.
-    The algorithm can be described as the equations below.
-
-    .. math::
-
-        u_t & = sigmoid(W_{ux} x_{t} + W_{uh} h_{t-1} + b_u)
-
-        r_t & = sigmoid(W_{rx} x_{t} + W_{rh} h_{t-1} + b_r)
-
-        \\tilde{h_{t}} & = tanh(W_{cx} x_{t} + W_{ch} \\odot(r_t, h_{t-1}) + b_m)
-
-        h_t & = u_t h_{t-1} + (1-u_t) \\tilde{h_{t}}
-
-    Args:
-        hidden_size (integer): The hidden size used in the Cell.
-        input_size (integer): The input size used in the Cell.
-        param_attr(ParamAttr|None): The parameter attribute for the learnable
-            weight matrix. Note:
-            If it is set to None or one attribute of ParamAttr, GRUCell will
-            create ParamAttr as param_attr. If the Initializer of the param_attr
-            is not set, the parameter is initialized with Xavier. Default: None.
-        bias_attr (ParamAttr|None): The parameter attribute for the bias
-            of GRUCell.
-            If it is set to None or one attribute of ParamAttr, GRUCell will
-            create ParamAttr as bias_attr. If the Initializer of the bias_attr
-            is not set, the bias is initialized zero. Default: None.
-        gate_activation (function|None): The activation function for gates (actGate).
-            Default: 'fluid.layers.sigmoid'
-        activation (function|None): The activation function for cell (actNode).
-            Default: 'fluid.layers.tanh'
-        use_cudnn_impl(bool|True): whether to use CUDNN compatible LSTMCell
-        dtype(string): data type used in this cell
-
-    Returns:
-        None
-
-    Examples:
-
-        .. code-block:: python
-
-            from paddle import fluid
-            import paddle.fluid.core as core
-            from paddle.fluid.dygraph import GRUCell
-            import numpy as np
-            batch_size = 64
-            input_size = 128
-            hidden_size = 256
-            step_input_np = np.random.uniform(-0.1, 0.1, (
-                batch_size, input_size)).astype('float64')
-            pre_hidden_np = np.random.uniform(-0.1, 0.1, (
-                batch_size, hidden_size)).astype('float64')
-            if core.is_compiled_with_cuda():
-                place = core.CUDAPlace(0)
-            else:
-                place = core.CPUPlace()
-            with fluid.dygraph.guard(place):
-                cudnn_gru = GRUCell(hidden_size, input_size)
-                step_input_var = fluid.dygraph.to_variable(step_input_np)
-                pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
-
-    """
-
-    def __init__(
-        self,
-        hidden_size,
-        input_size,
-        param_attr=None,
-        bias_attr=None,
-        gate_activation=None,
-        activation=None,
-        use_cudnn_impl=True,
-        dtype='float64',
-    ):
-        super().__init__()
-
-        self._hidden_size = hidden_size
-        self._input_size = input_size
-        self._param_attr = param_attr
-        self._bias_attr = bias_attr
-        self._dtype = dtype
-        self._gate_activation = gate_activation or paddle.nn.functional.sigmoid
-        self._activation = activation or paddle.tanh
-        self._use_cudnn_impl = use_cudnn_impl
-
-        if self._use_cudnn_impl:
-
-            if (
-                self._param_attr is not None
-                and self._param_attr.name is not None
-            ):
-                weight_ih_param_attr = copy.deepcopy(self._param_attr)
-                weight_hh_param_attr = copy.deepcopy(self._param_attr)
-                weight_ih_param_attr.name += "_weight_ih"
-                weight_hh_param_attr.name += "_weight_hh"
-            else:
-                weight_ih_param_attr = self._param_attr
-                weight_hh_param_attr = self._param_attr
-
-            if self._bias_attr is not None and self._bias_attr.name is not None:
-                bias_ih_param_attr = copy.deepcopy(self._bias_attr)
-                bias_hh_param_attr = copy.deepcopy(self._bias_attr)
-                bias_ih_param_attr.name += "_bias_ih"
-                bias_hh_param_attr.name += "_bias_hh"
-            else:
-                bias_ih_param_attr = self._bias_attr
-                bias_hh_param_attr = self._bias_attr
-
-            self._weight_ih = self.create_parameter(
-                attr=weight_ih_param_attr,
-                shape=[3 * self._hidden_size, self._input_size],
-                dtype=self._dtype,
-            )
-
-            self._weight_hh = self.create_parameter(
-                attr=weight_hh_param_attr,
-                shape=[3 * self._hidden_size, self._hidden_size],
-                dtype=self._dtype,
-            )
-
-            self._bias_ih = self.create_parameter(
-                attr=bias_ih_param_attr,
-                shape=[3 * self._hidden_size],
-                dtype=self._dtype,
-                is_bias=True,
-            )
-            self._bias_hh = self.create_parameter(
-                attr=bias_hh_param_attr,
-                shape=[3 * self._hidden_size],
-                dtype=self._dtype,
-                is_bias=True,
-            )
-
-        else:
-
-            if (
-                self._param_attr is not None
-                and self._param_attr.name is not None
-            ):
-                gate_weight_param_attr = copy.deepcopy(self._param_attr)
-                candidate_weight_param_attr = copy.deepcopy(self._param_attr)
-                gate_weight_param_attr.name += "_gate_weight"
-                candidate_weight_param_attr.name += "_candidate_weight"
-            else:
-                gate_weight_param_attr = self._param_attr
-                candidate_weight_param_attr = self._param_attr
-
-            if self._bias_attr is not None and self._bias_attr.name is not None:
-                gate_bias_param_attr = copy.deepcopy(self._bias_attr)
-                candidate_bias_param_attr = copy.deepcopy(self._bias_attr)
-                gate_bias_param_attr.name += "_gate_bias"
-                candidate_bias_param_attr.name += "_candidate_bias"
-            else:
-                gate_bias_param_attr = self._bias_attr
-                candidate_bias_param_attr = self._bias_attr
-
-            self._gate_weight = self.create_parameter(
-                attr=gate_weight_param_attr,
-                shape=[
-                    self._input_size + self._hidden_size,
-                    2 * self._hidden_size,
-                ],
-                dtype=dtype,
-            )
-
-            self._candidate_weight = self.create_parameter(
-                attr=candidate_weight_param_attr,
-                shape=[self._input_size + self._hidden_size, self._hidden_size],
-                dtype=dtype,
-            )
-
-            self._gate_bias = self.create_parameter(
-                attr=gate_bias_param_attr,
-                shape=[2 * self._hidden_size],
-                dtype=dtype,
-                is_bias=True,
-            )
-            self._candidate_bias = self.create_parameter(
-                attr=candidate_bias_param_attr,
-                shape=[self._hidden_size],
-                dtype=dtype,
-                is_bias=True,
-            )
-
-    def forward(self, input, pre_hidden):
-
-        if self._use_cudnn_impl:
-            igates = paddle.matmul(input, y=self._weight_ih, transpose_y=True)
-            igates = paddle.add(igates, self._bias_ih)
-            hgates = paddle.matmul(
-                pre_hidden, self._weight_hh, transpose_y=True
-            )
-            hgates = paddle.add(hgates, self._bias_hh)
-
-            chunked_igates = split(igates, num_or_sections=3, dim=1)
-            chunked_hgates = split(hgates, num_or_sections=3, dim=1)
-
-            reset_gate = paddle.add(chunked_igates[0], chunked_hgates[0])
-            reset_gate = self._gate_activation(reset_gate)
-
-            input_gate = paddle.add(chunked_igates[1], chunked_hgates[1])
-            input_gate = self._gate_activation(input_gate)
-
-            _temp = reset_gate * chunked_hgates[2]
-            new_gate = paddle.add(chunked_igates[2], _temp)
-            new_gate = self._activation(new_gate)
-
-            new_hidden = (pre_hidden - new_gate) * input_gate + new_gate
-
-        else:
-
-            concat_input_hidden = concat([input, pre_hidden], 1)
-
-            gate_input = paddle.matmul(
-                x=concat_input_hidden, y=self._gate_weight
-            )
-
-            gate_input = paddle.add(gate_input, self._gate_bias)
-            gate_input = self._gate_activation(gate_input)
-            r, u = split(gate_input, num_or_sections=2, dim=1)
-
-            r_hidden = r * pre_hidden
-
-            candidate = paddle.matmul(
-                concat([input, r_hidden], 1), self._candidate_weight
-            )
-            candidate = paddle.add(candidate, self._candidate_bias)
-
-            c = self._activation(candidate)
-            new_hidden = u * pre_hidden + (1 - u) * c
-
-        return new_hidden
diff --git a/python/paddle/fluid/tests/unittests/test_cudnn_grucell.py b/python/paddle/fluid/tests/unittests/test_cudnn_grucell.py
deleted file mode 100644
index 6a1ed39157d529bdb52a5d9f6f69107a2d859b7e..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/tests/unittests/test_cudnn_grucell.py
+++ /dev/null
@@ -1,256 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import numpy as np
-
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-from paddle.fluid.dygraph import GRUCell
-
-np.random.seed = 123
-
-
-def sigmoid(x):
-    return 1.0 / (1.0 + np.exp(-x))
-
-
-def tanh(x):
-    return 2.0 * sigmoid(2.0 * x) - 1.0
-
-
-def cudnn_step(
-    step_input_np, pre_hidden_np, weight_ih, bias_ih, weight_hh, bias_hh
-):
-    igates = np.matmul(step_input_np, weight_ih.transpose(1, 0))
-    igates += bias_ih
-    hgates = np.matmul(pre_hidden_np, weight_hh.transpose(1, 0))
-    hgates += bias_hh
-
-    chunked_igates = np.split(igates, indices_or_sections=3, axis=1)
-    chunked_hgates = np.split(hgates, indices_or_sections=3, axis=1)
-
-    reset_gate = chunked_igates[0] + chunked_hgates[0]
-    reset_gate = sigmoid(reset_gate)
-
-    input_gate = chunked_igates[1] + chunked_hgates[1]
-    input_gate = sigmoid(input_gate)
-
-    _temp = reset_gate * chunked_hgates[2]
-    new_gate = chunked_igates[2] + _temp
-    new_gate = tanh(new_gate)
-
-    new_hidden = (pre_hidden_np - new_gate) * input_gate + new_gate
-
-    return new_hidden
-
-
-def non_cudnn_step(
-    step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b
-):
-    concat_1 = np.concatenate([step_in, pre_hidden], 1)
-
-    gate_input = np.matmul(concat_1, gate_w)
-    gate_input += gate_b
-    gate_input = sigmoid(gate_input)
-    r, u = np.split(gate_input, indices_or_sections=2, axis=1)
-
-    r_hidden = r * pre_hidden
-
-    candidate = np.matmul(np.concatenate([step_in, r_hidden], 1), candidate_w)
-
-    candidate += candidate_b
-    c = tanh(candidate)
-
-    new_hidden = u * pre_hidden + (1 - u) * c
-
-    return new_hidden
-
-
-class TestCudnnGRU(unittest.TestCase):
-    def setUp(self):
-        self.input_size = 100
-        self.hidden_size = 200
-        self.batch_size = 64
-
-    def test_run(self):
-
-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(0)
-        else:
-            place = core.CPUPlace()
-
-        with fluid.dygraph.guard(place):
-            param_attr = fluid.ParamAttr(name="param_attr")
-            bias_attr = fluid.ParamAttr(name="bias_attr")
-            named_cudnn_gru = GRUCell(
-                self.hidden_size, self.input_size, param_attr, bias_attr
-            )
-            cudnn_gru = GRUCell(self.hidden_size, self.input_size)
-
-            param_list = cudnn_gru.state_dict()
-            named_param_list = named_cudnn_gru.state_dict()
-
-            # process weight and bias
-
-            weight_ih_name = "_weight_ih"
-            bias_ih_name = "_bias_ih"
-            weight_hh_name = "_weight_hh"
-            bias_hh_name = "_bias_hh"
-
-            weight_ih = param_list[weight_ih_name].numpy()
-            weight_ih = np.random.uniform(
-                -0.1, 0.1, size=weight_ih.shape
-            ).astype('float64')
-            param_list[weight_ih_name].set_value(weight_ih)
-            named_param_list[weight_ih_name].set_value(weight_ih)
-
-            bias_ih = param_list[bias_ih_name].numpy()
-            bias_ih = np.random.uniform(-0.1, 0.1, size=bias_ih.shape).astype(
-                'float64'
-            )
-            param_list[bias_ih_name].set_value(bias_ih)
-            named_param_list[bias_ih_name].set_value(bias_ih)
-
-            weight_hh = param_list[weight_hh_name].numpy()
-            weight_hh = np.random.uniform(
-                -0.1, 0.1, size=weight_hh.shape
-            ).astype('float64')
-            param_list[weight_hh_name].set_value(weight_hh)
-            named_param_list[weight_hh_name].set_value(weight_hh)
-
-            bias_hh = param_list[bias_hh_name].numpy()
-            bias_hh = np.random.uniform(-0.1, 0.1, size=bias_hh.shape).astype(
-                'float64'
-            )
-            param_list[bias_hh_name].set_value(bias_hh)
-            named_param_list[bias_hh_name].set_value(bias_hh)
-
-            step_input_np = np.random.uniform(
-                -0.1, 0.1, (self.batch_size, self.input_size)
-            ).astype('float64')
-            pre_hidden_np = np.random.uniform(
-                -0.1, 0.1, (self.batch_size, self.hidden_size)
-            ).astype('float64')
-
-            step_input_var = fluid.dygraph.to_variable(step_input_np)
-            pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
-            api_out = cudnn_gru(step_input_var, pre_hidden_var)
-            named_api_out = named_cudnn_gru(step_input_var, pre_hidden_var)
-
-        np_out = cudnn_step(
-            step_input_np, pre_hidden_np, weight_ih, bias_ih, weight_hh, bias_hh
-        )
-
-        np.testing.assert_allclose(api_out.numpy(), np_out, rtol=1e-05, atol=0)
-        np.testing.assert_allclose(
-            named_api_out.numpy(), np_out, rtol=1e-05, atol=0
-        )
-
-
-class TestNonCudnnGRU(unittest.TestCase):
-    def setUp(self):
-        self.input_size = 100
-        self.hidden_size = 200
-        self.batch_size = 64
-
-    def test_run(self):
-
-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(0)
-        else:
-            place = core.CPUPlace()
-
-        with fluid.dygraph.guard(place):
-            param_attr = fluid.ParamAttr(name="param_attr")
-            bias_attr = fluid.ParamAttr(name="bias_attr")
-            named_non_cudnn_gru = GRUCell(
-                self.hidden_size,
-                self.input_size,
-                param_attr,
-                bias_attr,
-                use_cudnn_impl=False,
-            )
-            non_cudnn_gru = GRUCell(
-                self.hidden_size, self.input_size, use_cudnn_impl=False
-            )
-
-            param_list = non_cudnn_gru.state_dict()
-            named_param_list = named_non_cudnn_gru.state_dict()
-
-            # process weight and bias
-
-            gate_w_name = "_gate_weight"
-            gate_b_name = "_gate_bias"
-            candidate_w_name = "_candidate_weight"
-            candidate_b_name = "_candidate_bias"
-
-            gate_w = param_list[gate_w_name].numpy()
-            gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
-                'float64'
-            )
-            param_list[gate_w_name].set_value(gate_w)
-            named_param_list[gate_w_name].set_value(gate_w)
-
-            gate_b = param_list[gate_b_name].numpy()
-            gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
-                'float64'
-            )
-            param_list[gate_b_name].set_value(gate_b)
-            named_param_list[gate_b_name].set_value(gate_b)
-
-            candidate_w = param_list[candidate_w_name].numpy()
-            candidate_w = np.random.uniform(
-                -0.1, 0.1, size=candidate_w.shape
-            ).astype('float64')
-            param_list[candidate_w_name].set_value(candidate_w)
-            named_param_list[candidate_w_name].set_value(candidate_w)
-
-            candidate_b = param_list[candidate_b_name].numpy()
-            candidate_b = np.random.uniform(
-                -0.1, 0.1, size=candidate_b.shape
-            ).astype('float64')
-            param_list[candidate_b_name].set_value(candidate_b)
-            named_param_list[candidate_b_name].set_value(candidate_b)
-
-            step_input_np = np.random.uniform(
-                -0.1, 0.1, (self.batch_size, self.input_size)
-            ).astype('float64')
-            pre_hidden_np = np.random.uniform(
-                -0.1, 0.1, (self.batch_size, self.hidden_size)
-            ).astype('float64')
-
-            step_input_var = fluid.dygraph.to_variable(step_input_np)
-            pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
-            api_out = non_cudnn_gru(step_input_var, pre_hidden_var)
-            named_api_out = named_non_cudnn_gru(step_input_var, pre_hidden_var)
-
-        np_out = non_cudnn_step(
-            step_input_np,
-            pre_hidden_np,
-            gate_w,
-            gate_b,
-            candidate_w,
-            candidate_b,
-        )
-
-        np.testing.assert_allclose(api_out.numpy(), np_out, rtol=1e-05, atol=0)
-        np.testing.assert_allclose(
-            named_api_out.numpy(), np_out, rtol=1e-05, atol=0
-        )
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py b/python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py
deleted file mode 100644
index 913dc958117342fdc9ba331f5396947d477306d1..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py
+++ /dev/null
@@ -1,278 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import numpy as np
-
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-from paddle.fluid.dygraph import LSTMCell
-
-np.random.seed = 123
-
-
-def sigmoid(x):
-    return 1.0 / (1.0 + np.exp(-x))
-
-
-def tanh(x):
-    return 2.0 * sigmoid(2.0 * x) - 1.0
-
-
-def non_cudnn_step(
-    step_in, pre_hidden, pre_cell, gate_w, gate_b, forget_bias=1.0
-):
-    concat_1 = np.concatenate([step_in, pre_hidden], 1)
-
-    gate_input = np.matmul(concat_1, gate_w)
-    gate_input += gate_b
-    i, j, f, o = np.split(gate_input, indices_or_sections=4, axis=1)
-
-    new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * tanh(j)
-    new_hidden = tanh(new_cell) * sigmoid(o)
-
-    return new_hidden, new_cell
-
-
-def cudnn_step(
-    step_input_np,
-    pre_hidden_np,
-    pre_cell_np,
-    weight_ih,
-    bias_ih,
-    weight_hh,
-    bias_hh,
-):
-
-    igates = np.matmul(step_input_np, weight_ih.transpose(1, 0))
-    igates = igates + bias_ih
-    hgates = np.matmul(pre_hidden_np, weight_hh.transpose(1, 0))
-    hgates = hgates + bias_hh
-
-    chunked_igates = np.split(igates, indices_or_sections=4, axis=1)
-    chunked_hgates = np.split(hgates, indices_or_sections=4, axis=1)
-
-    ingate = chunked_igates[0] + chunked_hgates[0]
-    ingate = sigmoid(ingate)
-
-    forgetgate = chunked_igates[1] + chunked_hgates[1]
-    forgetgate = sigmoid(forgetgate)
-
-    cellgate = chunked_igates[2] + chunked_hgates[2]
-    cellgate = tanh(cellgate)
-
-    outgate = chunked_igates[3] + chunked_hgates[3]
-    outgate = sigmoid(outgate)
-
-    new_cell = (forgetgate * pre_cell_np) + (ingate * cellgate)
-    new_hidden = outgate * tanh(new_cell)
-
-    return new_hidden, new_cell
-
-
-class TestCudnnLSTM(unittest.TestCase):
-    def setUp(self):
-        self.input_size = 100
-        self.hidden_size = 200
-        self.batch_size = 128
-
-    def test_run(self):
-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(0)
-        else:
-            place = core.CPUPlace()
-
-        with fluid.dygraph.guard(place):
-            param_attr = fluid.ParamAttr(name="param_attr")
-            bias_attr = fluid.ParamAttr(name="bias_attr")
-            named_cudnn_lstm = LSTMCell(
-                self.hidden_size, self.input_size, param_attr, bias_attr
-            )
-            cudnn_lstm = LSTMCell(self.hidden_size, self.input_size)
-
-            param_list = cudnn_lstm.state_dict()
-            named_param_list = named_cudnn_lstm.state_dict()
-
-            # process weight and bias
-
-            weight_ih_name = "_weight_ih"
-            bias_ih_name = "_bias_ih"
-            weight_hh_name = "_weight_hh"
-            bias_hh_name = "_bias_hh"
-            weight_ih = param_list[weight_ih_name].numpy()
-            weight_ih = np.random.uniform(
-                -0.1, 0.1, size=weight_ih.shape
-            ).astype('float64')
-            param_list[weight_ih_name].set_value(weight_ih)
-            named_param_list[weight_ih_name].set_value(weight_ih)
-
-            bias_ih = param_list[bias_ih_name].numpy()
-            bias_ih = np.random.uniform(-0.1, 0.1, size=bias_ih.shape).astype(
-                'float64'
-            )
-            param_list[bias_ih_name].set_value(bias_ih)
-            named_param_list[bias_ih_name].set_value(bias_ih)
-
-            weight_hh = param_list[weight_hh_name].numpy()
-            weight_hh = np.random.uniform(
-                -0.1, 0.1, size=weight_hh.shape
-            ).astype('float64')
-            param_list[weight_hh_name].set_value(weight_hh)
-            named_param_list[weight_hh_name].set_value(weight_hh)
-
-            bias_hh = param_list[bias_hh_name].numpy()
-            bias_hh = np.random.uniform(-0.1, 0.1, size=bias_hh.shape).astype(
-                'float64'
-            )
-            param_list[bias_hh_name].set_value(bias_hh)
-            named_param_list[bias_hh_name].set_value(bias_hh)
-
-            step_input_np = np.random.uniform(
-                -0.1, 0.1, (self.batch_size, self.input_size)
-            ).astype('float64')
-            pre_hidden_np = np.random.uniform(
-                -0.1, 0.1, (self.batch_size, self.hidden_size)
-            ).astype('float64')
-            pre_cell_np = np.random.uniform(
-                -0.1, 0.1, (self.batch_size, self.hidden_size)
-            ).astype('float64')
-
-            step_input_var = fluid.dygraph.to_variable(step_input_np)
-            pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
-            pre_cell_var = fluid.dygraph.to_variable(pre_cell_np)
-            api_out = cudnn_lstm(step_input_var, pre_hidden_var, pre_cell_var)
-            named_api_out = named_cudnn_lstm(
-                step_input_var, pre_hidden_var, pre_cell_var
-            )
-
-            api_hidden_out = api_out[0]
-            api_cell_out = api_out[1]
-            named_api_hidden_out = named_api_out[0]
-            named_api_cell_out = named_api_out[1]
-
-            np_hidden_out, np_cell_out = cudnn_step(
-                step_input_np,
-                pre_hidden_np,
-                pre_cell_np,
-                weight_ih,
-                bias_ih,
-                weight_hh,
-                bias_hh,
-            )
-            np.testing.assert_allclose(
-                api_hidden_out.numpy(), np_hidden_out, rtol=1e-05, atol=0
-            )
-            np.testing.assert_allclose(
-                api_cell_out.numpy(), np_cell_out, rtol=1e-05, atol=0
-            )
-            np.testing.assert_allclose(
-                named_api_hidden_out.numpy(), np_hidden_out, rtol=1e-05, atol=0
-            )
-            np.testing.assert_allclose(
-                named_api_cell_out.numpy(), np_cell_out, rtol=1e-05, atol=0
-            )
-
-
-class TestNonCudnnLSTM(unittest.TestCase):
-    def setUp(self):
-        self.input_size = 100
-        self.hidden_size = 200
-        self.batch_size = 128
-
-    def test_run(self):
-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(0)
-        else:
-            place = core.CPUPlace()
-
-        with fluid.dygraph.guard(place):
-            param_attr = fluid.ParamAttr(name="param_attr")
-            bias_attr = fluid.ParamAttr(name="bias_attr")
-            named_cudnn_lstm = LSTMCell(
-                self.hidden_size,
-                self.input_size,
-                param_attr,
-                bias_attr,
-                use_cudnn_impl=False,
-            )
-            cudnn_lstm = LSTMCell(
-                self.hidden_size, self.input_size, use_cudnn_impl=False
-            )
-
-            param_list = cudnn_lstm.state_dict()
-            named_param_list = named_cudnn_lstm.state_dict()
-
-            # process weight and bias
-
-            gate_w_name = "_weight"
-            gate_b_name = "_bias"
-
-            gate_w = param_list[gate_w_name].numpy()
-            gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype(
-                'float64'
-            )
-            param_list[gate_w_name].set_value(gate_w)
-            named_param_list[gate_w_name].set_value(gate_w)
-
-            gate_b = param_list[gate_b_name].numpy()
-            gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype(
-                'float64'
-            )
-            param_list[gate_b_name].set_value(gate_b)
-            named_param_list[gate_b_name].set_value(gate_b)
-
-            step_input_np = np.random.uniform(
-                -0.1, 0.1, (self.batch_size, self.input_size)
-            ).astype('float64')
-            pre_hidden_np = np.random.uniform(
-                -0.1, 0.1, (self.batch_size, self.hidden_size)
-            ).astype('float64')
-            pre_cell_np = np.random.uniform(
-                -0.1, 0.1, (self.batch_size, self.hidden_size)
-            ).astype('float64')
-
-            step_input_var = fluid.dygraph.to_variable(step_input_np)
-            pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
-            pre_cell_var = fluid.dygraph.to_variable(pre_cell_np)
-            api_out = cudnn_lstm(step_input_var, pre_hidden_var, pre_cell_var)
-            named_api_out = named_cudnn_lstm(
-                step_input_var, pre_hidden_var, pre_cell_var
-            )
-
-            api_hidden_out = api_out[0]
-            api_cell_out = api_out[1]
-            named_api_hidden_out = named_api_out[0]
-            named_api_cell_out = named_api_out[1]
-
-            np_hidden_out, np_cell_out = non_cudnn_step(
-                step_input_np, pre_hidden_np, pre_cell_np, gate_w, gate_b
-            )
-
-            np.testing.assert_allclose(
-                api_hidden_out.numpy(), np_hidden_out, rtol=1e-05, atol=0
-            )
-            np.testing.assert_allclose(
-                api_cell_out.numpy(), np_cell_out, rtol=1e-05, atol=0
-            )
-            np.testing.assert_allclose(
-                named_api_hidden_out.numpy(), np_hidden_out, rtol=1e-05, atol=0
-            )
-            np.testing.assert_allclose(
-                named_api_cell_out.numpy(), np_cell_out, rtol=1e-05, atol=0
-            )
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py
index bd372cbabd552635c591081539726d6e07e3e4aa..4e552bebb75000584437a0e49dcf89cf0693a49c 100755
--- a/tools/parallel_UT_rule.py
+++ b/tools/parallel_UT_rule.py
@@ -1103,7 +1103,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
     'test_trt_reduce_sum_op',
     'data_type_transform_test',
     'test_gru_rnn_op',
-    'test_cudnn_grucell',
     'test_argsort_op',
     'test_batch_norm_op',
     'test_inplace',
@@ -1190,7 +1189,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
     'test_nn_functional_embedding_dygraph',
     'test_reshape_op',
     'test_maxout_op',
-    'test_cudnn_lstmcell',
     'test_sigmoid_focal_loss',
     'test_manual_seed',
     'test_lrn_op',
@@ -2727,7 +2725,6 @@ TWO_PARALLEL_JOB = [
     'test_polygon_box_transform',
     'test_sequence_pad_op',
     'test_sequence_expand',
-    'test_cudnn_grucell',
     'test_pool2d_bf16_mkldnn_op',
     'test_bilinear_api',
     'test_parallel_executor_inference_feed_partial_data',
@@ -2783,7 +2780,6 @@ TWO_PARALLEL_JOB = [
     'test_cos_sim_op',
     'test_sequence_enumerate_op',
     'test_sequence_concat',
-    'test_cudnn_lstmcell',
     'test_data_norm_op',
     'test_decoupled_py_reader_data_check',
     'test_deformable_conv_v1_op',
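For reference, the default ("cuDNN-compatible") path of the removed LSTMCell reduces to the single step below. This is a minimal NumPy sketch of the docstring equations, mirroring the `cudnn_step` helper in the deleted test; the function and argument names are illustrative only, not part of any Paddle API.

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x, h_prev, c_prev, w_ih, b_ih, w_hh, b_hh):
    # Gate pre-activations; weights are laid out as [4 * hidden, input] and
    # [4 * hidden, hidden], matching the removed cell's parameter shapes.
    gates = x @ w_ih.T + b_ih + h_prev @ w_hh.T + b_hh
    i, f, g, o = np.split(gates, 4, axis=1)  # input, forget, cell, output
    c = sigmoid(f) * c_prev + sigmoid(i) * np.tanh(g)
    h = sigmoid(o) * np.tanh(c)
    return h, c
```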
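The removed GRUCell's default path follows the same pattern with three gate chunks. A NumPy sketch of that step (again illustrative names, not an API); note that the `(h_prev - n) * u + n` form used in the deleted `forward` is algebraically identical to the docstring's `u * h_prev + (1 - u) * n`:

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x, h_prev, w_ih, b_ih, w_hh, b_hh):
    # Each weight holds three stacked chunks: reset, update, candidate.
    ig = x @ w_ih.T + b_ih
    hg = h_prev @ w_hh.T + b_hh
    ir, iu, ic = np.split(ig, 3, axis=1)
    hr, hu, hc = np.split(hg, 3, axis=1)
    r = sigmoid(ir + hr)        # reset gate
    u = sigmoid(iu + hu)        # update gate
    n = np.tanh(ic + r * hc)    # candidate state
    # Same as u * h_prev + (1 - u) * n.
    return (h_prev - n) * u + n
```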
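This change leaves no cell classes under `paddle.fluid.dygraph`; the maintained equivalents live in `paddle.nn`. A migration sketch, assuming the Paddle 2.x `paddle.nn.LSTMCell`/`paddle.nn.GRUCell` API (an assumption, not part of this diff; note the constructor order is `(input_size, hidden_size)`, the reverse of the removed cells, and `forward` also returns the step output):

```python
import paddle
import paddle.nn as nn

batch_size, input_size, hidden_size = 64, 128, 256

x = paddle.randn([batch_size, input_size])
h = paddle.randn([batch_size, hidden_size])
c = paddle.randn([batch_size, hidden_size])

# Assumed paddle.nn API: cells take (input_size, hidden_size), the reverse
# of the removed fluid cells, which took (hidden_size, input_size).
lstm_cell = nn.LSTMCell(input_size, hidden_size)
step_out, (new_h, new_c) = lstm_cell(x, (h, c))

gru_cell = nn.GRUCell(input_size, hidden_size)
step_out, new_h = gru_cell(x, h)
```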