diff --git a/python/paddle/fluid/contrib/layers/__init__.py b/python/paddle/fluid/contrib/layers/__init__.py index da18475f3e1a0776344a90cee231174e06cd2349..be5eaa17cd6833a897354ad1847bfb24e9b2335b 100644 --- a/python/paddle/fluid/contrib/layers/__init__.py +++ b/python/paddle/fluid/contrib/layers/__init__.py @@ -15,11 +15,9 @@ from . import nn from .nn import * -from .rnn_impl import * from . import metric_op from .metric_op import * __all__ = [] __all__ += nn.__all__ -__all__ += rnn_impl.__all__ __all__ += metric_op.__all__ diff --git a/python/paddle/fluid/contrib/layers/rnn_impl.py b/python/paddle/fluid/contrib/layers/rnn_impl.py deleted file mode 100644 index 2c232320cb1bac2d3df8bbc413ac0bb6fa9ac033..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/contrib/layers/rnn_impl.py +++ /dev/null @@ -1,890 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy - -import paddle -from paddle.fluid import layers, unique_name -from paddle.fluid.dygraph import Layer -from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper -from paddle.fluid.layers.control_flow import StaticRNN -import paddle - -__all__ = ['BasicGRUUnit', 'basic_gru', 'BasicLSTMUnit', 'basic_lstm'] - - -class BasicGRUUnit(Layer): - """ - **** - BasicGRUUnit class, using basic operators to build GRU - The algorithm can be described as the equations below. - - .. math:: - u_t & = actGate(W_ux xu_{t} + W_uh h_{t-1} + b_u) - - r_t & = actGate(W_rx xr_{t} + W_rh h_{t-1} + b_r) - - m_t & = actNode(W_cx xm_t + W_ch dot(r_t, h_{t-1}) + b_m) - - h_t & = dot(u_t, h_{t-1}) + dot((1-u_t), m_t) - - Args: - name_scope(string) : The name scope used to identify parameters and biases - hidden_size (integer): The hidden size used in the Unit. - param_attr(ParamAttr|None): The parameter attribute for the learnable - weight matrix. Note: - If it is set to None or one attribute of ParamAttr, gru_unit will - create ParamAttr as param_attr. If the Initializer of the param_attr - is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr|None): The parameter attribute for the bias - of GRU unit. - If it is set to None or one attribute of ParamAttr, gru_unit will - create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized zero. Default: None. - gate_activation (function|None): The activation function for gates (actGate). - Default: 'fluid.layers.sigmoid' - activation (function|None): The activation function for cell (actNode). - Default: 'fluid.layers.tanh' - dtype(string): data type used in this unit - - Examples: - - .. code-block:: python - - import paddle.fluid.layers as layers - from paddle.fluid.contrib.layers import BasicGRUUnit - - input_size = 128 - hidden_size = 256 - input = layers.data( name = "input", shape = [-1, input_size], dtype='float32') - pre_hidden = layers.data( name = "pre_hidden", shape=[-1, hidden_size], dtype='float32') - - gru_unit = BasicGRUUnit( "gru_unit", hidden_size ) - - new_hidden = gru_unit( input, pre_hidden ) - - """ - - def __init__( - self, - name_scope, - hidden_size, - param_attr=None, - bias_attr=None, - gate_activation=None, - activation=None, - dtype='float32', - ): - super().__init__(name_scope, dtype) - # reserve old school _full_name and _helper for static graph save load - self._full_name = unique_name.generate( - name_scope + "/" + self.__class__.__name__ - ) - self._helper = LayerObjectHelper(self._full_name) - - self._name = name_scope - self._hiden_size = hidden_size - self._param_attr = param_attr - self._bias_attr = bias_attr - self._gate_activation = gate_activation or paddle.nn.functional.sigmoid - self._activation = activation or paddle.tanh - self._dtype = dtype - - def _build_once(self, input, pre_hidden): - self._input_size = input.shape[-1] - assert self._input_size > 0 - - if self._param_attr is not None and self._param_attr.name is not None: - gate_param_attr = copy.deepcopy(self._param_attr) - candidate_param_attr = copy.deepcopy(self._param_attr) - gate_param_attr.name += "_gate" - candidate_param_attr.name += "_candidate" - else: - gate_param_attr = self._param_attr - candidate_param_attr = self._param_attr - - self._gate_weight = self.create_parameter( - attr=gate_param_attr, - shape=[self._input_size + self._hiden_size, 2 * self._hiden_size], - dtype=self._dtype, - ) - - self._candidate_weight = self.create_parameter( - attr=candidate_param_attr, - shape=[self._input_size + self._hiden_size, self._hiden_size], - dtype=self._dtype, - ) - - if self._bias_attr is not None and self._bias_attr.name is not None: - gate_bias_attr = copy.deepcopy(self._bias_attr) - candidate_bias_attr = copy.deepcopy(self._bias_attr) - gate_bias_attr.name += "_gate" - candidate_bias_attr.name += "_candidate" - else: - gate_bias_attr = self._bias_attr - candidate_bias_attr = self._bias_attr - - self._gate_bias = self.create_parameter( - attr=gate_bias_attr, - shape=[2 * self._hiden_size], - dtype=self._dtype, - is_bias=True, - ) - self._candidate_bias = self.create_parameter( - attr=candidate_bias_attr, - shape=[self._hiden_size], - dtype=self._dtype, - is_bias=True, - ) - - def forward(self, input, pre_hidden): - concat_input_hidden = layers.concat([input, pre_hidden], 1) - - gate_input = paddle.matmul(x=concat_input_hidden, y=self._gate_weight) - - gate_input = paddle.add(gate_input, self._gate_bias) - - gate_input = self._gate_activation(gate_input) - r, u = paddle.split(gate_input, num_or_sections=2, axis=1) - - r_hidden = r * pre_hidden - - candidate = paddle.matmul( - layers.concat([input, r_hidden], 1), self._candidate_weight - ) - candidate = paddle.add(candidate, self._candidate_bias) - - c = self._activation(candidate) - new_hidden = u * pre_hidden + (1 - u) * c - - return new_hidden - - -def basic_gru( - input, - init_hidden, - hidden_size, - num_layers=1, - sequence_length=None, - dropout_prob=0.0, - bidirectional=False, - batch_first=True, - param_attr=None, - bias_attr=None, - gate_activation=None, - activation=None, - dtype='float32', - name='basic_gru', -): - r""" - GRU implementation using basic operator, supports multiple layers and bidirectional gru. - - .. math:: - u_t & = actGate(W_ux xu_{t} + W_uh h_{t-1} + b_u) - - r_t & = actGate(W_rx xr_{t} + W_rh h_{t-1} + b_r) - - m_t & = actNode(W_cx xm_t + W_ch dot(r_t, h_{t-1}) + b_m) - - h_t & = dot(u_t, h_{t-1}) + dot((1-u_t), m_t) - - Args: - input (Variable): GRU input tensor, - if batch_first = False, shape should be ( seq_len x batch_size x input_size ) - if batch_first = True, shape should be ( batch_size x seq_len x hidden_size ) - init_hidden(Variable|None): The initial hidden state of the GRU - This is a tensor with shape ( num_layers x batch_size x hidden_size) - if is_bidirec = True, shape should be ( num_layers*2 x batch_size x hidden_size) - and can be reshaped to tensor with ( num_layers x 2 x batch_size x hidden_size) to use. - If it's None, it will be set to all 0. - hidden_size (int): Hidden size of the GRU - num_layers (int): The total number of layers of the GRU - sequence_length (Variabe|None): A Tensor (shape [batch_size]) stores each real length of each instance, - This tensor will be convert to a mask to mask the padding ids - If it's None means NO padding ids - dropout_prob(float|0.0): Dropout prob, dropout ONLY works after rnn output of each layers, - NOT between time steps - bidirectional (bool|False): If it is bidirectional - batch_first (bool|True): The shape format of the input and output tensors. If true, - the shape format should be :attr:`[batch_size, seq_len, hidden_size]`. If false, - the shape format should be :attr:`[seq_len, batch_size, hidden_size]`. By default - this function accepts input and emits output in batch-major form to be consistent - with most of data format, though a bit less efficient because of extra transposes. - param_attr(ParamAttr|None): The parameter attribute for the learnable - weight matrix. Note: - If it is set to None or one attribute of ParamAttr, gru_unit will - create ParamAttr as param_attr. If the Initializer of the param_attr - is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr|None): The parameter attribute for the bias - of GRU unit. - If it is set to None or one attribute of ParamAttr, gru_unit will - create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized zero. Default: None. - gate_activation (function|None): The activation function for gates (actGate). - Default: 'fluid.layers.sigmoid' - activation (function|None): The activation function for cell (actNode). - Default: 'fluid.layers.tanh' - dtype(string): data type used in this unit - name(string): name used to identify parameters and biases - - Returns: - rnn_out(Tensor),last_hidden(Tensor) - - rnn_out is result of GRU hidden, with shape (seq_len x batch_size x hidden_size) \ - if is_bidirec set to True, shape will be ( seq_len x batch_sze x hidden_size*2) - - last_hidden is the hidden state of the last step of GRU \ - shape is ( num_layers x batch_size x hidden_size ) \ - if is_bidirec set to True, shape will be ( num_layers*2 x batch_size x hidden_size), - can be reshaped to a tensor with shape( num_layers x 2 x batch_size x hidden_size) - - Examples: - .. code-block:: python - - import paddle.fluid.layers as layers - from paddle.fluid.contrib.layers import basic_gru - - batch_size = 20 - input_size = 128 - hidden_size = 256 - num_layers = 2 - dropout = 0.5 - bidirectional = True - batch_first = False - - input = layers.data( name = "input", shape = [-1, batch_size, input_size], dtype='float32') - pre_hidden = layers.data( name = "pre_hidden", shape=[-1, hidden_size], dtype='float32') - sequence_length = layers.data( name="sequence_length", shape=[-1], dtype='int32') - - - rnn_out, last_hidden = basic_gru( input, pre_hidden, hidden_size, num_layers = num_layers, \ - sequence_length = sequence_length, dropout_prob=dropout, bidirectional = bidirectional, \ - batch_first = batch_first) - - """ - - fw_unit_list = [] - - for i in range(num_layers): - new_name = name + "_layers_" + str(i) - if param_attr is not None and param_attr.name is not None: - layer_param_attr = copy.deepcopy(param_attr) - layer_param_attr.name += "_fw_w_" + str(i) - else: - layer_param_attr = param_attr - if bias_attr is not None and bias_attr.name is not None: - layer_bias_attr = copy.deepcopy(bias_attr) - layer_bias_attr.name += "_fw_b_" + str(i) - else: - layer_bias_attr = bias_attr - fw_unit_list.append( - BasicGRUUnit( - new_name, - hidden_size, - layer_param_attr, - layer_bias_attr, - gate_activation, - activation, - dtype, - ) - ) - if bidirectional: - bw_unit_list = [] - - for i in range(num_layers): - new_name = name + "_reverse_layers_" + str(i) - if param_attr is not None and param_attr.name is not None: - layer_param_attr = copy.deepcopy(param_attr) - layer_param_attr.name += "_bw_w_" + str(i) - else: - layer_param_attr = param_attr - if bias_attr is not None and bias_attr.name is not None: - layer_bias_attr = copy.deepcopy(bias_attr) - layer_bias_attr.name += "_bw_b_" + str(i) - else: - layer_bias_attr = bias_attr - - bw_unit_list.append( - BasicGRUUnit( - new_name, - hidden_size, - layer_param_attr, - layer_bias_attr, - gate_activation, - activation, - dtype, - ) - ) - - if batch_first: - input = paddle.transpose(input, [1, 0, 2]) - - mask = None - if sequence_length: - max_seq_len = paddle.shape(input)[0] - mask = layers.sequence_mask( - sequence_length, maxlen=max_seq_len, dtype='float32' - ) - mask = paddle.transpose(mask, [1, 0]) - - direc_num = 1 - if bidirectional: - direc_num = 2 - if init_hidden: - init_hidden = paddle.reshape( - init_hidden, shape=[num_layers, direc_num, -1, hidden_size] - ) - - def get_single_direction_output( - rnn_input, unit_list, mask=None, direc_index=0 - ): - rnn = StaticRNN() - with rnn.step(): - step_input = rnn.step_input(rnn_input) - - if mask: - step_mask = rnn.step_input(mask) - - for i in range(num_layers): - if init_hidden: - pre_hidden = rnn.memory(init=init_hidden[i, direc_index]) - else: - pre_hidden = rnn.memory( - batch_ref=rnn_input, - shape=[-1, hidden_size], - ref_batch_dim_idx=1, - ) - - new_hidden = unit_list[i](step_input, pre_hidden) - - if mask: - new_hidden = paddle.tensor.math._multiply_with_axis( - new_hidden, step_mask, axis=0 - ) - paddle.tensor.math._multiply_with_axis( - pre_hidden, (step_mask - 1), axis=0 - ) - rnn.update_memory(pre_hidden, new_hidden) - - rnn.step_output(new_hidden) - - step_input = new_hidden - if dropout_prob is not None and dropout_prob > 0.0: - step_input = paddle.nn.functional.dropout( - step_input, - p=dropout_prob, - ) - - rnn.step_output(step_input) - - rnn_out = rnn() - - last_hidden_array = [] - rnn_output = rnn_out[-1] - for i in range(num_layers): - last_hidden = rnn_out[i] - last_hidden = last_hidden[-1] - last_hidden_array.append(last_hidden) - - last_hidden_output = layers.concat(last_hidden_array, axis=0) - last_hidden_output = paddle.reshape( - last_hidden_output, shape=[num_layers, -1, hidden_size] - ) - - return rnn_output, last_hidden_output - # seq_len, batch_size, hidden_size - - fw_rnn_out, fw_last_hidden = get_single_direction_output( - input, fw_unit_list, mask, direc_index=0 - ) - - if bidirectional: - bw_input = paddle.reverse(input, axis=[0]) - bw_mask = None - if mask: - bw_mask = paddle.reverse(mask, axis=[0]) - bw_rnn_out, bw_last_hidden = get_single_direction_output( - bw_input, bw_unit_list, bw_mask, direc_index=1 - ) - - bw_rnn_out = paddle.reverse(bw_rnn_out, axis=[0]) - - rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2) - last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1) - - last_hidden = paddle.reshape( - last_hidden, shape=[num_layers * direc_num, -1, hidden_size] - ) - - if batch_first: - rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) - return rnn_out, last_hidden - else: - - rnn_out = fw_rnn_out - last_hidden = fw_last_hidden - - if batch_first: - rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) - - return rnn_out, last_hidden - - -def basic_lstm( - input, - init_hidden, - init_cell, - hidden_size, - num_layers=1, - sequence_length=None, - dropout_prob=0.0, - bidirectional=False, - batch_first=True, - param_attr=None, - bias_attr=None, - gate_activation=None, - activation=None, - forget_bias=1.0, - dtype='float32', - name='basic_lstm', -): - r""" - LSTM implementation using basic operators, supports multiple layers and bidirectional LSTM. - - .. math:: - i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i) - - f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget_bias ) - - o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o) - - \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c) - - c_t &= f_t \odot c_{t-1} + i_t \odot \\tilde{c_t} - - h_t &= o_t \odot tanh(c_t) - - Args: - input (Variable): lstm input tensor, - if batch_first = False, shape should be ( seq_len x batch_size x input_size ) - if batch_first = True, shape should be ( batch_size x seq_len x hidden_size ) - init_hidden(Variable|None): The initial hidden state of the LSTM - This is a tensor with shape ( num_layers x batch_size x hidden_size) - if is_bidirec = True, shape should be ( num_layers*2 x batch_size x hidden_size) - and can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size) to use. - If it's None, it will be set to all 0. - init_cell(Variable|None): The initial hidden state of the LSTM - This is a tensor with shape ( num_layers x batch_size x hidden_size) - if is_bidirec = True, shape should be ( num_layers*2 x batch_size x hidden_size) - and can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size) to use. - If it's None, it will be set to all 0. - hidden_size (int): Hidden size of the LSTM - num_layers (int): The total number of layers of the LSTM - sequence_length (Variabe|None): A tensor (shape [batch_size]) stores each real length of each instance, - This tensor will be convert to a mask to mask the padding ids - If it's None means NO padding ids - dropout_prob(float|0.0): Dropout prob, dropout ONLY work after rnn output of each layers, - NOT between time steps - bidirectional (bool|False): If it is bidirectional - batch_first (bool|True): The shape format of the input and output tensors. If true, - the shape format should be :attr:`[batch_size, seq_len, hidden_size]`. If false, - the shape format should be :attr:`[seq_len, batch_size, hidden_size]`. By default - this function accepts input and emits output in batch-major form to be consistent - with most of data format, though a bit less efficient because of extra transposes. - param_attr(ParamAttr|None): The parameter attribute for the learnable - weight matrix. Note: - If it is set to None or one attribute of ParamAttr, lstm_unit will - create ParamAttr as param_attr. If the Initializer of the param_attr - is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr|None): The parameter attribute for the bias - of LSTM unit. - If it is set to None or one attribute of ParamAttr, lstm_unit will - create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized zero. Default: None. - gate_activation (function|None): The activation function for gates (actGate). - Default: 'fluid.layers.sigmoid' - activation (function|None): The activation function for cell (actNode). - Default: 'fluid.layers.tanh' - forget_bias (float|1.0) : Forget bias used to compute the forget gate - dtype(string): Data type used in this unit - name(string): Name used to identify parameters and biases - - Returns: - rnn_out(Tensor), last_hidden(Tensor), last_cell(Tensor) - - rnn_out is the result of LSTM hidden, shape is (seq_len x batch_size x hidden_size) \ - if is_bidirec set to True, it's shape will be ( seq_len x batch_sze x hidden_size*2) - - last_hidden is the hidden state of the last step of LSTM \ - with shape ( num_layers x batch_size x hidden_size ) \ - if is_bidirec set to True, it's shape will be ( num_layers*2 x batch_size x hidden_size), - and can be reshaped to a tensor ( num_layers x 2 x batch_size x hidden_size) to use. - - last_cell is the hidden state of the last step of LSTM \ - with shape ( num_layers x batch_size x hidden_size ) \ - if is_bidirec set to True, it's shape will be ( num_layers*2 x batch_size x hidden_size), - and can be reshaped to a tensor ( num_layers x 2 x batch_size x hidden_size) to use. - - Examples: - .. code-block:: python - - import paddle.fluid.layers as layers - from paddle.fluid.contrib.layers import basic_lstm - - batch_size = 20 - input_size = 128 - hidden_size = 256 - num_layers = 2 - dropout = 0.5 - bidirectional = True - batch_first = False - - input = layers.data( name = "input", shape = [-1, batch_size, input_size], dtype='float32') - pre_hidden = layers.data( name = "pre_hidden", shape=[-1, hidden_size], dtype='float32') - pre_cell = layers.data( name = "pre_cell", shape=[-1, hidden_size], dtype='float32') - sequence_length = layers.data( name="sequence_length", shape=[-1], dtype='int32') - - rnn_out, last_hidden, last_cell = basic_lstm( input, pre_hidden, pre_cell, \ - hidden_size, num_layers = num_layers, \ - sequence_length = sequence_length, dropout_prob=dropout, bidirectional = bidirectional, \ - batch_first = batch_first) - - """ - fw_unit_list = [] - - for i in range(num_layers): - new_name = name + "_layers_" + str(i) - if param_attr is not None and param_attr.name is not None: - layer_param_attr = copy.deepcopy(param_attr) - layer_param_attr.name += "_fw_w_" + str(i) - else: - layer_param_attr = param_attr - if bias_attr is not None and bias_attr.name is not None: - layer_bias_attr = copy.deepcopy(bias_attr) - layer_bias_attr.name += "_fw_b_" + str(i) - else: - layer_bias_attr = bias_attr - fw_unit_list.append( - BasicLSTMUnit( - new_name, - hidden_size, - param_attr=layer_param_attr, - bias_attr=layer_bias_attr, - gate_activation=gate_activation, - activation=activation, - forget_bias=forget_bias, - dtype=dtype, - ) - ) - if bidirectional: - bw_unit_list = [] - - for i in range(num_layers): - new_name = name + "_reverse_layers_" + str(i) - if param_attr is not None and param_attr.name is not None: - layer_param_attr = copy.deepcopy(param_attr) - layer_param_attr.name += "_bw_w_" + str(i) - else: - layer_param_attr = param_attr - if bias_attr is not None and bias_attr.name is not None: - layer_bias_attr = copy.deepcopy(bias_attr) - layer_bias_attr.name += "_bw_b_" + str(i) - else: - layer_bias_attr = param_attr - bw_unit_list.append( - BasicLSTMUnit( - new_name, - hidden_size, - param_attr=layer_param_attr, - bias_attr=layer_bias_attr, - gate_activation=gate_activation, - activation=activation, - forget_bias=forget_bias, - dtype=dtype, - ) - ) - - if batch_first: - input = paddle.transpose(input, [1, 0, 2]) - - mask = None - if sequence_length: - max_seq_len = paddle.shape(input)[0] - mask = layers.sequence_mask( - sequence_length, maxlen=max_seq_len, dtype='float32' - ) - - mask = paddle.transpose(mask, [1, 0]) - - direc_num = 1 - if bidirectional: - direc_num = 2 - # convert to [num_layers, 2, batch_size, hidden_size] - if init_hidden: - init_hidden = paddle.reshape( - init_hidden, shape=[num_layers, direc_num, -1, hidden_size] - ) - init_cell = paddle.reshape( - init_cell, shape=[num_layers, direc_num, -1, hidden_size] - ) - - # forward direction - def get_single_direction_output( - rnn_input, unit_list, mask=None, direc_index=0 - ): - rnn = StaticRNN() - with rnn.step(): - step_input = rnn.step_input(rnn_input) - - if mask: - step_mask = rnn.step_input(mask) - - for i in range(num_layers): - if init_hidden: - pre_hidden = rnn.memory(init=init_hidden[i, direc_index]) - pre_cell = rnn.memory(init=init_cell[i, direc_index]) - else: - pre_hidden = rnn.memory( - batch_ref=rnn_input, shape=[-1, hidden_size] - ) - pre_cell = rnn.memory( - batch_ref=rnn_input, shape=[-1, hidden_size] - ) - - new_hidden, new_cell = unit_list[i]( - step_input, pre_hidden, pre_cell - ) - - if mask: - new_hidden = paddle.tensor.math._multiply_with_axis( - new_hidden, step_mask, axis=0 - ) - paddle.tensor.math._multiply_with_axis( - pre_hidden, (step_mask - 1), axis=0 - ) - new_cell = paddle.tensor.math._multiply_with_axis( - new_cell, step_mask, axis=0 - ) - paddle.tensor.math._multiply_with_axis( - pre_cell, (step_mask - 1), axis=0 - ) - - rnn.update_memory(pre_hidden, new_hidden) - rnn.update_memory(pre_cell, new_cell) - - rnn.step_output(new_hidden) - rnn.step_output(new_cell) - - step_input = new_hidden - if dropout_prob is not None and dropout_prob > 0.0: - step_input = paddle.nn.functional.dropout( - step_input, - p=dropout_prob, - mode='upscale_in_train', - ) - - rnn.step_output(step_input) - - rnn_out = rnn() - - last_hidden_array = [] - last_cell_array = [] - rnn_output = rnn_out[-1] - for i in range(num_layers): - last_hidden = rnn_out[i * 2] - last_hidden = last_hidden[-1] - last_hidden_array.append(last_hidden) - last_cell = rnn_out[i * 2 + 1] - last_cell = last_cell[-1] - last_cell_array.append(last_cell) - - last_hidden_output = layers.concat(last_hidden_array, axis=0) - last_hidden_output = paddle.reshape( - last_hidden_output, shape=[num_layers, -1, hidden_size] - ) - last_cell_output = layers.concat(last_cell_array, axis=0) - last_cell_output = paddle.reshape( - last_cell_output, shape=[num_layers, -1, hidden_size] - ) - - return rnn_output, last_hidden_output, last_cell_output - # seq_len, batch_size, hidden_size - - fw_rnn_out, fw_last_hidden, fw_last_cell = get_single_direction_output( - input, fw_unit_list, mask, direc_index=0 - ) - - if bidirectional: - bw_input = paddle.reverse(input, axis=[0]) - bw_mask = None - if mask: - bw_mask = paddle.reverse(mask, axis=[0]) - bw_rnn_out, bw_last_hidden, bw_last_cell = get_single_direction_output( - bw_input, bw_unit_list, bw_mask, direc_index=1 - ) - - bw_rnn_out = paddle.reverse(bw_rnn_out, axis=[0]) - - rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2) - last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1) - last_hidden = paddle.reshape( - last_hidden, shape=[num_layers * direc_num, -1, hidden_size] - ) - - last_cell = layers.concat([fw_last_cell, bw_last_cell], axis=1) - last_cell = paddle.reshape( - last_cell, shape=[num_layers * direc_num, -1, hidden_size] - ) - - if batch_first: - rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) - return rnn_out, last_hidden, last_cell - else: - - rnn_out = fw_rnn_out - last_hidden = fw_last_hidden - last_cell = fw_last_cell - - if batch_first: - rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) - - return rnn_out, last_hidden, last_cell - - -class BasicLSTMUnit(Layer): - r""" - **** - BasicLSTMUnit class, Using basic operator to build LSTM - The algorithm can be described as the code below. - - .. math:: - - i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i) - - f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget_bias ) - - o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o) - - \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c) - - c_t &= f_t \odot c_{t-1} + i_t \odot \\tilde{c_t} - - h_t &= o_t \odot tanh(c_t) - - - $W$ terms denote weight matrices (e.g. $W_{ix}$ is the matrix - of weights from the input gate to the input) - - The b terms denote bias vectors ($bx_i$ and $bh_i$ are the input gate bias vector). - - sigmoid is the logistic sigmoid function. - - $i, f, o$ and $c$ are the input gate, forget gate, output gate, - and cell activation vectors, respectively, all of which have the same size as - the cell output activation vector $h$. - - The :math:`\odot` is the element-wise product of the vectors. - - :math:`tanh` is the activation functions. - - :math:`\\tilde{c_t}` is also called candidate hidden state, - which is computed based on the current input and the previous hidden state. - - Args: - name_scope(string) : The name scope used to identify parameter and bias name - hidden_size (integer): The hidden size used in the Unit. - param_attr(ParamAttr|None): The parameter attribute for the learnable - weight matrix. Note: - If it is set to None or one attribute of ParamAttr, lstm_unit will - create ParamAttr as param_attr. If the Initializer of the param_attr - is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr|None): The parameter attribute for the bias - of LSTM unit. - If it is set to None or one attribute of ParamAttr, lstm_unit will - create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized as zero. Default: None. - gate_activation (function|None): The activation function for gates (actGate). - Default: 'fluid.layers.sigmoid' - activation (function|None): The activation function for cells (actNode). - Default: 'fluid.layers.tanh' - forget_bias(float|1.0): forget bias used when computing forget gate - dtype(string): data type used in this unit - - Examples: - - .. code-block:: python - - import paddle.fluid.layers as layers - from paddle.fluid.contrib.layers import BasicLSTMUnit - - input_size = 128 - hidden_size = 256 - input = layers.data( name = "input", shape = [-1, input_size], dtype='float32') - pre_hidden = layers.data( name = "pre_hidden", shape=[-1, hidden_size], dtype='float32') - pre_cell = layers.data( name = "pre_cell", shape=[-1, hidden_size], dtype='float32') - - lstm_unit = BasicLSTMUnit( "gru_unit", hidden_size) - - new_hidden, new_cell = lstm_unit( input, pre_hidden, pre_cell ) - - """ - - def __init__( - self, - name_scope, - hidden_size, - param_attr=None, - bias_attr=None, - gate_activation=None, - activation=None, - forget_bias=1.0, - dtype='float32', - ): - super().__init__(name_scope, dtype) - # reserve old school _full_name and _helper for static graph save load - self._full_name = unique_name.generate( - name_scope + "/" + self.__class__.__name__ - ) - self._helper = LayerObjectHelper(self._full_name) - - self._name = name_scope - self._hiden_size = hidden_size - self._param_attr = param_attr - self._bias_attr = bias_attr - self._gate_activation = gate_activation or paddle.nn.functional.sigmoid - self._activation = activation or paddle.tanh - self._forget_bias = layers.fill_constant( - [1], dtype=dtype, value=forget_bias - ) - self._forget_bias.stop_gradient = False - self._dtype = dtype - - def _build_once(self, input, pre_hidden, pre_cell): - self._input_size = input.shape[-1] - assert self._input_size > 0 - - self._weight = self.create_parameter( - attr=self._param_attr, - shape=[self._input_size + self._hiden_size, 4 * self._hiden_size], - dtype=self._dtype, - ) - - self._bias = self.create_parameter( - attr=self._bias_attr, - shape=[4 * self._hiden_size], - dtype=self._dtype, - is_bias=True, - ) - - def forward(self, input, pre_hidden, pre_cell): - concat_input_hidden = layers.concat([input, pre_hidden], 1) - gate_input = paddle.matmul(x=concat_input_hidden, y=self._weight) - - gate_input = paddle.add(gate_input, self._bias) - i, j, f, o = paddle.split(gate_input, num_or_sections=4, axis=-1) - new_cell = paddle.add( - paddle.multiply( - pre_cell, - paddle.nn.functional.sigmoid(paddle.add(f, self._forget_bias)), - ), - paddle.multiply(paddle.nn.functional.sigmoid(i), paddle.tanh(j)), - ) - new_hidden = paddle.tanh(new_cell) * paddle.nn.functional.sigmoid(o) - - return new_hidden, new_cell diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 3602d3759115148d0f5839418e615c1876de9634..e4e8954abc8ab7cc131ff84e6b899e6d77b9543b 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -444,10 +444,6 @@ list(REMOVE_ITEM TEST_OPS list(REMOVE_ITEM TEST_OPS test_imperative_ocr_attention_model) list(REMOVE_ITEM TEST_OPS test_async_ssa_graph_executor_mnist) list(REMOVE_ITEM TEST_OPS test_install_check) -list(REMOVE_ITEM TEST_OPS test_basic_gru_api) -list(REMOVE_ITEM TEST_OPS test_basic_gru_unit_op) -list(REMOVE_ITEM TEST_OPS test_basic_lstm_api) -list(REMOVE_ITEM TEST_OPS test_basic_lstm_unit_op) list(REMOVE_ITEM TEST_OPS test_fuse_all_reduce_pass) list(REMOVE_ITEM TEST_OPS test_fuse_bn_act_pass) list(REMOVE_ITEM TEST_OPS test_fuse_bn_add_act_pass) diff --git a/python/paddle/fluid/tests/unittests/test_basic_gru_api.py b/python/paddle/fluid/tests/unittests/test_basic_gru_api.py deleted file mode 100644 index 17ffed2629e0e1afe3ffffbea19bc04fe5edaf19..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_basic_gru_api.py +++ /dev/null @@ -1,388 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy -import numpy as np - -import paddle.fluid as fluid -import paddle.fluid.core as core -import paddle.fluid.layers as layers -from paddle.fluid import framework -from paddle.fluid.contrib.layers import basic_gru -from paddle.fluid.executor import Executor - -np.set_seed(123) - -SIGMOID_THRESHOLD_MIN = -40.0 -SIGMOID_THRESHOLD_MAX = 13.0 -EXP_MAX_INPUT = 40.0 - - -def sigmoid(x): - y = np.copy(x) - y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN - y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX - return 1.0 / (1.0 + np.exp(-y)) - - -def tanh(x): - y = -2.0 * x - y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT - return (2.0 / (1.0 + np.exp(y))) - 1.0 - - -def gru_np( - input, - init_h, - hidden_size, - gate_weight, - gate_bias, - candidate_weight, - candidate_bias, - num_layers=1, - batch_first=False, - is_bidirect=False, - sequence_length=None, -): - def step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b): - concat_1 = np.concatenate([step_in, pre_hidden], 1) - - gate_input = np.matmul(concat_1, gate_w) - gate_input += gate_b - gate_input = sigmoid(gate_input) - r, u = np.split(gate_input, indices_or_sections=2, axis=1) - - r_hidden = r * pre_hidden - - candidate = np.matmul( - np.concatenate([step_in, r_hidden], 1), candidate_w - ) - - candidate += candidate_b - c = tanh(candidate) - - new_hidden = u * pre_hidden + (1 - u) * c - - return new_hidden - - if batch_first: - input = np.tranpose(input, [1, 0, 2]) - - batch_size = input.shape[1] - mask = None - if sequence_length is not None: - max_seq_len = input.shape[0] - - mask = np.zeros([batch_size, max_seq_len]) - - for i, len in enumerate(sequence_length): - mask[i, :len] = 1.0 - - mask = np.transpose(mask, [1, 0]) - - direc_num = 1 - if is_bidirect: - direc_num = 2 - if init_h: - init_h = np.reshape( - init_h, shape=[num_layers, direc_num, -1, hidden_size] - ) - else: - init_h = np.zeros([num_layers, direc_num, batch_size, hidden_size]) - - def get_single_direction_output(rnn_input, mask=None, direc_index=0): - seq_len = rnn_input.shape[0] - - output = [] - # init pre hidden - pre_hidden_array = [] - for i in range(num_layers): - pre_hidden_array.append(init_h[i, direc_index]) - - for i in range(seq_len): - step_input = rnn_input[i] - - if mask is not None: - step_mask = mask[i] - step_mask = np.reshape(step_mask, [-1, 1]) - - for i in range(num_layers): - new_hidden = step( - step_input, - pre_hidden_array[i], - gate_weight[direc_index * num_layers + i], - gate_bias[direc_index * num_layers + i], - candidate_weight[direc_index * num_layers + i], - candidate_bias[direc_index * num_layers + i], - ) - - if mask is not None: - new_hidden = ( - new_hidden * step_mask - + (1 - step_mask) * pre_hidden_array[i] - ) - - pre_hidden_array[i] = new_hidden - - step_input = new_hidden - output.append(step_input) - rnn_out = np.concatenate(output, 0) - rnn_out = np.reshape(rnn_out, [seq_len, -1, hidden_size]) - - last_hidden_out = np.concatenate(pre_hidden_array, 0) - last_hidden_out = np.reshape( - last_hidden_out, [num_layers, -1, hidden_size] - ) - - return rnn_out, last_hidden_out - - fw_rnn_out, fw_last_hidden = get_single_direction_output( - input, mask, direc_index=0 - ) - - if is_bidirect: - bw_input = input[::-1] - bw_mask = None - if mask is not None: - bw_mask = mask[::-1] - - bw_rnn_out, bw_last_hidden = get_single_direction_output( - bw_input, bw_mask, direc_index=1 - ) - - bw_rnn_out = bw_rnn_out[::-1] - - rnn_out = np.concatenate([fw_rnn_out, bw_rnn_out], 2) - last_hidden = np.concatenate([fw_last_hidden, bw_last_hidden], 1) - last_hidden = np.reshape( - last_hidden, [num_layers * direc_num, -1, hidden_size] - ) - - if batch_first: - rnn_out = np.transpose(rnn_out, [1, 0, 2]) - - return rnn_out, last_hidden - else: - rnn_out = fw_rnn_out - last_hidden = fw_last_hidden - - if batch_first: - rnn_out = np.transpose(rnn_out, [1, 0, 2]) - - return rnn_out, last_hidden - - -class TestBasicGRUApi(unittest.TestCase): - def setUp(self): - self.hidden_size = 10 - self.batch_size = 5 - self.seq_len = 6 - self.num_layers = 2 - self.is_bidirect = True - self.batch_first = False - - def test_run(self): - x = layers.data( - name='x', - shape=[-1, self.batch_size, self.hidden_size], - dtype='float32', - ) - sequence_length = layers.data( - name="sequence_length", shape=[-1], dtype='float32' - ) - - rnn_out, last_hidden = basic_gru( - x, - None, - self.hidden_size, - num_layers=self.num_layers, - batch_first=self.batch_first, - bidirectional=self.is_bidirect, - sequence_length=sequence_length, - ) - - last_hidden.persisbale = True - rnn_out.persisbale = True - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - - exe = Executor(place) - exe.run(framework.default_startup_program()) - - param_list = fluid.default_main_program().block(0).all_parameters() - - # process weight and bias - gate_weight = [] - gate_bias = [] - candidate_weight = [] - candidate_bias = [] - - for i in range(self.num_layers): - gate_w_name = "basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.w_0" - gate_b_name = "basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.b_0" - candidate_w_name = ( - "basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.w_1" - ) - candidate_b_name = ( - "basic_gru_layers_" + str(i) + "/BasicGRUUnit_0.b_1" - ) - - gate_w = np.array( - fluid.global_scope().find_var(gate_w_name).get_tensor() - ) - gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_w_name).get_tensor().set( - gate_w, place - ) - - gate_b = np.array( - fluid.global_scope().find_var(gate_b_name).get_tensor() - ) - gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_b_name).get_tensor().set( - gate_b, place - ) - - candidate_w = np.array( - fluid.global_scope().find_var(candidate_w_name).get_tensor() - ) - candidate_w = np.random.uniform( - -0.1, 0.1, size=candidate_w.shape - ).astype('float32') - fluid.global_scope().find_var(candidate_w_name).get_tensor().set( - candidate_w, place - ) - - candidate_b = np.array( - fluid.global_scope().find_var(candidate_b_name).get_tensor() - ) - candidate_b = np.random.uniform( - -0.1, 0.1, size=candidate_b.shape - ).astype('float32') - fluid.global_scope().find_var(candidate_b_name).get_tensor().set( - candidate_b, place - ) - - gate_weight.append(gate_w) - gate_bias.append(gate_b) - candidate_weight.append(candidate_w) - candidate_bias.append(candidate_b) - - if self.is_bidirect: - for i in range(self.num_layers): - gate_w_name = ( - "basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.w_0" - ) - gate_b_name = ( - "basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.b_0" - ) - candidate_w_name = ( - "basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.w_1" - ) - candidate_b_name = ( - "basic_gru_reverse_layers_" + str(i) + "/BasicGRUUnit_0.b_1" - ) - - gate_w = np.array( - fluid.global_scope().find_var(gate_w_name).get_tensor() - ) - gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_w_name).get_tensor().set( - gate_w, place - ) - - gate_b = np.array( - fluid.global_scope().find_var(gate_b_name).get_tensor() - ) - gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_b_name).get_tensor().set( - gate_b, place - ) - - candidate_w = np.array( - fluid.global_scope().find_var(candidate_w_name).get_tensor() - ) - candidate_w = np.random.uniform( - -0.1, 0.1, size=candidate_w.shape - ).astype('float32') - fluid.global_scope().find_var( - candidate_w_name - ).get_tensor().set(candidate_w, place) - - candidate_b = np.array( - fluid.global_scope().find_var(candidate_b_name).get_tensor() - ) - candidate_b = np.random.uniform( - -0.1, 0.1, size=candidate_b.shape - ).astype('float32') - fluid.global_scope().find_var( - candidate_b_name - ).get_tensor().set(candidate_b, place) - - gate_weight.append(gate_w) - gate_bias.append(gate_b) - candidate_weight.append(candidate_w) - candidate_bias.append(candidate_b) - - step_input_np = np.random.uniform( - -0.1, 0.1, (self.seq_len, self.batch_size, self.hidden_size) - ).astype('float32') - sequence_length_np = np.random.randint( - self.seq_len // 2, self.seq_len, size=(self.batch_size) - ).astype('int64') - - out = exe.run( - feed={'x': step_input_np, 'sequence_length': sequence_length_np}, - fetch_list=[rnn_out, last_hidden], - ) - - api_rnn_out = out[0] - api_last_hidden = out[1] - - np_out = gru_np( - step_input_np, - None, - self.hidden_size, - gate_weight, - gate_bias, - candidate_weight, - candidate_bias, - num_layers=self.num_layers, - batch_first=self.batch_first, - is_bidirect=self.is_bidirect, - sequence_length=sequence_length_np, - ) - - np.testing.assert_allclose(api_rnn_out, np_out[0], rtol=0.0001, atol=0) - - np.testing.assert_allclose( - api_last_hidden, np_out[1], rtol=0.0001, atol=0 - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py b/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py deleted file mode 100644 index 152c76bf6fa014d0b93bfe5e5c07be48a9d41385..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy -import numpy as np - -import paddle.fluid as fluid -import paddle.fluid.core as core -import paddle.fluid.layers as layers -from paddle.fluid import framework -from paddle.fluid.contrib.layers import BasicGRUUnit -from paddle.fluid.executor import Executor - -np.set_seed(123) - -SIGMOID_THRESHOLD_MIN = -40.0 -SIGMOID_THRESHOLD_MAX = 13.0 -EXP_MAX_INPUT = 40.0 - - -def sigmoid(x): - y = np.copy(x) - y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN - y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX - return 1.0 / (1.0 + np.exp(-y)) - - -def tanh(x): - y = -2.0 * x - y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT - return (2.0 / (1.0 + np.exp(y))) - 1.0 - - -def step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b): - concat_1 = np.concatenate([step_in, pre_hidden], 1) - - gate_input = np.matmul(concat_1, gate_w) - gate_input += gate_b - gate_input = sigmoid(gate_input) - r, u = np.split(gate_input, indices_or_sections=2, axis=1) - - r_hidden = r * pre_hidden - - candidate = np.matmul(np.concatenate([step_in, r_hidden], 1), candidate_w) - - candidate += candidate_b - c = tanh(candidate) - - new_hidden = u * pre_hidden + (1 - u) * c - - return new_hidden - - -class TestBasicGRUUnit(unittest.TestCase): - def setUp(self): - self.hidden_size = 5 - self.batch_size = 5 - - def test_run(self): - x = layers.data(name='x', shape=[-1, self.hidden_size], dtype='float32') - pre_hidden = layers.data( - name="pre_hidden", shape=[-1, self.hidden_size], dtype='float32' - ) - gru_unit = BasicGRUUnit("gru_unit", self.hidden_size) - - new_hidden = gru_unit(x, pre_hidden) - - new_hidden.persisbale = True - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - - exe = Executor(place) - exe.run(framework.default_startup_program()) - - param_list = fluid.default_main_program().block(0).all_parameters() - - # process weight and bias - - gate_w_name = "gru_unit/BasicGRUUnit_0.w_0" - gate_b_name = "gru_unit/BasicGRUUnit_0.b_0" - candidate_w_name = "gru_unit/BasicGRUUnit_0.w_1" - candidate_b_name = "gru_unit/BasicGRUUnit_0.b_1" - - gate_w = np.array( - fluid.global_scope().find_var(gate_w_name).get_tensor() - ) - gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_w_name).get_tensor().set( - gate_w, place - ) - - gate_b = np.array( - fluid.global_scope().find_var(gate_b_name).get_tensor() - ) - gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_b_name).get_tensor().set( - gate_b, place - ) - - candidate_w = np.array( - fluid.global_scope().find_var(candidate_w_name).get_tensor() - ) - candidate_w = np.random.uniform( - -0.1, 0.1, size=candidate_w.shape - ).astype('float32') - fluid.global_scope().find_var(candidate_w_name).get_tensor().set( - candidate_w, place - ) - - candidate_b = np.array( - fluid.global_scope().find_var(candidate_b_name).get_tensor() - ) - candidate_b = np.random.uniform( - -0.1, 0.1, size=candidate_b.shape - ).astype('float32') - fluid.global_scope().find_var(candidate_b_name).get_tensor().set( - candidate_b, place - ) - - step_input_np = np.random.uniform( - -0.1, 0.1, (self.batch_size, self.hidden_size) - ).astype('float32') - pre_hidden_np = np.random.uniform( - -0.1, 0.1, (self.batch_size, self.hidden_size) - ).astype('float32') - - out = exe.run( - feed={'x': step_input_np, 'pre_hidden': pre_hidden_np}, - fetch_list=[new_hidden], - ) - - api_out = out[0] - - np_out = step( - step_input_np, - pre_hidden_np, - gate_w, - gate_b, - candidate_w, - candidate_b, - ) - - np.testing.assert_allclose(api_out, np_out, rtol=0.0001, atol=0) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py b/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py deleted file mode 100644 index ba7132cfea99d0a3c6053c12a1d7c973220d7e6f..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy -import numpy as np - -import paddle.fluid as fluid -import paddle.fluid.core as core -import paddle.fluid.layers as layers -from paddle.fluid import framework -from paddle.fluid.contrib.layers import basic_lstm -from paddle.fluid.executor import Executor - -np.set_seed(123) - -SIGMOID_THRESHOLD_MIN = -40.0 -SIGMOID_THRESHOLD_MAX = 13.0 -EXP_MAX_INPUT = 40.0 - - -def sigmoid(x): - y = np.copy(x) - y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN - y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX - return 1.0 / (1.0 + np.exp(-y)) - - -def tanh(x): - y = -2.0 * x - y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT - return (2.0 / (1.0 + np.exp(y))) - 1.0 - - -def lstm_np( - input, - init_h, - init_c, - hidden_size, - gate_weight, - gate_bias, - num_layers=1, - batch_first=False, - is_bidirect=False, - sequence_length=None, - forget_bias=1.0, -): - def step(step_in, pre_hidden, pre_cell, gate_w, gate_b): - concat_1 = np.concatenate([step_in, pre_hidden], 1) - - gate_input = np.matmul(concat_1, gate_w) - gate_input += gate_b - i, j, f, o = np.split(gate_input, indices_or_sections=4, axis=1) - - new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * tanh(j) - new_hidden = tanh(new_cell) * sigmoid(o) - - return new_hidden, new_cell - - mask = None - - if batch_first: - input = np.tranpose(input, [1, 0, 2]) - if mask is not None: - mask = np.transpose(mask, [1, 0]) - - batch_size = input.shape[1] - if sequence_length is not None: - max_seq_len = input.shape[0] - - mask = np.zeros([batch_size, max_seq_len]) - - for i, len in enumerate(sequence_length): - mask[i, :len] = 1.0 - - mask = np.transpose(mask, [1, 0]) - - direc_num = 1 - if is_bidirect: - direc_num = 2 - if init_h: - init_h = np.reshape(init_h, [num_layers, direc_num, -1, hidden_size]) - init_c = np.reshape(init_c, [num_layers, direc_num, -1, hidden_size]) - else: - init_h = np.zeros([num_layers, direc_num, batch_size, hidden_size]) - init_c = np.zeros([num_layers, direc_num, batch_size, hidden_size]) - - def get_single_direction_output(rnn_input, mask=None, direc_index=0): - seq_len = rnn_input.shape[0] - - output = [] - # init pre hidden - pre_hidden_array = [] - pre_cell_array = [] - for i in range(num_layers): - pre_hidden_array.append(init_h[i, direc_index]) - pre_cell_array.append(init_c[i, direc_index]) - - for i in range(seq_len): - step_input = rnn_input[i] - - if mask is not None: - step_mask = mask[i] - step_mask = np.reshape(step_mask, [-1, 1]) - # print("np mask", step_mask.shape ) - - for i in range(num_layers): - new_hidden, new_cell = step( - step_input, - pre_hidden_array[i], - pre_cell_array[i], - gate_weight[direc_index * num_layers + i], - gate_bias[direc_index * num_layers + i], - ) - - if mask is not None: - - new_hidden = np.multiply( - new_hidden, step_mask - ) - np.multiply(pre_hidden_array[i], (step_mask - 1.0)) - # new_hidden = new_hidden * step_mask - pre_hidden_array[i] * ( step_mask -1 ) - # new_cell = new_cell * step_mask - pre_cell_array[i] * (step_mask -1) - new_cell = np.multiply(new_cell, step_mask) - np.multiply( - pre_cell_array[i], (step_mask - 1.0) - ) - - pre_hidden_array[i] = new_hidden - pre_cell_array[i] = new_cell - - step_input = new_hidden - output.append(step_input) - rnn_out = np.concatenate(output, 0) - rnn_out = np.reshape(rnn_out, [seq_len, -1, hidden_size]) - - last_hidden_out = np.concatenate(pre_hidden_array, 0) - last_hidden_out = np.reshape( - last_hidden_out, [num_layers, -1, hidden_size] - ) - - last_cell_out = np.concatenate(pre_cell_array, 0) - last_cell_out = np.reshape(last_cell_out, [num_layers, -1, hidden_size]) - - return rnn_out, last_hidden_out, last_cell_out - - fw_rnn_out, fw_last_hidden, fw_last_cell = get_single_direction_output( - input, mask, direc_index=0 - ) - - if is_bidirect: - bw_input = input[::-1] - bw_mask = None - if mask is not None: - bw_mask = mask[::-1] - - bw_rnn_out, bw_last_hidden, bw_last_cell = get_single_direction_output( - bw_input, bw_mask, direc_index=1 - ) - - bw_rnn_out = bw_rnn_out[::-1] - - rnn_out = np.concatenate([fw_rnn_out, bw_rnn_out], 2) - last_hidden = np.concatenate([fw_last_hidden, bw_last_hidden], 1) - last_hidden = np.reshape( - last_hidden, [num_layers * direc_num, -1, hidden_size] - ) - - last_cell = np.concatenate([fw_last_cell, bw_last_cell], 1) - last_cell = np.reshape( - last_cell, [num_layers * direc_num, -1, hidden_size] - ) - - if batch_first: - rnn_out = np.transpose(rnn_out, [1, 0, 2]) - - return rnn_out, last_hidden, last_cell - else: - rnn_out = fw_rnn_out - last_hidden = fw_last_hidden - last_cell = fw_last_cell - - if batch_first: - rnn_out = np.transpose(rnn_out, [1, 0, 2]) - - return rnn_out, last_hidden, last_cell - - -class TestBasicLSTMApi(unittest.TestCase): - def setUp(self): - self.hidden_size = 10 - self.batch_size = 5 - self.seq_len = 6 - self.num_layers = 2 - self.is_bidirect = True - self.batch_first = False - self.forget_bias = 1.0 - - def test_run(self): - x = layers.data( - name='x', - shape=[-1, self.batch_size, self.hidden_size], - dtype='float32', - ) - sequence_length = layers.data( - name="sequence_length", shape=[-1], dtype='float32' - ) - - rnn_out, last_hidden, last_cell = basic_lstm( - x, - None, - None, - self.hidden_size, - num_layers=self.num_layers, - batch_first=self.batch_first, - bidirectional=self.is_bidirect, - sequence_length=sequence_length, - forget_bias=self.forget_bias, - ) - - last_hidden.persisbale = True - rnn_out.persisbale = True - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = Executor(place) - exe.run(framework.default_startup_program()) - - param_list = fluid.default_main_program().block(0).all_parameters() - - # process weight and bias - gate_weight = [] - gate_bias = [] - - for i in range(self.num_layers): - gate_w_name = "basic_lstm_layers_" + str(i) + "/BasicLSTMUnit_0.w_0" - gate_b_name = "basic_lstm_layers_" + str(i) + "/BasicLSTMUnit_0.b_0" - - gate_w = np.array( - fluid.global_scope().find_var(gate_w_name).get_tensor() - ) - gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_w_name).get_tensor().set( - gate_w, place - ) - - gate_b = np.array( - fluid.global_scope().find_var(gate_b_name).get_tensor() - ) - gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_b_name).get_tensor().set( - gate_b, place - ) - - gate_weight.append(gate_w) - gate_bias.append(gate_b) - - if self.is_bidirect: - for i in range(self.num_layers): - gate_w_name = ( - "basic_lstm_reverse_layers_" - + str(i) - + "/BasicLSTMUnit_0.w_0" - ) - gate_b_name = ( - "basic_lstm_reverse_layers_" - + str(i) - + "/BasicLSTMUnit_0.b_0" - ) - - gate_w = np.array( - fluid.global_scope().find_var(gate_w_name).get_tensor() - ) - gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_w_name).get_tensor().set( - gate_w, place - ) - - gate_b = np.array( - fluid.global_scope().find_var(gate_b_name).get_tensor() - ) - gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_b_name).get_tensor().set( - gate_b, place - ) - - gate_weight.append(gate_w) - gate_bias.append(gate_b) - - step_input_np = np.random.uniform( - -0.1, 0.1, (self.seq_len, self.batch_size, self.hidden_size) - ).astype('float32') - sequence_length_np = np.random.randint( - self.seq_len // 2, self.seq_len, size=(self.batch_size) - ).astype('int64') - - out = exe.run( - feed={'x': step_input_np, 'sequence_length': sequence_length_np}, - fetch_list=[rnn_out, last_hidden, last_cell], - ) - - api_rnn_out = out[0] - api_last_hidden = out[1] - api_last_cell = out[2] - - np_out = lstm_np( - step_input_np, - None, - None, - self.hidden_size, - gate_weight, - gate_bias, - num_layers=self.num_layers, - batch_first=self.batch_first, - is_bidirect=self.is_bidirect, - sequence_length=sequence_length_np, - ) - - np.testing.assert_allclose(api_rnn_out, np_out[0], rtol=0.0001, atol=0) - np.testing.assert_allclose( - api_last_hidden, np_out[1], rtol=0.0001, atol=0 - ) - np.testing.assert_allclose( - api_last_cell, np_out[2], rtol=0.0001, atol=0 - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py b/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py deleted file mode 100644 index 0b17e611b6806920c354f7960f199d4f78b5eb44..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy -import numpy as np - -import paddle.fluid as fluid -import paddle.fluid.core as core -import paddle.fluid.layers as layers -from paddle.fluid import framework -from paddle.fluid.contrib.layers import BasicLSTMUnit -from paddle.fluid.executor import Executor - -np.set_seed(123) - -SIGMOID_THRESHOLD_MIN = -40.0 -SIGMOID_THRESHOLD_MAX = 13.0 -EXP_MAX_INPUT = 40.0 - - -def sigmoid(x): - y = np.copy(x) - y[x < SIGMOID_THRESHOLD_MIN] = SIGMOID_THRESHOLD_MIN - y[x > SIGMOID_THRESHOLD_MAX] = SIGMOID_THRESHOLD_MAX - return 1.0 / (1.0 + np.exp(-y)) - - -def tanh(x): - y = -2.0 * x - y[y > EXP_MAX_INPUT] = EXP_MAX_INPUT - return (2.0 / (1.0 + np.exp(y))) - 1.0 - - -def step(step_in, pre_hidden, pre_cell, gate_w, gate_b, forget_bias=1.0): - concat_1 = np.concatenate([step_in, pre_hidden], 1) - - gate_input = np.matmul(concat_1, gate_w) - gate_input += gate_b - i, j, f, o = np.split(gate_input, indices_or_sections=4, axis=1) - - new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * tanh(j) - new_hidden = tanh(new_cell) * sigmoid(o) - - return new_hidden, new_cell - - -class TestBasicGRUUnit(unittest.TestCase): - def setUp(self): - self.hidden_size = 5 - self.batch_size = 5 - - def test_run(self): - x = layers.data(name='x', shape=[-1, self.hidden_size], dtype='float32') - pre_hidden = layers.data( - name="pre_hidden", shape=[-1, self.hidden_size], dtype='float32' - ) - pre_cell = layers.data( - name="pre_cell", shape=[-1, self.hidden_size], dtype='float32' - ) - - lstm_unit = BasicLSTMUnit("lstm_unit", self.hidden_size) - - new_hidden, new_cell = lstm_unit(x, pre_hidden, pre_cell) - - new_hidden.persisbale = True - new_cell.persisbale = True - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - - exe = Executor(place) - exe.run(framework.default_startup_program()) - - param_list = fluid.default_main_program().block(0).all_parameters() - - # process weight and bias - - gate_w_name = "lstm_unit/BasicLSTMUnit_0.w_0" - gate_b_name = "lstm_unit/BasicLSTMUnit_0.b_0" - - gate_w = np.array( - fluid.global_scope().find_var(gate_w_name).get_tensor() - ) - gate_w = np.random.uniform(-0.1, 0.1, size=gate_w.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_w_name).get_tensor().set( - gate_w, place - ) - - gate_b = np.array( - fluid.global_scope().find_var(gate_b_name).get_tensor() - ) - gate_b = np.random.uniform(-0.1, 0.1, size=gate_b.shape).astype( - 'float32' - ) - fluid.global_scope().find_var(gate_b_name).get_tensor().set( - gate_b, place - ) - - step_input_np = np.random.uniform( - -0.1, 0.1, (self.batch_size, self.hidden_size) - ).astype('float32') - pre_hidden_np = np.random.uniform( - -0.1, 0.1, (self.batch_size, self.hidden_size) - ).astype('float32') - pre_cell_np = np.random.uniform( - -0.1, 0.1, (self.batch_size, self.hidden_size) - ).astype('float32') - - out = exe.run( - feed={ - 'x': step_input_np, - 'pre_hidden': pre_hidden_np, - 'pre_cell': pre_cell_np, - }, - fetch_list=[new_hidden, new_cell], - ) - - api_hidden_out = out[0] - api_cell_out = out[1] - - np_hidden_out, np_cell_out = step( - step_input_np, pre_hidden_np, pre_cell_np, gate_w, gate_b - ) - - np.testing.assert_allclose( - api_hidden_out, np_hidden_out, rtol=0.0001, atol=0 - ) - np.testing.assert_allclose( - api_cell_out, np_cell_out, rtol=0.0001, atol=0 - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py b/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py deleted file mode 100644 index c01a266bdbb45b2e98c6ce91d004915773ed07cc..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -from test_imperative_base import new_program_scope - -import paddle.fluid as fluid -import paddle.fluid.layers as layers -from paddle.fluid.contrib.layers import basic_gru, basic_lstm - - -class TestBasicGRUApiName(unittest.TestCase): - def setUp(self): - self.name_set = set( - [ - "test1_fw_w_0_gate", - "test1_fw_w_0_candidate", - "test1_fw_b_0_gate", - "test1_fw_b_0_candidate", - "test1_bw_w_0_gate", - "test1_bw_w_0_candidate", - "test1_bw_b_0_gate", - "test1_bw_b_0_candidate", - ] - ) - - def test_name(self): - batch_size = 20 - input_size = 128 - hidden_size = 256 - num_layers = 1 - dropout = 0.5 - bidirectional = True - batch_first = False - - with new_program_scope(): - input = layers.data( - name="input", - shape=[-1, batch_size, input_size], - dtype='float32', - ) - pre_hidden = layers.data( - name="pre_hidden", shape=[-1, hidden_size], dtype='float32' - ) - sequence_length = layers.data( - name="sequence_length", shape=[-1], dtype='int32' - ) - - rnn_out, last_hidden = basic_gru( - input, - pre_hidden, - hidden_size, - num_layers=num_layers, - sequence_length=sequence_length, - dropout_prob=dropout, - bidirectional=bidirectional, - batch_first=batch_first, - param_attr=fluid.ParamAttr(name="test1"), - bias_attr=fluid.ParamAttr(name="test1"), - name="basic_gru", - ) - - var_list = fluid.io.get_program_parameter( - fluid.default_main_program() - ) - - for var in var_list: - self.assertTrue(var.name in self.name_set) - - -class TestBasicLSTMApiName(unittest.TestCase): - def setUp(self): - self.name_set = set( - [ - "test1_fw_w_0", - "test1_fw_b_0", - "test1_fw_w_1", - "test1_fw_b_1", - "test1_bw_w_0", - "test1_bw_b_0", - "test1_bw_w_1", - "test1_bw_b_1", - ] - ) - - def test_name(self): - batch_size = 20 - input_size = 128 - hidden_size = 256 - num_layers = 2 - dropout = 0.5 - bidirectional = True - batch_first = False - - with new_program_scope(): - input = layers.data( - name="input", - shape=[-1, batch_size, input_size], - dtype='float32', - ) - pre_hidden = layers.data( - name="pre_hidden", shape=[-1, hidden_size], dtype='float32' - ) - pre_cell = layers.data( - name="pre_cell", shape=[-1, hidden_size], dtype='float32' - ) - sequence_length = layers.data( - name="sequence_length", shape=[-1], dtype='int32' - ) - - rnn_out, last_hidden, last_cell = basic_lstm( - input, - pre_hidden, - pre_cell, - hidden_size, - num_layers=num_layers, - sequence_length=sequence_length, - dropout_prob=dropout, - bidirectional=bidirectional, - param_attr=fluid.ParamAttr(name="test1"), - bias_attr=fluid.ParamAttr(name="test1"), - batch_first=batch_first, - ) - - var_list = fluid.io.get_program_parameter( - fluid.default_main_program() - ) - - for var in var_list: - self.assertTrue(var.name in self.name_set) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index 18588f5c53111ca42375dd1b5718afbec96e5c5a..ea1401d3fbe97ff9f8db145a2ea8507531dccd73 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -20,8 +20,6 @@ import numpy as np import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers -from paddle.fluid import ParamAttr -from paddle.fluid.contrib.layers import basic_lstm from paddle.fluid.executor import Executor from paddle.fluid.layers.control_flow import StaticRNN as PaddingRNN @@ -85,7 +83,7 @@ class RNNConfig: else: raise ValueError('Unsupported model_type.') - if rnn_model not in ('static', 'padding', 'cudnn', 'basic_lstm'): + if rnn_model not in ('static', 'padding', 'cudnn'): raise ValueError('Unsupported rnn_model.') self.batch_size = 12 @@ -406,23 +404,6 @@ def lm_model( init_hidden=init_hidden_reshape, init_cell=init_cell_reshape, ) - elif rnn_model == "basic_lstm": - rnn_out, last_hidden, last_cell = basic_lstm( - x_emb, - init_hidden, - init_cell, - hidden_size, - num_layers=num_layers, - batch_first=True, - dropout_prob=dropout, - param_attr=ParamAttr( - initializer=fluid.initializer.UniformInitializer( - low=-init_scale, high=init_scale - ) - ), - bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)), - forget_bias=0.0, - ) else: print("type not support") return diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 4a2ef462432e6aefd050c412c0a6f764df4ff34b..350770b4faeca7a49235d4ba7aad4e0ee92e1f33 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -2329,33 +2329,6 @@ class TestBook(LayerTest): ) return output - def test_basic_gru(self): - input_size = 128 - hidden_size = 256 - with self.static_graph(): - input = fluid.data( - name="input", shape=[None, None, input_size], dtype='float32' - ) - pre_hidden = fluid.data( - name="pre_hidden", shape=[None, hidden_size], dtype='float32' - ) - sequence_length = fluid.data( - name="sequence_length", shape=[None], dtype='int32' - ) - - for bidirectional in [True, False]: - for batch_first in [True, False]: - rnn_out, last_hidden = fluid.contrib.layers.basic_gru( - input, - pre_hidden, - hidden_size=256, - num_layers=2, - sequence_length=sequence_length, - dropout_prob=0.5, - bidirectional=bidirectional, - batch_first=batch_first, - ) - class ExampleNet(paddle.nn.Layer): def __init__(self): diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index 47f83b67fa8f0477814b2d90db1f61e5c7d773ac..062d923dfc85bcf3085e03bf601912fe49b7e604 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -274,7 +274,6 @@ HIGH_PARALLEL_JOB_NEW = [ 'test_mkldnn_op_nhwc', 'test_fc_act_mkldnn_fuse_pass', 'test_fleet_base_3', - 'test_basic_rnn_name', 'test_query_op', 'test_fleet_base_4', 'save_load_op_test', @@ -1980,7 +1979,6 @@ CPU_PARALLEL_JOB = [ 'test_beam_search_op', 'test_batch_sampler', 'test_batch_norm_act_fuse_pass', - 'test_basic_rnn_name', 'test_attention_lstm_op', 'test_analyzer', 'test_aligned_allocator', diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index 7a4f3109d3f45bcafafbbf19da226c6ef2fa3ee6..9771bf6f195ad6d29d2ba9df9a34ce7b781ef971 100755 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -71,7 +71,6 @@ STATIC_MODE_TESTING_LIST = [ 'test_auc_single_pred_op', 'test_avoid_twice_initialization', 'test_backward', - 'test_basic_rnn_name', 'test_batch_norm_op', 'test_batch_norm_op_v2', 'test_bce_loss',