From 4672ea8e9b02d1df2f9d9b082c55ff11bc2fa217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AA=91=E9=A9=AC=E5=B0=8F=E7=8C=AB?= <1435130236@qq.com> Date: Thu, 15 Dec 2022 16:35:12 +0800 Subject: [PATCH] [FluidAPI] remove fluid rnn apis (#49050) * remove lstm api * remove gru_unit api * remove lstm in all * remove beam-search * remove beam_search slot * remove lstm test code * remove fluid.layers.nn api * update gru-unit * revert gru_unit white list --- python/paddle/fluid/layers/rnn.py | 942 ------------------ python/paddle/fluid/tests/book/CMakeLists.txt | 1 - .../tests/book/notest_understand_sentiment.py | 75 -- .../tests/book/test_label_semantic_roles.py | 408 -------- .../fluid/tests/unittests/CMakeLists.txt | 10 - .../ir/inference/test_fc_gru_fuse_pass.py | 93 -- .../ir/inference/test_fc_lstm_fuse_pass.py | 56 -- .../unittests/test_eager_deletion_gru_net.py | 59 -- .../unittests/test_eager_deletion_lstm_net.py | 61 -- .../test_eager_deletion_padding_rnn.py | 15 - .../fluid/tests/unittests/test_gru_op.py | 22 - .../fluid/tests/unittests/test_gru_unit_op.py | 51 - .../unittests/test_ir_memory_optimize_nlp.py | 62 -- .../fluid/tests/unittests/test_layers.py | 14 - .../tests/unittests/test_lstm_cudnn_op.py | 87 -- .../fluid/tests/unittests/test_lstm_op.py | 170 ---- .../fluid/tests/unittests/test_lstmp_op.py | 62 -- .../unittests/test_program_prune_backward.py | 55 - tools/parallel_UT_rule.py | 8 - tools/static_mode_white_list.py | 5 - 20 files changed, 2256 deletions(-) delete mode 100644 python/paddle/fluid/tests/book/test_label_semantic_roles.py delete mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py delete mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py delete mode 100644 python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py delete mode 100644 python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py delete mode 100644 python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 8073cfe28f..90cc5a6853 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -37,11 +37,6 @@ from collections.abc import Sequence __all__ = [ 'dynamic_decode', - 'dynamic_lstm', - 'dynamic_lstmp', - 'dynamic_gru', - 'gru_unit', - 'lstm', ] @@ -476,940 +471,3 @@ def dynamic_decode( return_length, **kwargs ) - - -def dynamic_lstm( - input, - size, - h_0=None, - c_0=None, - param_attr=None, - bias_attr=None, - use_peepholes=True, - is_reverse=False, - gate_activation='sigmoid', - cell_activation='tanh', - candidate_activation='tanh', - dtype='float32', - name=None, -): - r""" - - **Note**: - 1. This OP only supports LoDTensor as inputs. If you need to deal with Tensor, please use :ref:`api_fluid_layers_lstm` . - 2. In order to improve efficiency, users must first map the input of dimension [T, hidden_size] to input of [T, 4 * hidden_size], and then pass it to this OP. - - The implementation of this OP include diagonal/peephole connections. - Please refer to `Gers, F. A., & Schmidhuber, J. (2000) `_ . - If you do not need peephole connections, please set use_peepholes to False . - - This OP computes each timestep as follows: - - .. math:: - i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_{x_i} + b_{h_i}) - .. math:: - f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_{x_f} + b_{h_f}) - .. math:: - o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_{x_o} + b_{h_o}) - .. 
math:: - \widetilde{c_t} = tanh(W_{cx}x_t + W_{ch}h_{t-1} + b{x_c} + b_{h_c}) - .. math:: - c_t = f_t \odot c_{t-1} + i_t \odot \widetilde{c_t} - .. math:: - h_t = o_t \odot tanh(c_t) - - The symbolic meanings in the formula are as follows: - - - :math:`x_{t}` represents the input at timestep :math:`t` - - :math:`h_{t}` represents the hidden state at timestep :math:`t` - - :math:`h_{t-1}, c_{t-1}` represent the hidden state and cell state at timestep :math:`t-1` , respectively - - :math:`\widetilde{c_t}` represents the candidate cell state - - :math:`i_t` , :math:`f_t` and :math:`o_t` represent input gate, forget gate, output gate, respectively - - :math:`W` represents weight (e.g., :math:`W_{ix}` is the weight of a linear transformation of input :math:`x_{t}` when calculating input gate :math:`i_t` ) - - :math:`b` represents bias (e.g., :math:`b_{i}` is the bias of input gate) - - :math:`\sigma` represents nonlinear activation function for gate, default sigmoid - - :math:`\odot` represents the Hadamard product of a matrix, i.e. multiplying the elements of the same position for two matrices with the same dimension to get another matrix with the same dimension - - Parameters: - input ( :ref:`api_guide_Variable_en` ): LSTM input tensor, multi-dimensional LODTensor of shape :math:`[T, 4*hidden\_size]` . Data type is float32 or float64. - size (int): must be 4 * hidden_size. - h_0( :ref:`api_guide_Variable_en` , optional): The initial hidden state of the LSTM, multi-dimensional Tensor of shape :math:`[batch\_size, hidden\_size]` . - Data type is float32 or float64. If set to None, it will be a vector of all 0. Default: None. - c_0( :ref:`api_guide_Variable_en` , optional): The initial hidden state of the LSTM, multi-dimensional Tensor of shape :math:`[batch\_size, hidden\_size]` . - Data type is float32 or float64. If set to None, it will be a vector of all 0. `h_0` and `c_0` can be None but only at the same time. Default: None. - param_attr(ParamAttr, optional): Parameter attribute of weight. If it is None, the default weight parameter attribute is used. Please refer to ref:`api_fluid_ParamAttr' . - If the user needs to set this parameter, the dimension must be :math:`[hidden\_size, 4*hidden\_size]` . Default: None. - - - Weights = :math:`\{ W_{cr},W_{ir},W_{fr},W_{or} \}` , the shape is [hidden_size, 4*hidden_size]. - - bias_attr (ParamAttr, optional): The bias attribute for the learnable bias - weights, which contains two parts, input-hidden - bias weights and peephole connections weights if - setting `use_peepholes` to `True`. - Please refer to ref:`api_fluid_ParamAttr' . Default: None. - - 1. `use_peepholes = False` - - Biases = {:math:`b_c, b_i, b_f, b_o`}. - - The shape is [1, 4*hidden_size]. - 2. `use_peepholes = True` - - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \ - W_{fc}, W_{oc}`}. - - The shape is [1, 7*hidden_size]. - - use_peepholes (bool, optional): Whether to use peephole connection or not. Default: True. - is_reverse (bool, optional): Whether to calculate reverse LSTM. Default: False. - gate_activation (str, optional): The activation for input gate, forget gate and output gate. Default: "sigmoid". - cell_activation (str, optional): The activation for cell output. Default: "tanh". - candidate_activation (str, optional): The activation for candidate hidden state. Default: "tanh". - dtype (str, optional): Data type, can be "float32" or "float64". Default: "float32". - name (str, optional): A name for this layer. Please refer to :ref:`api_guide_Name` . Default: None. 
- - Returns: - tuple ( :ref:`api_guide_Variable` , :ref:`api_guide_Variable` ) : - - The hidden state and cell state of LSTM - - - hidden: LoDTensor with shape of :math:`[T, hidden\_size]` , and its lod and dtype is the same as the input. - - cell: LoDTensor with shape of :math:`[T, hidden\_size]` , and its lod and dtype is the same as the input. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - emb_dim = 256 - vocab_size = 10000 - hidden_dim = 512 - - data = fluid.data(name='x', shape=[None], dtype='int64', lod_level=1) - emb = fluid.embedding(input=data, size=[vocab_size, emb_dim], is_sparse=True) - - forward_proj = fluid.layers.fc(input=emb, size=hidden_dim * 4, - bias_attr=False) - - forward, cell = fluid.layers.dynamic_lstm( - input=forward_proj, size=hidden_dim * 4, use_peepholes=False) - forward.shape # (-1, 512) - cell.shape # (-1, 512) - """ - assert ( - _non_static_mode() is not True - ), "please use lstm instead of dynamic_lstm in dygraph mode!" - assert ( - bias_attr is not False - ), "bias_attr should not be False in dynamic_lstm." - - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'dynamic_lstm' - ) - - check_type(h_0, 'h_0', (Variable, type(None)), 'dynamic_lstm') - if isinstance(h_0, Variable): - check_variable_and_dtype( - h_0, 'h_0', ['float32', 'float64'], 'dynamic_lstm' - ) - - check_type(c_0, 'c_0', (Variable, type(None)), 'dynamic_lstm') - if isinstance(c_0, Variable): - check_variable_and_dtype( - c_0, 'c_0', ['float32', 'float64'], 'dynamic_lstm' - ) - - helper = LayerHelper('lstm', **locals()) - size = size // 4 - weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype - ) - bias_size = [1, 7 * size] - if not use_peepholes: - bias_size[1] = 4 * size - bias = helper.create_parameter( - attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True - ) - - hidden = helper.create_variable_for_type_inference(dtype) - cell = helper.create_variable_for_type_inference(dtype) - batch_gate = helper.create_variable_for_type_inference(dtype) - batch_cell_pre_act = helper.create_variable_for_type_inference(dtype) - inputs = {'Input': input, 'Weight': weight, 'Bias': bias} - batch_size = input.shape[0] - if h_0: - assert h_0.shape == (batch_size, size), ( - 'The shape of h0 should be (batch_size, %d)' % size - ) - inputs['H0'] = h_0 - if c_0: - assert c_0.shape == (batch_size, size), ( - 'The shape of c0 should be (batch_size, %d)' % size - ) - inputs['C0'] = c_0 - - helper.append_op( - type='lstm', - inputs=inputs, - outputs={ - 'Hidden': hidden, - 'Cell': cell, - 'BatchGate': batch_gate, - 'BatchCellPreAct': batch_cell_pre_act, - }, - attrs={ - 'use_peepholes': use_peepholes, - 'is_reverse': is_reverse, - 'gate_activation': gate_activation, - 'cell_activation': cell_activation, - 'candidate_activation': candidate_activation, - }, - ) - return hidden, cell - - -@deprecated( - since='2.0.0', - update_to='paddle.nn.LSTM', - reason="This API may occur CUDNN errors.", -) -def lstm( - input, - init_h, - init_c, - max_len, - hidden_size, - num_layers, - dropout_prob=0.0, - is_bidirec=False, - is_test=False, - name=None, - default_initializer=None, - seed=-1, -): - r""" - - **Note**: - This OP only supports running on GPU devices. - - This OP implements LSTM operation - `Hochreiter, S., & Schmidhuber, J. (1997) `_ . - - The implementation of this OP does not include diagonal/peephole connections. - Please refer to `Gers, F. A., & Schmidhuber, J. (2000) `_ . 
- If you need peephole connections, please use :ref:`api_fluid_layers_dynamic_lstm` . - - This OP computes each timestep as follows: - - .. math:: - i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_{x_i} + b_{h_i}) - .. math:: - f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_{x_f} + b_{h_f}) - .. math:: - o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_{x_o} + b_{h_o}) - .. math:: - \widetilde{c_t} = tanh(W_{cx}x_t + W_{ch}h_{t-1} + b{x_c} + b_{h_c}) - .. math:: - c_t = f_t \odot c_{t-1} + i_t \odot \widetilde{c_t} - .. math:: - h_t = o_t \odot tanh(c_t) - - The symbolic meanings in the formula are as follows: - - - :math:`x_{t}` represents the input at timestep :math:`t` - - :math:`h_{t}` represents the hidden state at timestep :math:`t` - - :math:`h_{t-1}, c_{t-1}` represent the hidden state and cell state at timestep :math:`t-1` , respectively - - :math:`\widetilde{c_t}` represents the candidate cell state - - :math:`i_t` , :math:`f_t` and :math:`o_t` represent input gate, forget gate, output gate, respectively - - :math:`W` represents weight (e.g., :math:`W_{ix}` is the weight of a linear transformation of input :math:`x_{t}` when calculating input gate :math:`i_t` ) - - :math:`b` represents bias (e.g., :math:`b_{i}` is the bias of input gate) - - :math:`\sigma` represents nonlinear activation function for gate, default sigmoid - - :math:`\odot` represents the Hadamard product of a matrix, i.e. multiplying the elements of the same position for two matrices with the same dimension to get another matrix with the same dimension - - Parameters: - input ( :ref:`api_guide_Variable_en` ): LSTM input tensor, 3-D Tensor of shape :math:`[batch\_size, seq\_len, input\_dim]` . Data type is float32 or float64 - init_h( :ref:`api_guide_Variable_en` ): The initial hidden state of the LSTM, 3-D Tensor of shape :math:`[num\_layers, batch\_size, hidden\_size]` . - If is_bidirec = True, shape should be :math:`[num\_layers*2, batch\_size, hidden\_size]` . Data type is float32 or float64. - max_len (int): This parameter has no effect and will be discarded. - init_c( :ref:`api_guide_Variable_en` ): The initial cell state of the LSTM, 3-D Tensor of shape :math:`[num\_layers, batch\_size, hidden\_size]` . - If is_bidirec = True, shape should be :math:`[num\_layers*2, batch\_size, hidden\_size]` . Data type is float32 or float64. - hidden_size (int): hidden size of the LSTM. - num_layers (int): total layers number of the LSTM. - dropout_prob(float, optional): dropout prob, dropout ONLY work between rnn layers, NOT between time steps - There is NO dropout work on rnn output of the last RNN layers. - Default: 0.0. - is_bidirec (bool, optional): If it is bidirectional. Default: False. - is_test (bool, optional): If it is in test phrase. Default: False. - name (str, optional): A name for this layer. If set None, the layer - will be named automatically. Default: None. - default_initializer(Initializer, optional): Where use initializer to initialize the Weight - If set None, default initializer will be used. Default: None. - seed(int, optional): Seed for dropout in LSTM, If it's -1, dropout will use random seed. Default: 1. 
- - Returns: - tuple ( :ref:`api_guide_Variable_en` , :ref:`api_guide_Variable_en` , :ref:`api_guide_Variable_en` ) : - - Three tensors, rnn_out, last_h, last_c: - - - rnn_out is result of LSTM hidden, shape is :math:`[seq\_len, batch\_size, hidden\_size]` \ - if is_bidirec set to True, shape will be :math:`[seq\_len, batch\_size, hidden\_size*2]` - - last_h is the hidden state of the last step of LSTM \ - shape is :math:`[num\_layers, batch\_size, hidden\_size]` \ - if is_bidirec set to True, shape will be :math:`[num\_layers*2, batch\_size, hidden\_size]` - - last_c(Tensor): the cell state of the last step of LSTM \ - shape is :math:`[num\_layers, batch\_size, hidden\_size]` \ - if is_bidirec set to True, shape will be :math:`[num\_layers*2, batch\_size, hidden\_size]` - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.fluid.layers as layers - paddle.enable_static() - - emb_dim = 256 - vocab_size = 10000 - data = fluid.data(name='x', shape=[None, 100], dtype='int64') - emb = fluid.embedding(input=data, size=[vocab_size, emb_dim], is_sparse=True) - batch_size = 100 - dropout_prob = 0.2 - input_size = 100 - hidden_size = 150 - num_layers = 1 - max_len = 12 - init_h = layers.fill_constant( [num_layers, batch_size, hidden_size], 'float32', 0.0 ) - init_c = layers.fill_constant( [num_layers, batch_size, hidden_size], 'float32', 0.0 ) - rnn_out, last_h, last_c = layers.lstm( emb, init_h, init_c, \ - max_len, hidden_size, num_layers, \ - dropout_prob=dropout_prob) - rnn_out.shape # (-1, 100, 150) - last_h.shape # (1, 20, 150) - last_c.shape # (1, 20, 150) - """ - - helper = LayerHelper('cudnn_lstm', **locals()) - check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'lstm') - check_variable_and_dtype(init_h, 'init_h', ['float32', 'float64'], 'lstm') - check_variable_and_dtype(init_c, 'init_c', ['float32', 'float64'], 'lstm') - check_type(max_len, 'max_len', (int), 'lstm') - check_type(hidden_size, 'hidden_size', (int), 'lstm') - check_type(num_layers, 'num_layers', (int), 'lstm') - dtype = input.dtype - input_shape = list(input.shape) - input_size = input_shape[-1] - weight_size = 0 - num_dirrection = 2 if is_bidirec == True else 1 - - for i in range(num_layers): - if i == 0: - input_weight_size = (input_size * hidden_size) * 4 * num_dirrection - else: - input_weight_size = (hidden_size * hidden_size) * 4 * num_dirrection - hidden_weight_size = (hidden_size * hidden_size) * 4 * num_dirrection - - weight_size += input_weight_size + hidden_weight_size - weight_size += hidden_size * 8 * num_dirrection - - weight = helper.create_parameter( - attr=helper.param_attr, - shape=[weight_size], - dtype=dtype, - default_initializer=default_initializer, - ) - - out = helper.create_variable_for_type_inference(dtype) - last_h = helper.create_variable_for_type_inference(dtype) - last_c = helper.create_variable_for_type_inference(dtype) - reserve = helper.create_variable_for_type_inference( - dtype=core.VarDesc.VarType.UINT8, stop_gradient=True - ) - state_out = helper.create_variable_for_type_inference( - dtype=core.VarDesc.VarType.UINT8, stop_gradient=True - ) - state_out.persistable = True - - helper.append_op( - type='cudnn_lstm', - inputs={ - 'Input': input, - 'InitH': init_h, - 'InitC': init_c, - 'W': weight, - }, - outputs={ - 'Out': out, - 'LastH': last_h, - 'LastC': last_c, - 'Reserve': reserve, - 'StateOut': state_out, - }, - attrs={ - 'is_bidirec': is_bidirec, - 'input_size': input_size, - 'hidden_size': hidden_size, - 'num_layers': 
num_layers, - 'is_test': is_test, - 'dropout_prob': dropout_prob, - 'seed': seed, - }, - ) - return out, last_h, last_c - - -def dynamic_lstmp( - input, - size, - proj_size, - param_attr=None, - bias_attr=None, - use_peepholes=True, - is_reverse=False, - gate_activation='sigmoid', - cell_activation='tanh', - candidate_activation='tanh', - proj_activation='tanh', - dtype='float32', - name=None, - h_0=None, - c_0=None, - cell_clip=None, - proj_clip=None, -): - r""" - - **Note**: - 1. In order to improve efficiency, users must first map the input of dimension [T, hidden_size] to input of [T, 4 * hidden_size], and then pass it to this OP. - - This OP implements the LSTMP (LSTM Projected) layer. - The LSTMP layer has a separate linear mapping layer behind the LSTM layer. -- `Sak, H., Senior, A., & Beaufays, F. (2014) `_ . - - Compared with the standard LSTM layer, LSTMP has an additional linear mapping layer, - which is used to map from the original hidden state :math:`h_t` to the lower dimensional state :math:`r_t` . - This reduces the total number of parameters and computational complexity, especially when the output unit is relatively large. - - The default implementation of the OP contains diagonal/peephole connections, - please refer to `Gers, F. A., & Schmidhuber, J. (2000) `_ . - If you need to disable the peephole connections, set use_peepholes to False. - - This OP computes each timestep as follows: - - .. math:: - i_t = \sigma(W_{ix}x_{t} + W_{ir}r_{t-1} + W_{ic}c_{t-1} + b_i) - .. math:: - f_t = \sigma(W_{fx}x_{t} + W_{fr}r_{t-1} + W_{fc}c_{t-1} + b_f) - .. math:: - o_t = \sigma(W_{ox}x_{t} + W_{or}r_{t-1} + W_{oc}c_{t-1} + b_o) - .. math:: - \widetilde{c_t} = act_g(W_{cx}x_t + W_{cr}r_{t-1} + b_c) - .. math:: - c_t = f_t \odot c_{t-1} + i_t \odot \widetilde{c_t} - .. math:: - h_t = o_t \odot act_h(c_t) - .. math:: - r_t = \overline{act_h}(W_{rh}h_t) - - The symbolic meanings in the formula are as follows: - - - :math:`x_{t}` represents the input at timestep :math:`t` - - :math:`h_{t}` represents the hidden state at timestep :math:`t` - - :math:`r_{t}` : represents the state of the projected output of the hidden state :math:`h_{t}` - - :math:`h_{t-1}, c_{t-1}, r_{t-1}` represent the hidden state, cell state and projected output at timestep :math:`t-1` , respectively - - :math:`\widetilde{c_t}` represents the candidate cell state - - :math:`i_t` , :math:`f_t` and :math:`o_t` represent input gate, forget gate, output gate, respectively - - :math:`W` represents weight (e.g., :math:`W_{ix}` is the weight of a linear transformation of input :math:`x_{t}` when calculating input gate :math:`i_t` ) - - :math:`b` represents bias (e.g., :math:`b_{i}` is the bias of input gate) - - :math:`\sigma` represents nonlinear activation function for gate, default sigmoid - - :math:`\odot` represents the Hadamard product of a matrix, i.e. multiplying the elements of the same position for two matrices with the same dimension to get another matrix with the same dimension - - Parameters: - input( :ref:`api_guide_Variable_en` ): The input of dynamic_lstmp layer, which supports - variable-time length input sequence. - It is a multi-dimensional LODTensor of shape :math:`[T, 4*hidden\_size]` . Data type is float32 or float64. - size(int): must be 4 * hidden_size. - proj_size(int): The size of projection output. - param_attr(ParamAttr, optional): Parameter attribute of weight. If it is None, the default weight parameter attribute is used. Please refer to ref:`api_fluid_ParamAttr' . 
- If the user needs to set this parameter, the dimension must be :math:`[hidden\_size, 4*hidden\_size]` . Default: None. - - - Weights = :math:`\{ W_{cr},W_{ir},W_{fr},W_{or} \}` , the shape is [P, 4*hidden_size] , where P is the projection size. - - Projection weight = :math:`\{ W_{rh} \}` , the shape is [hidden_size, P]. - - bias_attr (ParamAttr, optional): The bias attribute for the learnable bias - weights, which contains two parts, input-hidden - bias weights and peephole connections weights if - setting `use_peepholes` to `True`. - Please refer to ref:`api_fluid_ParamAttr' . Default: None. - - 1. `use_peepholes = False` - - Biases = {:math:`b_c, b_i, b_f, b_o`}. - - The shape is [1, 4*hidden_size]. - 2. `use_peepholes = True` - - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \ - W_{fc}, W_{oc}`}. - - The shape is [1, 7*hidden_size]. - - use_peepholes (bool, optional): Whether to use peephole connection or not. Default True. - is_reverse (bool, optional): Whether to calculate reverse LSTM. Default False. - gate_activation (str, optional): The activation for input gate, forget gate and output gate. Default "sigmoid". - cell_activation (str, optional): The activation for cell output. Default "tanh". - candidate_activation (str, optional): The activation for candidate hidden state. Default "tanh". - proj_activation(str, optional): The activation for projection output. Default "tanh". - dtype (str, optional): Data type, can be "float32" or "float64". Default "float32". - name (str, optional): A name for this layer. Please refer to :ref:`api_guide_Name` . Default: None. - h_0( :ref:`api_guide_Variable` , optional): The initial hidden state is an optional input, default is zero. - This is a tensor with shape :math:`[batch\_size, P]` , where P is the projection size. Default: None. - c_0( :ref:`api_guide_Variable` , optional): The initial cell state is an optional input, default is zero. - This is a tensor with shape :math:`[batch\_size, P]` , where P is the projection size. - `h_0` and `c_0` can be None but only at the same time. Default: None. - cell_clip(float, optional): If not None, the cell state is clipped - by this value prior to the cell output activation. Default: None. - proj_clip(float, optional): If `num_proj > 0` and `proj_clip` is - provided, then the projected values are clipped elementwise to within - `[-proj_clip, proj_clip]`. Default: None. - - Returns: - tuple ( :ref:`api_guide_Variable` , :ref:`api_guide_Variable` ) : - - The hidden state and cell state of LSTMP - - - hidden: LoDTensor with shape of :math:`[T, P]` , and its lod and dtype is the same as the input. - - cell: LoDTensor with shape of :math:`[T, hidden\_size]` , and its lod and dtype is the same as the input. - - Examples: - - .. code-block:: python - - import paddle.fluid as fluid - dict_dim, emb_dim = 128, 64 - data = fluid.data(name='sequence', shape=[None], dtype='int64', lod_level=1) - emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) - hidden_dim, proj_dim = 512, 256 - fc_out = fluid.layers.fc(input=emb, size=hidden_dim * 4, - act=None, bias_attr=None) - proj_out, last_c = fluid.layers.dynamic_lstmp(input=fc_out, - size=hidden_dim * 4, - proj_size=proj_dim, - use_peepholes=False, - is_reverse=True, - cell_activation="tanh", - proj_activation="tanh") - proj_out.shape # (-1, 256) - last_c.shape # (-1, 512) - """ - - assert ( - _non_static_mode() is not True - ), "please use lstm instead of dynamic_lstmp in dygraph mode!" 
- - assert ( - bias_attr is not False - ), "bias_attr should not be False in dynamic_lstmp." - - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'dynamic_lstmp' - ) - - check_type(h_0, 'h_0', (Variable, type(None)), 'dynamic_lstmp') - if isinstance(h_0, Variable): - check_variable_and_dtype( - h_0, 'h_0', ['float32', 'float64'], 'dynamic_lstmp' - ) - - check_type(c_0, 'c_0', (Variable, type(None)), 'dynamic_lstmp') - if isinstance(c_0, Variable): - check_variable_and_dtype( - c_0, 'c_0', ['float32', 'float64'], 'dynamic_lstmp' - ) - - helper = LayerHelper('lstmp', **locals()) - size = size // 4 - weight = helper.create_parameter( - attr=helper.param_attr, shape=[proj_size, 4 * size], dtype=dtype - ) - proj_weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, proj_size], dtype=dtype - ) - bias_size = [1, 7 * size] - if not use_peepholes: - bias_size[1] = 4 * size - bias = helper.create_parameter( - attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True - ) - - projection = helper.create_variable_for_type_inference(dtype) - cell = helper.create_variable_for_type_inference(dtype) - ordered_proj0 = helper.create_variable_for_type_inference(dtype) - batch_hidden = helper.create_variable_for_type_inference(dtype) - batch_gate = helper.create_variable_for_type_inference(dtype) - batch_cell_pre_act = helper.create_variable_for_type_inference(dtype) - inputs = { - 'Input': input, - 'Weight': weight, - 'ProjWeight': proj_weight, - 'Bias': bias, - } - batch_size = input.shape[0] - if h_0: - assert h_0.shape == (batch_size, proj_size), ( - 'The shape of h0 should be (batch_size, %d)' % proj_size - ) - inputs['H0'] = h_0 - if c_0: - assert c_0.shape == (batch_size, size), ( - 'The shape of c0 should be (batch_size, %d)' % size - ) - inputs['C0'] = c_0 - - if cell_clip: - assert cell_clip >= 0, "cell_clip should not be negative." - if proj_clip: - assert proj_clip >= 0, "proj_clip should not be negative." - - helper.append_op( - type='lstmp', - inputs=inputs, - outputs={ - 'Projection': projection, - 'Cell': cell, - 'BatchHidden': batch_hidden, - 'BatchGate': batch_gate, - 'BatchCellPreAct': batch_cell_pre_act, - }, - attrs={ - 'use_peepholes': use_peepholes, - 'cell_clip': cell_clip, - 'proj_clip': proj_clip, - 'is_reverse': is_reverse, - 'gate_activation': gate_activation, - 'cell_activation': cell_activation, - 'candidate_activation': candidate_activation, - 'proj_activation': proj_activation, - }, - ) - return projection, cell - - -def dynamic_gru( - input, - size, - param_attr=None, - bias_attr=None, - is_reverse=False, - gate_activation='sigmoid', - candidate_activation='tanh', - h_0=None, - origin_mode=False, -): - r""" - - **Note: The input type of this must be LoDTensor. If the input type to be - processed is Tensor, use** :ref:`api_fluid_layers_StaticRNN` . - - This operator is used to perform the calculations for a single layer of - Gated Recurrent Unit (GRU) on full sequences step by step. The calculations - in one time step support these two modes: - - If ``origin_mode`` is True, then the formula used is from paper - `Learning Phrase Representations using RNN Encoder Decoder for Statistical - Machine Translation `_ . - - .. 
math:: - - u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u) - - r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r) - - \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c) - - h_t & = u_t \odot h_{t-1} + (1-u_t) \odot \\tilde{h_t} - - - if ``origin_mode`` is False, then the formula used is from paper - `Empirical Evaluation of Gated Recurrent Neural Networks on Sequence - Modeling `_ - - .. math:: - - u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u) - - r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r) - - \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c) - - h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \\tilde{h_t} - - :math:`x_t` is the input of current time step, but it is not from ``input`` . - This operator does not include the calculations :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` , - **Note** thus a fully-connect layer whose size is 3 times of ``size`` should - be used before this operator, and the output should be used as ``input`` here. - :math:`h_{t-1}` is the hidden state from previous time step. - :math:`u_t` , :math:`r_t` , :math:`\\tilde{h_t}` and :math:`h_t` stand for - update gate, reset gate, candidate hidden and hidden output separately. - :math:`W_{uh}, b_u` , :math:`W_{rh}, b_r` and :math:`W_{ch}, b_c` stand for - the weight matrix and bias used in update gate, reset gate, candidate hidden - calculations. For implementation, the three weight matrix are merged into a - tensor shaped :math:`[D, D \\times 3]` , the three bias are concatenated as - a tensor shaped :math:`[1, D \\times 3]` , where :math:`D` stands for the - hidden size; The data layout of weight tensor is: :math:`W_{uh}` and :math:`W_{rh}` - are concatenated with shape :math:`[D, D \\times 2]` lying on the first part, - and :math:`W_{ch}` lying on the latter part with shape :math:`[D, D]` . - - - Args: - input(Variable): A LoDTensor whose lod level is 1, representing the input - after linear projection. Its shape should be :math:`[T, D \\times 3]` , - where :math:`T` stands for the total sequence lengths in this mini-batch, - :math:`D` for the hidden size. The data type should be float32 or float64. - size(int): Indicate the hidden size. - param_attr(ParamAttr, optional): To specify the weight parameter property. - Default: None, which means the default weight parameter property is used. - See usage for details in :ref:`api_fluid_ParamAttr` . - bias_attr (ParamAttr, optional): To specify the bias parameter property. - Default: None, which means the default bias parameter property is used. - See usage for details in :ref:`api_fluid_ParamAttr` . - is_reverse(bool, optional): Whether to compute in the reversed order of - input sequences. Default False. - gate_activation(str, optional): The activation function corresponding to - :math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity" - are supported. Default "sigmoid". - candidate_activation(str, optional): The activation function corresponding to - :math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity" - are supported. Default "tanh". - h_0 (Variable, optional): A Tensor representing the initial hidden state. - It not provided, the default initial hidden state is 0. The shape is - :math:`[N, D]` , where :math:`N` is the number of sequences in the - mini-batch, :math:`D` for the hidden size. The data type should be - same as ``input`` . Default None. 
- - Returns: - Variable: A LoDTensor whose lod level is 1 and shape is :math:`[T, D]` , \ - where :math:`T` stands for the total sequence lengths in this mini-batch \ - :math:`D` for the hidden size. It represents GRU transformed sequence output, \ - and has the same lod and data type with ``input`` . - - Examples: - - .. code-block:: python - - import paddle.fluid as fluid - - dict_dim, emb_dim = 128, 64 - data = fluid.data(name='sequence', - shape=[None], - dtype='int64', - lod_level=1) - emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) - hidden_dim = 512 - x = fluid.layers.fc(input=emb, size=hidden_dim * 3) - hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim) - """ - - assert ( - _non_static_mode() is not True - ), "please use gru instead of dynamic_gru in dygraph mode!" - - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'dynamic_gru' - ) - - check_type(h_0, 'h_0', (Variable, type(None)), 'dynamic_gru') - if isinstance(h_0, Variable): - check_variable_and_dtype( - h_0, 'h_0', ['float32', 'float64'], 'dynamic_gru' - ) - - helper = LayerHelper('gru', **locals()) - dtype = helper.input_dtype() - - weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype - ) - bias = helper.create_parameter( - attr=helper.bias_attr, shape=[1, 3 * size], dtype=dtype, is_bias=True - ) - batch_size = input.shape[0] - inputs = {'Input': input, 'Weight': weight, 'Bias': bias} - if h_0: - assert h_0.shape == (batch_size, size), ( - 'The shape of h0 should be(batch_size, %d)' % size - ) - inputs['H0'] = h_0 - - hidden = helper.create_variable_for_type_inference(dtype) - batch_gate = helper.create_variable_for_type_inference(dtype) - batch_reset_hidden_prev = helper.create_variable_for_type_inference(dtype) - batch_hidden = helper.create_variable_for_type_inference(dtype) - - helper.append_op( - type='gru', - inputs=inputs, - outputs={ - 'Hidden': hidden, - 'BatchGate': batch_gate, - 'BatchResetHiddenPrev': batch_reset_hidden_prev, - 'BatchHidden': batch_hidden, - }, - attrs={ - 'is_reverse': is_reverse, - 'gate_activation': gate_activation, - 'activation': candidate_activation, - 'origin_mode': origin_mode, - }, - ) - return hidden - - -def gru_unit( - input, - hidden, - size, - param_attr=None, - bias_attr=None, - activation='tanh', - gate_activation='sigmoid', - origin_mode=False, -): - r""" - - Gated Recurrent Unit (GRU) RNN cell. This operator performs GRU calculations for - one time step and it supports these two modes: - - If ``origin_mode`` is True, then the formula used is from paper - `Learning Phrase Representations using RNN Encoder Decoder for Statistical - Machine Translation `_ . - - .. math:: - - u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u) - - r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r) - - \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c) - - h_t & = u_t \odot h_{t-1} + (1-u_t) \odot \\tilde{h_t} - - - if ``origin_mode`` is False, then the formula used is from paper - `Empirical Evaluation of Gated Recurrent Neural Networks on Sequence - Modeling `_ - - .. math:: - - u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u) - - r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r) - - \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c) - - h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \\tilde{h_t} - - :math:`x_t` is the input of current time step, but it is not ``input`` . 
- This operator does not include the calculations :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` , - **Note** thus a fully-connect layer whose size is 3 times of GRU hidden size should - be used before this operator, and the output should be used as ``input`` here. - :math:`h_{t-1}` is the hidden state from previous time step. - :math:`u_t` , :math:`r_t` , :math:`\\tilde{h_t}` and :math:`h_t` stand for - update gate, reset gate, candidate hidden and hidden output separately. - :math:`W_{uh}, b_u` , :math:`W_{rh}, b_r` and :math:`W_{ch}, b_c` stand for - the weight matrix and bias used in update gate, reset gate, candidate hidden - calculations. For implementation, the three weight matrix are merged into a - tensor shaped :math:`[D, D \\times 3]` , the three bias are concatenated as - a tensor shaped :math:`[1, D \\times 3]` , where :math:`D` stands for the - hidden size; The data layout of weight tensor is: :math:`W_{uh}` and :math:`W_{rh}` - are concatenated with shape :math:`[D, D \\times 2]` lying on the first part, - and :math:`W_{ch}` lying on the latter part with shape :math:`[D, D]` . - - - Args: - input(Variable): A 2D Tensor representing the input after linear projection - after linear projection. Its shape should be :math:`[N, D \\times 3]` , - where :math:`N` stands for batch size, :math:`D` for the hidden size. - The data type should be float32 or float64. - hidden(Variable): A 2D Tensor representing the hidden state from previous step. - Its shape should be :math:`[N, D]` , where :math:`N` stands for batch size, - :math:`D` for the hidden size. The data type should be same as ``input`` . - size(int): Indicate the hidden size. - param_attr(ParamAttr, optional): To specify the weight parameter property. - Default: None, which means the default weight parameter property is used. - See usage for details in :ref:`api_fluid_ParamAttr` . - bias_attr (ParamAttr, optional): To specify the bias parameter property. - Default: None, which means the default bias parameter property is used. - See usage for details in :ref:`api_fluid_ParamAttr` . - activation(str, optional): The activation function corresponding to - :math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity" - are supported. Default "tanh". - gate_activation(str, optional): The activation function corresponding to - :math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity" - are supported. Default "sigmoid". - - Returns: - tuple: The tuple contains three Tensor variables with the same data type \ - as ``input`` . They represent the hidden state for next time step ( :math:`h_t` ), \ - reset previous hidden state ( :math:`r_t \odot h_{t-1}` ), and the \ - concatenation of :math:`h_t, r_t, \\tilde{h_t}` . And they have shape \ - :math:`[N, D]` , :math:`[N, D]` , :math:`[N, D \times 3]` separately. \ - Usually only the hidden state for next time step ( :math:`h_t` ) is used \ - as output and state, the other two are intermediate results of calculations. - - Examples: - - .. 
code-block:: python - - import paddle.fluid as fluid - - dict_dim, emb_dim = 128, 64 - data = fluid.data(name='step_data', shape=[None], dtype='int64') - emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) - hidden_dim = 512 - x = fluid.layers.fc(input=emb, size=hidden_dim * 3) - pre_hidden = fluid.data( - name='pre_hidden', shape=[None, hidden_dim], dtype='float32') - hidden = fluid.layers.gru_unit( - input=x, hidden=pre_hidden, size=hidden_dim * 3) - - """ - check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'gru_unit') - check_variable_and_dtype( - hidden, 'hidden', ['float32', 'float64'], 'gru_unit' - ) - check_type(size, 'size', (int), 'gru_unit') - activation_dict = dict( - identity=0, - sigmoid=1, - tanh=2, - relu=3, - ) - activation = activation_dict[activation] - gate_activation = activation_dict[gate_activation] - - helper = LayerHelper('gru_unit', **locals()) - dtype = helper.input_dtype() - size = size // 3 - - # create weight - weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype - ) - - gate = helper.create_variable_for_type_inference(dtype) - reset_hidden_pre = helper.create_variable_for_type_inference(dtype) - updated_hidden = helper.create_variable_for_type_inference(dtype) - inputs = {'Input': input, 'HiddenPrev': hidden, 'Weight': weight} - # create bias - if helper.bias_attr: - bias_size = [1, 3 * size] - bias = helper.create_parameter( - attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True - ) - inputs['Bias'] = bias - - helper.append_op( - type='gru_unit', - inputs=inputs, - outputs={ - 'Gate': gate, - 'ResetHiddenPrev': reset_hidden_pre, - 'Hidden': updated_hidden, - }, - attrs={ - 'activation': 2, # tanh - 'gate_activation': 1, # sigmoid - 'origin_mode': origin_mode, - }, - ) - - return updated_hidden, reset_hidden_pre, gate diff --git a/python/paddle/fluid/tests/book/CMakeLists.txt b/python/paddle/fluid/tests/book/CMakeLists.txt index aa61391305..8a5589856d 100644 --- a/python/paddle/fluid/tests/book/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/CMakeLists.txt @@ -12,5 +12,4 @@ endforeach() set_tests_properties(test_word2vec_book PROPERTIES TIMEOUT 120) set_tests_properties(test_recognize_digits PROPERTIES TIMEOUT 120) set_tests_properties(test_image_classification PROPERTIES TIMEOUT 200) -set_tests_properties(test_label_semantic_roles PROPERTIES TIMEOUT 240) set_tests_properties(test_fit_a_line PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index 1d952bffc4..b204fde04b 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -55,43 +55,6 @@ def convolution_net( return avg_cost, accuracy, prediction -def stacked_lstm_net( - data, label, input_dim, class_dim=2, emb_dim=128, hid_dim=512, stacked_num=3 -): - assert stacked_num % 2 == 1 - - emb = fluid.layers.embedding( - input=data, size=[input_dim, emb_dim], is_sparse=True - ) - # add bias attr - - # TODO(qijun) linear act - fc1 = fluid.layers.fc(input=emb, size=hid_dim) - lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim) - - inputs = [fc1, lstm1] - - for i in range(2, stacked_num + 1): - fc = fluid.layers.fc(input=inputs, size=hid_dim) - lstm, cell = fluid.layers.dynamic_lstm( - input=fc, size=hid_dim, is_reverse=(i % 2) == 0 - ) - inputs = [fc, lstm] - - fc_last = fluid.layers.sequence_pool(input=inputs[0], 
pool_type='max') - lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max') - - prediction = fluid.layers.fc( - input=[fc_last, lstm_last], size=class_dim, act='softmax' - ) - cost = paddle.nn.functional.cross_entropy( - input=prediction, label=label, reduction='none', use_softmax=False - ) - avg_cost = paddle.mean(cost) - accuracy = paddle.static.accuracy(input=prediction, label=label) - return avg_cost, accuracy, prediction - - def train( word_dict, net_method, @@ -278,25 +241,6 @@ class TestUnderstandSentiment(unittest.TestCase): parallel=True, ) - @unittest.skip(reason="make CI faster") - def test_stacked_lstm_cpu(self): - with self.new_program_scope(): - main( - self.word_dict, - net_method=stacked_lstm_net, - use_cuda=False, - save_dirname="understand_sentiment_stacked_lstm.inference.model", - ) - - def test_stacked_lstm_cpu_parallel(self): - with self.new_program_scope(): - main( - self.word_dict, - net_method=stacked_lstm_net, - use_cuda=False, - parallel=True, - ) - def test_conv_gpu(self): with self.new_program_scope(): main( @@ -315,25 +259,6 @@ class TestUnderstandSentiment(unittest.TestCase): parallel=True, ) - @unittest.skip(reason="make CI faster") - def test_stacked_lstm_gpu(self): - with self.new_program_scope(): - main( - self.word_dict, - net_method=stacked_lstm_net, - use_cuda=True, - save_dirname="understand_sentiment_stacked_lstm.inference.model", - ) - - def test_stacked_lstm_gpu_parallel(self): - with self.new_program_scope(): - main( - self.word_dict, - net_method=stacked_lstm_net, - use_cuda=True, - parallel=True, - ) - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py deleted file mode 100644 index 102b4c5ec9..0000000000 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ /dev/null @@ -1,408 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import contextlib -import os -import tempfile -import time -import unittest - -import numpy as np - -import paddle -import paddle.dataset.conll05 as conll05 -import paddle.fluid as fluid - -paddle.enable_static() - -word_dict, verb_dict, label_dict = conll05.get_dict() -word_dict_len = len(word_dict) -label_dict_len = len(label_dict) -pred_dict_len = len(verb_dict) - -mark_dict_len = 2 -word_dim = 32 -mark_dim = 5 -hidden_dim = 512 -depth = 8 -mix_hidden_lr = 1e-3 - -IS_SPARSE = True -PASS_NUM = 2 -BATCH_SIZE = 10 - -embedding_name = 'emb' - - -def load_parameter(file_name, h, w): - with open(file_name, 'rb') as f: - f.read(16) # skip header. 
- return np.fromfile(f, dtype=np.float32).reshape(h, w) - - -def db_lstm( - word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, **ignored -): - # 8 features - predicate_embedding = fluid.layers.embedding( - input=predicate, - size=[pred_dict_len, word_dim], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='vemb', - ) - - mark_embedding = fluid.layers.embedding( - input=mark, - size=[mark_dict_len, mark_dim], - dtype='float32', - is_sparse=IS_SPARSE, - ) - - word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] - emb_layers = [ - fluid.layers.embedding( - size=[word_dict_len, word_dim], - input=x, - param_attr=fluid.ParamAttr(name=embedding_name, trainable=False), - ) - for x in word_input - ] - emb_layers.append(predicate_embedding) - emb_layers.append(mark_embedding) - - hidden_0_layers = [ - fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers - ] - - hidden_0 = fluid.layers.sums(input=hidden_0_layers) - - lstm_0 = fluid.layers.dynamic_lstm( - input=hidden_0, - size=hidden_dim, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid', - ) - - # stack L-LSTM and R-LSTM with direct edges - input_tmp = [hidden_0, lstm_0] - - for i in range(1, depth): - mix_hidden = fluid.layers.sums( - input=[ - fluid.layers.fc(input=input_tmp[0], size=hidden_dim), - fluid.layers.fc(input=input_tmp[1], size=hidden_dim), - ] - ) - - lstm = fluid.layers.dynamic_lstm( - input=mix_hidden, - size=hidden_dim, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid', - is_reverse=((i % 2) == 1), - ) - - input_tmp = [mix_hidden, lstm] - - feature_out = fluid.layers.sums( - input=[ - fluid.layers.fc( - input=input_tmp[0], size=label_dict_len, act='tanh' - ), - fluid.layers.fc( - input=input_tmp[1], size=label_dict_len, act='tanh' - ), - ] - ) - - return feature_out - - -def train(use_cuda, save_dirname=None, is_local=True): - # define network topology - word = fluid.layers.data( - name='word_data', shape=[1], dtype='int64', lod_level=1 - ) - predicate = fluid.layers.data( - name='verb_data', shape=[1], dtype='int64', lod_level=1 - ) - ctx_n2 = fluid.layers.data( - name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1 - ) - ctx_n1 = fluid.layers.data( - name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1 - ) - ctx_0 = fluid.layers.data( - name='ctx_0_data', shape=[1], dtype='int64', lod_level=1 - ) - ctx_p1 = fluid.layers.data( - name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1 - ) - ctx_p2 = fluid.layers.data( - name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1 - ) - mark = fluid.layers.data( - name='mark_data', shape=[1], dtype='int64', lod_level=1 - ) - feature_out = db_lstm(**locals()) - target = fluid.layers.data( - name='target', shape=[1], dtype='int64', lod_level=1 - ) - cost = fluid.layers.softmax_with_cross_entropy(feature_out, target) - avg_cost = paddle.mean(cost) - - # TODO(qiao) - # check other optimizers and check why out will be NAN - sgd_optimizer = fluid.optimizer.SGD( - learning_rate=fluid.layers.exponential_decay( - learning_rate=0.01, - decay_steps=100000, - decay_rate=0.5, - staircase=True, - ) - ) - sgd_optimizer.minimize(avg_cost) - - # TODO(qiao) - # add dependency track and move this config before optimizer - - train_data = paddle.batch( - paddle.reader.shuffle(paddle.dataset.conll05.test(), buf_size=8192), - batch_size=BATCH_SIZE, - ) - - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - feeder = fluid.DataFeeder( - feed_list=[ - word, - ctx_n2, - ctx_n1, - 
ctx_0, - ctx_p1, - ctx_p2, - predicate, - mark, - target, - ], - place=place, - ) - exe = fluid.Executor(place) - - def train_loop(main_program): - exe.run(fluid.default_startup_program()) - embedding_param = ( - fluid.global_scope().find_var(embedding_name).get_tensor() - ) - embedding_param.set( - load_parameter(conll05.get_embedding(), word_dict_len, word_dim), - place, - ) - - start_time = time.time() - batch_id = 0 - for pass_id in range(PASS_NUM): - for data in train_data(): - cost = exe.run( - main_program, feed=feeder.feed(data), fetch_list=[avg_cost] - ) - cost = cost[0] - - if batch_id % 10 == 0: - print("avg_cost:" + str(cost)) - if batch_id != 0: - print( - "second per batch: " - + str((time.time() - start_time) / batch_id) - ) - # Set the threshold low to speed up the CI test - if float(cost) < 80.0: - if save_dirname is not None: - # TODO(liuyiqun): Change the target to crf_decode - fluid.io.save_inference_model( - save_dirname, - [ - 'word_data', - 'verb_data', - 'ctx_n2_data', - 'ctx_n1_data', - 'ctx_0_data', - 'ctx_p1_data', - 'ctx_p2_data', - 'mark_data', - ], - [feature_out], - exe, - ) - return - - batch_id = batch_id + 1 - - raise RuntimeError( - "This model should save_inference_model and return, but not reach here, please check!" - ) - - if is_local: - train_loop(fluid.default_main_program()) - else: - port = os.getenv("PADDLE_PSERVER_PORT", "6174") - pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... - eplist = [] - for ip in pserver_ips.split(","): - eplist.append(':'.join([ip, port])) - pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("PADDLE_TRAINERS")) - current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) - training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") - t = fluid.DistributeTranspiler() - t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) - if training_role == "PSERVER": - pserver_prog = t.get_pserver_program(current_endpoint) - pserver_startup = t.get_startup_program( - current_endpoint, pserver_prog - ) - exe.run(pserver_startup) - exe.run(pserver_prog) - elif training_role == "TRAINER": - train_loop(t.get_trainer_program()) - - -def infer(use_cuda, save_dirname=None): - if save_dirname is None: - return - - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - - inference_scope = fluid.core.Scope() - with fluid.scope_guard(inference_scope): - # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be fed - # data using feed operators), and the fetch_targets (variables that - # we want to obtain data from using fetch operators). - [ - inference_program, - feed_target_names, - fetch_targets, - ] = fluid.io.load_inference_model(save_dirname, exe) - - # Setup input by creating LoDTensor to represent sequence of words. - # Here each word is the basic element of the LoDTensor and the shape of - # each word (base_shape) should be [1] since it is simply an index to - # look up for the corresponding word vector. - # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], - # which has only one level of detail. Then the created LoDTensor will have only - # one higher level structure (sequence of words, or sentence) than the basic - # element (word). Hence the LoDTensor will hold data for three sentences of - # length 3, 4 and 2, respectively. - # Note that recursive_sequence_lengths should be a list of lists. 
- recursive_seq_lens = [[3, 4, 2]] - base_shape = [1] - # The range of random integers is [low, high] - word = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1 - ) - pred = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=pred_dict_len - 1 - ) - ctx_n2 = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1 - ) - ctx_n1 = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1 - ) - ctx_0 = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1 - ) - ctx_p1 = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1 - ) - ctx_p2 = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1 - ) - mark = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=mark_dict_len - 1 - ) - - # Construct feed as a dictionary of {feed_target_name: feed_target_data} - # and results will contain a list of data corresponding to fetch_targets. - assert feed_target_names[0] == 'word_data' - assert feed_target_names[1] == 'verb_data' - assert feed_target_names[2] == 'ctx_n2_data' - assert feed_target_names[3] == 'ctx_n1_data' - assert feed_target_names[4] == 'ctx_0_data' - assert feed_target_names[5] == 'ctx_p1_data' - assert feed_target_names[6] == 'ctx_p2_data' - assert feed_target_names[7] == 'mark_data' - - results = exe.run( - inference_program, - feed={ - feed_target_names[0]: word, - feed_target_names[1]: pred, - feed_target_names[2]: ctx_n2, - feed_target_names[3]: ctx_n1, - feed_target_names[4]: ctx_0, - feed_target_names[5]: ctx_p1, - feed_target_names[6]: ctx_p2, - feed_target_names[7]: mark, - }, - fetch_list=fetch_targets, - return_numpy=False, - ) - print(results[0].recursive_sequence_lengths()) - np_data = np.array(results[0]) - print("Inference Shape: ", np_data.shape) - - -def main(use_cuda, is_local=True): - if use_cuda and not fluid.core.is_compiled_with_cuda(): - return - - temp_dir = tempfile.TemporaryDirectory() - # Directory for saving the trained model - save_dirname = os.path.join( - temp_dir.name, "label_semantic_roles.inference.model" - ) - - train(use_cuda, save_dirname, is_local) - infer(use_cuda, save_dirname) - - temp_dir.cleanup() - - -class TestLabelSemanticRoles(unittest.TestCase): - def test_cuda(self): - with self.scope_prog_guard(): - main(use_cuda=True) - - def test_cpu(self): - with self.scope_prog_guard(): - main(use_cuda=False) - - @contextlib.contextmanager - def scope_prog_guard(self): - prog = fluid.Program() - startup_prog = fluid.Program() - scope = fluid.core.Scope() - with fluid.scope_guard(scope): - with fluid.program_guard(prog, startup_prog): - yield - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index de83eeb536..30f38fea01 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -113,7 +113,6 @@ if(WIN32) list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_3) list(REMOVE_ITEM TEST_OPS test_fleet_unitaccessor) list(REMOVE_ITEM TEST_OPS test_ps_dispatcher) - list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_nlp) list(REMOVE_ITEM TEST_OPS test_nvprof) # TODO: Fix these unittests failed on Windows @@ -997,13 
+996,6 @@ set_tests_properties(test_parallel_executor_transformer PROPERTIES TIMEOUT 120) set_tests_properties(test_elementwise_div_op PROPERTIES TIMEOUT 120) set_tests_properties(test_regularizer_api PROPERTIES TIMEOUT 150) set_tests_properties(test_multiclass_nms_op PROPERTIES TIMEOUT 120) -if(NOT WIN32) - if(WITH_NV_JETSON) - set_tests_properties(test_ir_memory_optimize_nlp PROPERTIES TIMEOUT 1200) - else() - set_tests_properties(test_ir_memory_optimize_nlp PROPERTIES TIMEOUT 120) - endif() -endif() set_tests_properties(test_add_reader_dependency PROPERTIES TIMEOUT 120) set_tests_properties(test_bilateral_slice_op PROPERTIES TIMEOUT 120) set_tests_properties(test_buffer_shared_memory_reuse_pass PROPERTIES TIMEOUT @@ -1080,7 +1072,6 @@ set_tests_properties(test_weight_decay PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_ptb_rnn_sorted_gradient PROPERTIES TIMEOUT 120) set_tests_properties(test_crop_tensor_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_eager_deletion_lstm_net PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_executor_mnist PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_ptb_rnn PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_save_load_v2 PROPERTIES TIMEOUT 120) @@ -1124,7 +1115,6 @@ set_tests_properties(test_imperative_optimizer PROPERTIES TIMEOUT 250) set_tests_properties(test_imperative_optimizer_v2 PROPERTIES TIMEOUT 250) set_tests_properties(test_pool2d_op PROPERTIES TIMEOUT 120) set_tests_properties(test_transpose_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_eager_deletion_gru_net PROPERTIES TIMEOUT 120) set_tests_properties(test_activation_op PROPERTIES TIMEOUT 270) set_tests_properties(test_normal PROPERTIES TIMEOUT 120) set_tests_properties(test_lstmp_op PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py deleted file mode 100644 index 6f59a7124a..0000000000 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np -from inference_pass_test import InferencePassTest - -import paddle.fluid as fluid -from paddle.fluid.core import PassVersionChecker - - -class FcGruFusePassTest(InferencePassTest): - def setUp(self): - with fluid.program_guard(self.main_program, self.startup_program): - dict_dim, emb_dim = 128, 64 - data = fluid.data( - name='step_data', shape=[None], dtype='int64', lod_level=1 - ) - emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) - hidden_dim = 512 - x = fluid.layers.fc(input=emb, size=hidden_dim * 3) - hidden = fluid.layers.dynamic_gru( - input=x, - size=hidden_dim, - bias_attr=True, - origin_mode=False, - is_reverse=True, - ) - - batch = 16 - lod_tensor = fluid.LoDTensor() - lod_tensor.set( - np.random.randint(0, dict_dim, size=[batch]).astype("int64"), - fluid.CPUPlace(), - ) - lod_tensor.set_lod([[0, batch]]) - self.feeds = {"step_data": lod_tensor} - self.fetch_list = [hidden] - - def test_check_output(self): - use_gpu = False - self.check_output_with_option(use_gpu) - self.assertTrue(PassVersionChecker.IsCompatible('fc_gru_fuse_pass')) - - -class MulGruFusePassTest(InferencePassTest): - def setUp(self): - with fluid.program_guard(self.main_program, self.startup_program): - dict_dim, emb_dim = 128, 64 - data = fluid.data( - name='step_data', shape=[None], dtype='int64', lod_level=1 - ) - emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) - hidden_dim = 512 - x = fluid.layers.fc(input=emb, size=hidden_dim * 3, bias_attr=False) - hidden = fluid.layers.dynamic_gru( - input=x, - size=hidden_dim, - bias_attr=True, - origin_mode=False, - is_reverse=True, - ) - - batch = 16 - lod_tensor = fluid.LoDTensor() - lod_tensor.set( - np.random.randint(0, dict_dim, size=[batch]).astype("int64"), - fluid.CPUPlace(), - ) - lod_tensor.set_lod([[0, batch]]) - self.feeds = {"step_data": lod_tensor} - self.fetch_list = [hidden] - - def test_check_output(self): - use_gpu = False - self.check_output_with_option(use_gpu) - self.assertTrue(PassVersionChecker.IsCompatible('mul_gru_fuse_pass')) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py deleted file mode 100644 index 7c85b1f1a2..0000000000 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np -from inference_pass_test import InferencePassTest - -import paddle.fluid as fluid -from paddle.fluid.core import PassVersionChecker - - -class MulLstmFusePassTest(InferencePassTest): - def setUp(self): - with fluid.program_guard(self.main_program, self.startup_program): - dict_dim, emb_dim = 128, 64 - hidden_dim = 512 - - data = fluid.data( - name='data', shape=[1], dtype='int64', lod_level=1 - ) - emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) - x = fluid.layers.fc(input=emb, size=hidden_dim * 4, bias_attr=False) - forward, cell = fluid.layers.dynamic_lstm( - input=x, size=hidden_dim * 4 - ) - - batch = 16 - lod_tensor = fluid.LoDTensor() - lod_tensor.set( - np.random.randint(0, dict_dim, size=[batch]).astype("int64"), - fluid.CPUPlace(), - ) - lod_tensor.set_lod([[0, batch]]) - self.feeds = {"data": lod_tensor} - self.fetch_list = [forward, cell] - - def test_check_output(self): - use_gpu = False - self.check_output_with_option(use_gpu) - self.assertTrue(PassVersionChecker.IsCompatible('mul_lstm_fuse_pass')) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py deleted file mode 100644 index 1ac689d8df..0000000000 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -from test_eager_deletion_dynamic_rnn_base import TestBase - -import paddle -import paddle.fluid as fluid - -fluid.core._set_eager_deletion_mode(0.0, 1.0, True) - - -def gru_net( - data, - label, - dict_dim, - emb_dim=128, - hid_dim=128, - hid_dim2=96, - class_dim=2, - emb_lr=400.0, -): - emb = fluid.layers.embedding( - input=data, - size=[dict_dim, emb_dim], - param_attr=fluid.ParamAttr(learning_rate=emb_lr), - ) - fc0 = fluid.layers.fc(input=emb, size=hid_dim * 3) - gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False) - gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max') - gru_max_tanh = paddle.tanh(gru_max) - fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh') - prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') - cost = paddle.nn.functional.cross_entropy( - input=prediction, label=label, reduction='none', use_softmax=False - ) - avg_cost = paddle.mean(x=cost) - return avg_cost - - -class GRUTest(TestBase): - def setUp(self): - self.net = gru_net - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py deleted file mode 100644 index 09fc60e6b1..0000000000 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -from test_eager_deletion_dynamic_rnn_base import TestBase - -import paddle -import paddle.fluid as fluid - -fluid.core._set_eager_deletion_mode(0.0, 1.0, True) - - -def lstm_net( - data, - label, - dict_dim, - emb_dim=128, - hid_dim=128, - hid_dim2=96, - class_dim=2, - emb_lr=30.0, -): - emb = fluid.layers.embedding( - input=data, - size=[dict_dim, emb_dim], - param_attr=fluid.ParamAttr(learning_rate=emb_lr), - ) - fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4) - lstm_h, c = fluid.layers.dynamic_lstm( - input=fc0, size=hid_dim * 4, is_reverse=False - ) - lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') - lstm_max_tanh = paddle.tanh(lstm_max) - fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') - prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') - cost = paddle.nn.functional.cross_entropy( - input=prediction, label=label, reduction='none', use_softmax=False - ) - avg_cost = paddle.mean(x=cost) - return avg_cost - - -class LSTMTest(TestBase): - def setUp(self): - self.net = lstm_net - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index e612709386..990a41b435 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -404,21 +404,6 @@ def lm_model( init_hidden=init_hidden_reshape, init_cell=init_cell_reshape, ) - elif rnn_model == "cudnn": - x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) - rnn_out, last_hidden, last_cell = layers.lstm( - x_emb, - init_hidden_reshape, - init_cell_reshape, - num_steps, - hidden_size, - num_layers, - is_bidirec=False, - default_initializer=fluid.initializer.UniformInitializer( - low=-init_scale, high=init_scale - ), - ) - rnn_out = paddle.transpose(rnn_out, perm=[1, 0, 2]) elif rnn_model == "basic_lstm": rnn_out, last_hidden, last_cell = basic_lstm( x_emb, diff --git a/python/paddle/fluid/tests/unittests/test_gru_op.py b/python/paddle/fluid/tests/unittests/test_gru_op.py index 2fbc60b454..f3e297d3b8 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_op.py @@ -18,8 +18,6 @@ import unittest import numpy as np from op_test import OpTest -from paddle import fluid -from paddle.fluid import Program, program_guard from paddle.fluid.tests.unittests.test_lstm_op import ACTIVATION @@ -267,25 +265,5 @@ class TestGRUOpInference(TestGRUOp): pass -class TestGruOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - - def test_Variable(): - input_data = np.random.random((1, 1536)).astype("float32") - fluid.layers.dynamic_gru(input=input_data, size=512) - - self.assertRaises(TypeError, test_Variable) - - def test_h_0(): - in_data = fluid.data( - name="input", shape=[None, 1536], 
dtype="float32" - ) - h = fluid.data(name="h", shape=[None, 512], dtype="int32") - fluid.layers.dynamic_gru(input=in_data, size=512, h_0=h) - - self.assertRaises(TypeError, test_h_0) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_gru_unit_op.py b/python/paddle/fluid/tests/unittests/test_gru_unit_op.py index edb8c83ced..87fcb5c351 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_unit_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_unit_op.py @@ -19,8 +19,6 @@ import numpy as np from op_test import OpTest import paddle.fluid as fluid -from paddle.fluid.framework import Program, program_guard -from paddle.fluid.layers import gru_unit class GRUActivationType(OpTest): @@ -46,55 +44,6 @@ def relu(x): return np.maximum(x, 0) -class TestGRUUnitOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - batch_size = 5 - hidden_dim = 40 - input = fluid.data( - name='input', shape=[None, hidden_dim * 3], dtype='float32' - ) - pre_hidden = fluid.data( - name='pre_hidden', shape=[None, hidden_dim], dtype='float32' - ) - np_input = np.random.uniform( - -0.1, 0.1, (batch_size, hidden_dim * 3) - ).astype('float64') - np_pre_hidden = np.random.uniform( - -0.1, 0.1, (batch_size, hidden_dim) - ).astype('float64') - - def test_input_Variable(): - gru_unit(np_input, pre_hidden, hidden_dim * 3) - - self.assertRaises(TypeError, test_input_Variable) - - def test_pre_hidden_Variable(): - gru_unit(input, np_pre_hidden, hidden_dim * 3) - - self.assertRaises(TypeError, test_pre_hidden_Variable) - - def test_input_type(): - error_input = fluid.data( - name='error_input', - shape=[None, hidden_dim * 3], - dtype='int32', - ) - gru_unit(error_input, pre_hidden, hidden_dim * 3) - - self.assertRaises(TypeError, test_input_type) - - def test_pre_hidden_type(): - error_pre_hidden = fluid.data( - name='error_pre_hidden', - shape=[None, hidden_dim], - dtype='int32', - ) - gru_unit(input, error_pre_hidden, hidden_dim * 3) - - self.assertRaises(TypeError, test_pre_hidden_type) - - class TestGRUUnitOp(OpTest): batch_size = 5 frame_size = 40 diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py deleted file mode 100644 index 04afd37c26..0000000000 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# nlp model stack of op operate on lod. It's a classical test case in optimize pass. 
- -import unittest - -from ir_memory_optimize_net_base import TestIrMemOptBase - -import paddle -import paddle.fluid as fluid - - -def lstm_net( - data, - label, - dict_dim, - emb_dim=128, - hid_dim=128, - hid_dim2=96, - class_dim=2, - emb_lr=30.0, -): - emb = fluid.layers.embedding( - input=data, - size=[dict_dim, emb_dim], - param_attr=fluid.ParamAttr(learning_rate=emb_lr), - ) - fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4) - - lstm_h, c = fluid.layers.dynamic_lstm( - input=fc0, size=hid_dim * 4, is_reverse=False - ) - lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') - lstm_max_tanh = paddle.tanh(lstm_max) - fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') - prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') - cost = paddle.nn.functional.cross_entropy( - input=prediction, label=label, reduction='none', use_softmax=False - ) - avg_cost = paddle.mean(x=cost) - return avg_cost - - -class TestIrMemOptRNN(TestIrMemOptBase): - def setUp(self): - self.network = lstm_net - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index ea6c9399c8..c96074a928 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -2593,20 +2593,6 @@ class TestBook(LayerTest): out = paddle.nn.functional.square_error_cost(input=x, label=y) return out - def test_dynamic_lstmp(self): - # TODO(minqiyang): dygraph do not support lod now - with self.static_graph(): - hidden_dim, proj_dim = 16, 8 - seq_data = layers.data( - name='seq_data', shape=[10, 10], dtype='float32', lod_level=1 - ) - fc_out = layers.fc(input=seq_data, size=4 * hidden_dim) - self.assertIsNotNone( - layers.dynamic_lstmp( - input=fc_out, size=4 * hidden_dim, proj_size=proj_dim - ) - ) - def test_lod_reset(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): diff --git a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py index 536fc59f42..4f941ebb76 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py @@ -20,9 +20,7 @@ import numpy as np from op_test import OpTest import paddle -import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers random.seed(2) np.set_printoptions(threshold=np.inf) @@ -539,90 +537,5 @@ class TestCUDNNLstmOp(OpTest): ) -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestCUDNNlstmAPI(unittest.TestCase): - def test_lstm(self): - seq_len = 20 - batch_size = 5 - hidden_size = 20 - dropout_prob = 0.0 - num_layers = 1 - dtype = 'float32' if core.is_compiled_with_rocm() else 'float64' - input = fluid.data( - name='input', shape=[seq_len, batch_size, hidden_size], dtype=dtype - ) - init_h = layers.fill_constant( - [num_layers, batch_size, hidden_size], dtype, 0.0 - ) - init_c = layers.fill_constant( - [num_layers, batch_size, hidden_size], dtype, 0.0 - ) - rnn_out, last_h, last_c = layers.lstm( - input, - init_h, - init_c, - seq_len, - hidden_size, - num_layers, - dropout_prob, - False, - ) - exe = fluid.Executor(fluid.CUDAPlace(0)) - exe.run(fluid.default_startup_program()) - input_i = np.random.uniform( - low=-0.1, high=0.1, size=(seq_len, batch_size, hidden_size) - ).astype("float64") - out = exe.run( - 
fluid.default_main_program(), - feed={'input': input_i}, - fetch_list=[rnn_out, last_h, last_c, 'cudnn_lstm_0.w_0'], - ) - - -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestCUDNNlstmAPI(unittest.TestCase): # noqa: F811 - def test_lstm(self): - seq_len = 20 - batch_size = 5 - hidden_size = 20 - dropout_prob = 0.0 - num_layers = 2 - dtype = 'float32' if core.is_compiled_with_rocm() else 'float64' - input = fluid.data( - name='input', shape=[seq_len, batch_size, hidden_size], dtype=dtype - ) - init_h = layers.fill_constant( - [num_layers, batch_size, hidden_size], dtype, 0.0 - ) - init_c = layers.fill_constant( - [num_layers, batch_size, hidden_size], dtype, 0.0 - ) - rnn_out, last_h, last_c = layers.lstm( - input, - init_h, - init_c, - seq_len, - hidden_size, - num_layers, - dropout_prob, - False, - True, - ) - exe = fluid.Executor(fluid.CUDAPlace(0)) - exe.run(fluid.default_startup_program()) - input_i = np.random.uniform( - low=-0.1, high=0.1, size=(seq_len, batch_size, hidden_size) - ).astype(dtype) - out = exe.run( - fluid.default_main_program(), - feed={'input': input_i}, - fetch_list=[rnn_out, last_h, last_c, 'cudnn_lstm_0.w_0'], - ) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_lstm_op.py b/python/paddle/fluid/tests/unittests/test_lstm_op.py index 8df805a231..a9b5c98f78 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_op.py @@ -17,11 +17,6 @@ import unittest import numpy as np from op_test import OpTest -from paddle import fluid -from paddle.fluid.framework import Program, program_guard -from paddle.fluid.layers import fill_constant -from paddle.fluid.layers import lstm as LSTM - SIGMOID_THRESHOLD_MIN = -40.0 SIGMOID_THRESHOLD_MAX = 13.0 EXP_MAX_INPUT = 40.0 @@ -132,130 +127,6 @@ def lstm( return hidden, cell -class LstmUnitTestError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - batch_size = 20 - seq_len = 100 - dropout_prob = 0.2 - hidden_size = 150 - num_layers = 1 - input = fluid.data( - name='input', - shape=[batch_size, seq_len, hidden_size], - dtype='float32', - ) - pre_hidden = fill_constant( - [num_layers, batch_size, hidden_size], 'float32', 0.0 - ) - pre_cell = fill_constant( - [num_layers, batch_size, hidden_size], 'float32', 0.0 - ) - - np_input = np.random.uniform( - -0.1, 0.1, (batch_size, seq_len, hidden_size) - ).astype('float64') - np_pre_hidden = np.random.uniform( - -0.1, 0.1, (num_layers, batch_size, hidden_size) - ).astype('float64') - np_pre_cell = np.random.uniform( - -0.1, 0.1, (num_layers, batch_size, hidden_size) - ).astype('float64') - - def test_input_Variable(): - LSTM( - np_input, - pre_hidden, - pre_cell, - seq_len, - hidden_size, - num_layers, - dropout_prob=dropout_prob, - ) - - self.assertRaises(TypeError, test_input_Variable) - - def test_pre_hidden_Variable(): - LSTM( - np_input, - np_pre_hidden, - pre_cell, - seq_len, - hidden_size, - num_layers, - dropout_prob=dropout_prob, - ) - - self.assertRaises(TypeError, test_pre_hidden_Variable) - - def test_pre_cell_Variable(): - LSTM( - np_input, - pre_hidden, - np_pre_cell, - seq_len, - hidden_size, - num_layers, - dropout_prob=dropout_prob, - ) - - self.assertRaises(TypeError, test_pre_cell_Variable) - - def test_input_type(): - error_input = fluid.data( - name='error_input', - shape=[None, hidden_size * 3], - dtype='int32', - ) - LSTM( - error_input, - pre_hidden, - pre_cell, - 
seq_len, - hidden_size, - num_layers, - dropout_prob=dropout_prob, - ) - - self.assertRaises(TypeError, test_input_type) - - def test_pre_hidden_type(): - error_pre_hidden = fluid.data( - name='error_pre_hidden', - shape=[None, hidden_size], - dtype='int32', - ) - LSTM( - input, - error_pre_hidden, - pre_cell, - seq_len, - hidden_size, - num_layers, - dropout_prob=dropout_prob, - ) - - self.assertRaises(TypeError, test_pre_hidden_type) - - def test_pre_cell_type(): - error_pre_cell = fluid.data( - name='error_pre_cell', - shape=[None, hidden_size], - dtype='int32', - ) - LSTM( - input, - pre_hidden, - error_pre_cell, - seq_len, - hidden_size, - num_layers, - dropout_prob=dropout_prob, - ) - - self.assertRaises(TypeError, test_pre_cell_type) - - class TestLstmOp(OpTest): def set_is_test(self): self.is_test = False @@ -374,47 +245,6 @@ class TestLstmOpInference(TestLstmOp): pass -class TestLstmOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - - def test_Variable(): - input_data = np.random.random((1, 2048)).astype("float32") - fluid.layers.dynamic_lstm( - input=input_data, size=2048, use_peepholes=False - ) - - self.assertRaises(TypeError, test_Variable) - - def test_h_0(): - in_data = fluid.data( - name="input", shape=[None, 2048], dtype="float32" - ) - h = fluid.data(name="h", shape=[None, 512], dtype="int32") - c = fluid.data(name="c", shape=[None, 512], dtype="float32") - fluid.layers.dynamic_lstm( - input=in_data, size=2048, use_peepholes=False, h_0=h, c_0=c - ) - - self.assertRaises(TypeError, test_h_0) - - def test_c_0(): - in_data_ = fluid.data( - name="input_", shape=[None, 2048], dtype="float32" - ) - h_ = fluid.data(name="h_", shape=[None, 512], dtype="float32") - c_ = fluid.data(name="c_", shape=[None, 512], dtype="int32") - fluid.layers.dynamic_lstm( - input=in_data_, - size=2048, - use_peepholes=False, - h_0=h_, - c_0=c_, - ) - - self.assertRaises(TypeError, test_c_0) - - # class TestLstmOpHasInitial(TestLstmOp): # def set_argument(self): # self.lod = [[2, 3, 2]] diff --git a/python/paddle/fluid/tests/unittests/test_lstmp_op.py b/python/paddle/fluid/tests/unittests/test_lstmp_op.py index c299f54159..f1af219140 100644 --- a/python/paddle/fluid/tests/unittests/test_lstmp_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstmp_op.py @@ -17,9 +17,6 @@ import unittest import numpy as np import test_lstm_op as LstmTest -from paddle import fluid -from paddle.fluid import Program, program_guard - ACTIVATION = { 'identity': LstmTest.identity, 'sigmoid': LstmTest.sigmoid, @@ -378,64 +375,5 @@ class TestLstmpOpLen0Case2(TestLstmpOp): self.lod = [[2, 0, 3]] -class TestLstmpOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - - def test_Variable(): - input_data = np.random.random((1, 2048)).astype("float32") - fluid.layers.dynamic_lstmp( - input=input_data, - size=2048, - proj_size=256, - use_peepholes=False, - is_reverse=True, - cell_activation="tanh", - proj_activation="tanh", - ) - - self.assertRaises(TypeError, test_Variable) - - def test_h_0(): - in_data = fluid.data( - name="input", shape=[None, 2048], dtype="float32" - ) - h = fluid.data(name="h", shape=[None, 512], dtype="int32") - c = fluid.data(name="c", shape=[None, 512], dtype="float32") - fluid.layers.dynamic_lstmp( - input=in_data, - size=2048, - proj_size=256, - use_peepholes=False, - is_reverse=True, - cell_activation="tanh", - proj_activation="tanh", - h_0=h, - c_0=c, - ) - - self.assertRaises(TypeError, test_h_0) - - def 
test_c_0(): - in_data_ = fluid.data( - name="input_", shape=[None, 2048], dtype="float32" - ) - h_ = fluid.data(name="h_", shape=[None, 512], dtype="float32") - c_ = fluid.data(name="c_", shape=[None, 512], dtype="int32") - fluid.layers.dynamic_lstmp( - input=in_data_, - size=2048, - proj_size=256, - use_peepholes=False, - is_reverse=True, - cell_activation="tanh", - proj_activation="tanh", - h_0=h_, - c_0=c_, - ) - - self.assertRaises(TypeError, test_c_0) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py index d9205c8e3b..dbbd14394f 100755 --- a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py +++ b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py @@ -17,7 +17,6 @@ import unittest import numpy as np import seresnext_net -from fake_reader import fake_imdb_reader from simple_nets import fc_with_batchnorm, init_data, simple_fc_net from test_parallel_executor_transformer import ( DeviceType, @@ -30,37 +29,6 @@ import paddle.fluid as fluid import paddle.fluid.core as core -def lstm_net(use_feed): - dict_dim = 5147 - emb_dim = 128 - hid_dim = 128 - hid_dim2 = 96 - class_dim = 2 - emb_lr = 30.0 - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1 - ) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") - emb = fluid.layers.embedding( - input=data, - size=[dict_dim, emb_dim], - param_attr=fluid.ParamAttr(learning_rate=emb_lr), - ) - fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4) - lstm_h, c = fluid.layers.dynamic_lstm( - input=fc0, size=hid_dim * 4, is_reverse=False - ) - lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') - lstm_max_tanh = paddle.tanh(lstm_max) - fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') - prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') - cost = paddle.nn.functional.cross_entropy( - input=prediction, label=label, reduction='none', use_softmax=False - ) - avg_cost = paddle.mean(x=cost) - return avg_cost - - def simple_fc_net_with_accuracy(use_feed): img = fluid.layers.data(name='image', shape=[784], dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') @@ -268,29 +236,6 @@ class TestProgramPruneBackward(unittest.TestCase): method=transformer, feed_dict=feed_dict, optimizer=optimizer ) - def test_lstm(self): - def optimizer(): - optimizer = fluid.optimizer.Adagrad( - learning_rate=0.001, - regularization=fluid.regularizer.L2Decay(1e-4), - ) - return optimizer - - with self.program_scope_guard(): - word_dict_size = 5147 - reader = fake_imdb_reader(word_dict_size, 1) - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1 - ) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") - feeder = fluid.DataFeeder( - feed_list=[data, label], place=core.CPUPlace() - ) - feed_data = feeder.feed(reader()) - self.check_prune_correctness( - method=lstm_net, feed_dict=feed_data, optimizer=optimizer - ) - def test_cond(self): def optimizer(): optimizer = fluid.optimizer.SGD(learning_rate=0.01) diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index 2584dbd1e0..c99e6a2ae5 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -91,7 +91,6 @@ HIGH_PARALLEL_JOB_NEW = [ 'test_seqpool_concat_fuse_pass', 'test_analyzer_save_model', 'test_exception', - 'test_fc_lstm_fuse_pass', 'test_similarity_focus_op', 
'test_conv_batch_norm_mkldnn_fuse_pass', 'test_sequence_last_step', @@ -457,7 +456,6 @@ HIGH_PARALLEL_JOB_NEW = [ 'test_spawn_and_init_parallel_env', 'test_fleet_gradient_scale', 'unroll_array_ops_test', - 'test_fc_gru_fuse_pass', 'op_version_registry_test', 'test_cudnn_placement_pass', 'cipher_utils_test', @@ -1188,7 +1186,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [ 'test_sigmoid_focal_loss', 'test_manual_seed', 'test_lrn_op', - 'test_ir_memory_optimize_nlp', 'test_dataset_dataloader', 'test_complex_variable', 'test_lite_engine', @@ -1199,7 +1196,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [ 'test_elementwise_sub_op', 'test_compare_op', 'test_simnet', - 'test_label_semantic_roles', 'test_normal', 'test_tensor_scalar_type_promotion_static', 'test_trt_group_norm_op', @@ -1249,7 +1245,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [ 'test_input_spec', 'test_adam_op', 'test_elementwise_floordiv_op', - 'test_eager_deletion_gru_net', 'test_diagonal_op', 'test_imperative_static_runner_mnist', 'test_nearest_interp_op', @@ -1468,7 +1463,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [ 'test_nearest_interp_v2_op', 'test_sequence_slice_op', 'test_program_translator', - 'test_eager_deletion_lstm_net', 'malloc_test', 'test_size_op', 'test_analysis_predictor', @@ -1906,8 +1900,6 @@ CPU_PARALLEL_JOB = [ 'test_fetch_handler', 'test_feed_fetch_method', 'test_fc_mkldnn_op', - 'test_fc_lstm_fuse_pass', - 'test_fc_gru_fuse_pass', 'test_fc_elementwise_layernorm_fuse_pass_cc', 'test_fc_bf16_mkldnn_op', 'test_executor_feed_non_tensor', diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index 81660ab7f9..d56c2c438d 100755 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -162,8 +162,6 @@ STATIC_MODE_TESTING_LIST = [ 'test_dynrnn_static_input', 'test_eager_deletion_conditional_block', 'test_eager_deletion_delete_vars', - 'test_eager_deletion_gru_net', - 'test_eager_deletion_lstm_net', 'test_eager_deletion_padding_rnn', 'test_eager_deletion_recurrent_op', 'test_eager_deletion_while_op', @@ -586,8 +584,6 @@ STATIC_MODE_TESTING_LIST = [ 'test_conv_elementwise_add_act_fuse_pass', 'test_conv_elementwise_add_fuse_pass', 'test_fc_fuse_pass', - 'test_fc_gru_fuse_pass', - 'test_fc_lstm_fuse_pass', 'test_repeated_fc_relu_fuse_pass', 'test_seqconv_eltadd_relu_fuse_pass', 'test_squared_mat_sub_fuse_pass', @@ -683,7 +679,6 @@ STATIC_MODE_TESTING_LIST = [ 'test_fleet_rolemaker_new', 'test_fused_fc_elementwise_layernorm_op', 'test_fusion_transpose_flatten_concat_op', - 'test_ir_memory_optimize_nlp', 'test_nvprof', 'test_pipeline', 'test_weight_decay', -- GitLab
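
Migration note: the fluid RNN APIs deleted by this patch (dynamic_lstm, dynamic_lstmp, dynamic_gru, gru_unit, and layers.lstm) are superseded by the RNN modules under paddle.nn. The sketch below is a minimal illustration of the replacement interface, assuming Paddle 2.x with paddle.nn.LSTM, paddle.nn.GRU, and paddle.nn.GRUCell available; the tensor shapes and variable names are illustrative only and are not taken from this patch.

    import paddle

    # Dense, batch-major input: [batch_size, seq_len, input_size]
    x = paddle.randn((4, 23, 16))

    # paddle.nn.LSTM plays the role of the removed dynamic_lstm / layers.lstm.
    lstm = paddle.nn.LSTM(input_size=16, hidden_size=32, num_layers=2)
    prev_h = paddle.zeros((2, 4, 32))  # [num_layers, batch_size, hidden_size]
    prev_c = paddle.zeros((2, 4, 32))
    y, (h, c) = lstm(x, (prev_h, prev_c))  # y: [4, 23, 32]

    # paddle.nn.GRU plays the role of the removed dynamic_gru.
    gru = paddle.nn.GRU(input_size=16, hidden_size=32, num_layers=2)
    y_gru, h_gru = gru(x, paddle.zeros((2, 4, 32)))

    # paddle.nn.GRUCell is the closest analogue of the removed gru_unit:
    # one time step with explicit hidden-state handling.
    cell = paddle.nn.GRUCell(input_size=16, hidden_size=32)
    step_out, new_h = cell(paddle.randn((4, 16)), paddle.zeros((4, 32)))

Unlike the removed LoDTensor-based ops, these modules consume padded dense tensors; variable-length batches are expressed by passing a sequence_length tensor to the module call rather than through LoD information.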