Unverified commit c2f15f05 authored by wangzhen38 and committed by GitHub

[remove fluid] PRelu BilinearTensorProduct Conv2DTranspose SequenceConv RowConv (#48654)

* [remove fluid] PRelu BilinearTensorProduct

* [remove fluid] PRelu BilinearTensorProduct Conv2DTranspose SequenceConv RowConv

* [remove fluid] PRelu BilinearTensorProduct Conv2DTranspose SequenceConv RowConv

* [remove fluid] PRelu BilinearTensorProduct Conv2DTranspose SequenceConv RowConv

* [remove fluid] PRelu BilinearTensorProduct Conv2DTranspose SequenceConv RowConv

* [remove fluid] PRelu BilinearTensorProduct Conv2DTranspose SequenceConv RowConv

* [remove fluid] PRelu BilinearTensorProduct Conv2DTranspose SequenceConv RowConv

* [remove fluid] PRelu BilinearTensorProduct Conv2DTranspose SequenceConv RowConv
Parent 9a9e0aa0
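Before the diff: the updated tests replace each removed fluid dygraph layer with its paddle.nn counterpart (fluid.PRelu -> paddle.nn.PReLU, BilinearTensorProduct -> paddle.nn.Bilinear, Conv2DTranspose -> paddle.nn.Conv2DTranspose), while the SequenceConv and RowConv tests are deleted outright. A minimal sketch of that mapping follows; it is an orientation aid, not part of this commit, and the tensor shapes are illustrative assumptions only.

# Migration sketch (assumed usage, not taken from this commit).
import paddle

# fluid.dygraph.PRelu(mode='all', ...)  ->  paddle.nn.PReLU(...)
prelu = paddle.nn.PReLU(num_parameters=1, init=1.0)
y = prelu(paddle.ones([2, 4, 8, 8], dtype='float32'))

# fluid.dygraph.BilinearTensorProduct(input1_dim=5, input2_dim=4, output_dim=1000)
#   ->  paddle.nn.Bilinear(5, 4, 1000)
bilinear = paddle.nn.Bilinear(5, 4, 1000)
out = bilinear(paddle.randn([8, 5]), paddle.randn([8, 4]))

# fluid.dygraph.Conv2DTranspose(num_channels=3, num_filters=12, filter_size=12)
#   ->  paddle.nn.Conv2DTranspose(3, 12, 12)
deconv = paddle.nn.Conv2DTranspose(3, 12, 12)
img = deconv(paddle.randn([2, 3, 32, 32]))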
...@@ -53,9 +53,6 @@ __all__ = [
     'Linear',
     'BatchNorm',
     'Embedding',
-    'PRelu',
-    'BilinearTensorProduct',
-    'Conv2DTranspose',
     'Conv3DTranspose',
     'GroupNorm',
     'SpectralNorm',
...@@ -1128,637 +1125,6 @@ class Embedding(layers.Layer):
         return out
class PRelu(layers.Layer):
r"""
This interface is used to construct a callable object of the ``PRelu`` class.
For more details, refer to code examples.
It implements three activation methods of the ``PRelu`` activation function.
Equation:
.. math::
y = \max(0, x) + \\alpha * \min(0, x)
Parameters:
mode (str): The mode for weight sharing. It supports all, channel
and element. all: all elements share same weight
channel:elements in a channel share same weight
element:each element has a weight
channel (int, optional): The number of channels.
This argument is required when mode is "channel".
Default: None.
input_shape (list or tuple, optional): The shape of input.
This argument is required when mode is "element".
Default: None.
param_attr(ParamAttr, optional): The parameter attribute for the learnable
weight (alpha). Default: None.
dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
Attribute:
**weight** (Parameter): the learnable weights of this layer.
Returns:
None
Examples:
.. code-block:: python
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
import numpy as np
inp_np = np.ones([5, 200, 100, 100]).astype('float32')
with fluid.dygraph.guard():
inp_np = to_variable(inp_np)
prelu0 = fluid.PRelu(
mode='all',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(1.0)))
dy_rlt0 = prelu0(inp_np)
prelu1 = fluid.PRelu(
mode='channel',
channel=200,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(1.0)))
dy_rlt1 = prelu1(inp_np)
prelu2 = fluid.PRelu(
mode='element',
input_shape=inp_np.shape,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(1.0)))
dy_rlt2 = prelu2(inp_np)
"""
def __init__(
self,
mode,
channel=None,
input_shape=None,
param_attr=None,
dtype='float32',
):
# need specify name_scope since snake-cased 'PRelu' is 'p_relu'
super().__init__(name_scope='prelu')
self._mode = mode
self._param_attr = param_attr
self._dtype = dtype
if mode == 'all':
self._alpha_shape = [1]
elif mode == 'channel':
assert isinstance(
channel, int
), "channel argument is required when mode is 'channel'."
# NOTE(zhiqiu): The _alpha_shape should be [1, channel] + [1] * len(input_shape[2:]), not [1, channel, 1, 1].
# However, the suffix 1 in the list is useless, since the tensor is viewed as one demension array during kernel calculation.
# And, input_shape is not required when mode is 'channel', so it is simplified.
# NOTE(zhiqiu): Revert shape to [1, channel, 1, 1] for compatibility with saved model of old version.
self._alpha_shape = [1, channel, 1, 1]
elif mode == 'element':
assert isinstance(
input_shape, (list, tuple)
), "input_shape argument is required when mode is 'element'."
self._alpha_shape = [1] + list(input_shape)[1:]
else:
raise ValueError('mode should be one of all, channel, element.')
self.weight = self.create_parameter(
attr=self._param_attr,
shape=self._alpha_shape,
dtype='float32',
is_bias=False,
default_initializer=Constant(1.0),
)
def forward(self, input):
if in_dygraph_mode():
return _C_ops.prelu(input, self.weight, "NCHW", self._mode)
check_variable_and_dtype(input, 'input', ['float32'], 'PRelu')
out = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type="prelu",
inputs={"X": input, 'Alpha': self.weight},
attrs={"mode": self._mode},
outputs={"Out": out},
)
return out
class BilinearTensorProduct(layers.Layer):
r"""
**Add Bilinear Tensor Product Layer**
This layer performs bilinear tensor product on two inputs.
For example:
.. math::
out_{i} = x * W_{i} * {y^\mathrm{T}}, i=0,1,...,size-1
In this formula:
- :math:`x`: the first input contains M elements, shape is [batch_size, M].
- :math:`y`: the second input contains N elements, shape is [batch_size, N].
- :math:`W_{i}`: the i-th learned weight, shape is [M, N]
- :math:`out_{i}`: the i-th element of out, shape is [batch_size, size].
- :math:`y^\mathrm{T}`: the transpose of :math:`y`.
Parameters:
input1_dim (int): The dimension of each first input.
input2_dim (int): The dimension of each second input.
output_dim (int): The dimension of output of this layer.
name (str, optional): The default value is None. Normally there is no need for user
to set this property. For more information, please refer to :ref:`api_guide_Name`. Default: None.
act (str, optional): Activation to be applied to the output of this layer. The default value is None.
param_attr (ParamAttr, optional): The parameter attribute for the learnable w, parameters/weights of
this layer. The default value is None.
bias_attr (ParamAttr, optional): The parameter attribute for the bias
of this layer. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. The default value is None.
dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
Attribute:
**weight** (Parameter): the learnable weights of this layer.
**bias** (Parameter): the learnable bias of this layer.
Returns:
Tensor: A 2-D Tensor of shape [batch_size, size].
Examples:
.. code-block:: python
import paddle
import numpy
layer1 = numpy.random.random((5, 5)).astype('float32')
layer2 = numpy.random.random((5, 4)).astype('float32')
bilinearTensorProduct = paddle.nn.BilinearTensorProduct(
input1_dim=5, input2_dim=4, output_dim=1000)
ret = bilinearTensorProduct(paddle.to_tensor(layer1),
paddle.to_tensor(layer2))
"""
def __init__(
self,
input1_dim,
input2_dim,
output_dim,
name=None,
act=None,
param_attr=None,
bias_attr=None,
dtype='float32',
):
super().__init__()
self._param_attr = param_attr
self._bias_attr = bias_attr
self._act = act
self._name = name
self._input1_dim = input1_dim
self._input2_dim = input2_dim
self._output_dim = output_dim
self._inputs = dict()
self._dtype = dtype
param_shape = [self._output_dim, self._input1_dim, self._input2_dim]
self.weight = self.create_parameter(
attr=self._param_attr,
shape=param_shape,
dtype=self._dtype,
is_bias=False,
)
bias_size = [1, self._output_dim]
self.bias = self.create_parameter(
attr=self._bias_attr,
shape=bias_size,
dtype=self._dtype,
is_bias=True,
)
@deprecated(
since="2.0.0",
update_to="paddle.nn.Bilinear",
reason="New name and new args in Bilinear, easier to use.",
)
def forward(self, x, y):
check_variable_and_dtype(
x, 'x', ['float32', 'float64'], 'BilinearTensorProduct'
)
check_variable_and_dtype(
y, 'y', ['float32', 'float64'], 'BilinearTensorProduct'
)
self._inputs = {"X": x, "Y": y, "Weight": self.weight}
if self.bias is not None:
self._inputs["Bias"] = self.bias
if self._name is not None:
out = self._helper.create_variable(
name=".".join([self.full_name(), self._name]),
dtype=self._dtype,
persistable=False,
)
else:
out = self._helper.create_variable(
dtype=self._dtype, persistable=False
)
self._helper.append_op(
type="bilinear_tensor_product",
inputs=self._inputs,
outputs={"Out": out},
)
# add activation
return self._helper.append_activation(out, act=self._act)
class Conv2DTranspose(layers.Layer):
r"""
This interface is used to construct a callable object of the ``Conv2DTranspose`` class.
For more details, refer to code examples.
The convolution2D transpose layer calculates the output based on the input,
filter, and dilations, strides, paddings. Input and output
are in NCHW format. Where N is batch size, C is the number of feature map,
H is the height of the feature map, and W is the width of the feature map.
Filter's shape is [MCHW] , where M is the number of input feature map,
C is the number of output feature map, H is the height of the filter,
and W is the width of the filter. If the groups is greater than 1,
C will equal the number of input feature map divided by the groups.
If bias attribution and activation type are provided, bias is added to
the output of the convolution, and the corresponding activation function
is applied to the final result.
The details of convolution transpose layer, please refer to the following explanation and references
`conv2dtranspose <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_ .
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a ``Tensor`` with NCHW format.
* :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\
H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ) \\\\
W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
Parameters:
num_channels(int): The number of channels in the input image.
num_filters(int): The number of the filter. It is as same as the output
feature map.
filter_size(int or tuple): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square.
output_size(int or tuple, optional): The output image size. If output size is a
tuple, it must contain two integers, (image_H, image_W). None if use
filter_size, padding, and stride to calculate output_size.
if output_size and filter_size are specified at the same time, They
should follow the formula above. Default: None.
padding(int or tuple, optional): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: 0.
stride(int or tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: 1.
dilation(int or tuple, optional): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: 1.
groups(int, optional): The groups number of the Conv2D transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: 1.
param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr or bool, optional): The attribute for the bias of conv2d_transpose.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d_transpose
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True.
act (str, optional): Activation type, if it is set to None, activation is not appended.
Default: None.
dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
Attribute:
**weight** (Parameter): the learnable weights of filters of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Returns:
None
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
with fluid.dygraph.guard():
data = np.random.random((3, 32, 32, 5)).astype('float32')
conv2DTranspose = fluid.dygraph.nn.Conv2DTranspose(
num_channels=32, num_filters=2, filter_size=3)
ret = conv2DTranspose(fluid.dygraph.base.to_variable(data))
"""
def __init__(
self,
num_channels,
num_filters,
filter_size,
output_size=None,
padding=0,
stride=1,
dilation=1,
groups=None,
param_attr=None,
bias_attr=None,
use_cudnn=True,
act=None,
dtype='float32',
):
super().__init__()
assert (
param_attr is not False
), "param_attr should not be False in conv2d_transpose."
self._param_attr = param_attr
self._bias_attr = bias_attr
self._act = act
self._groups = groups
self._num_channels = num_channels
self._num_filters = num_filters
self._use_cudnn = use_cudnn
self._padding = padding
self._stride = stride
self._dilation = dilation
self._filter_size = filter_size
self._output_size = output_size
self._dtype = dtype
if (
self._num_channels == self._groups
and self._num_filters == self._num_channels
and not self._use_cudnn
):
self._op_type = 'depthwise_conv2d_transpose'
else:
self._op_type = 'conv2d_transpose'
self._padding = utils.convert_to_list(self._padding, 2, 'padding')
self._stride = utils.convert_to_list(self._stride, 2, 'stride')
self._dilation = utils.convert_to_list(self._dilation, 2, 'dilation')
self._filter_size = utils.convert_to_list(
self._filter_size, 2, 'conv2d_transpose.filter_size'
)
if self._output_size is None:
self._output_size = []
elif isinstance(self._output_size, list):
if utils._contain_var(self._output_size):
self._output_size = utils._convert_to_tensor_list(
self._output_size
)
else:
self._output_size = utils.convert_to_list(
self._output_size, 2, 'output_size'
)
elif isinstance(self._output_size, int):
self._output_size = utils.convert_to_list(
self._output_size, 2, 'output_size'
)
elif isinstance(self._output_size, Variable):
check_dtype(
self._output_size.dtype,
'output_size',
['int32', 'int64'],
'Conv2DTranspose',
)
if len(self._output_size.shape) == 1 and (
self._output_size.shape[0] == 1
or self._output_size.shape[0] == 2
):
if self._output_size.shape[0] == 1:
self._output_size = [self._output_size, self._output_size]
else:
raise ValueError(
"output_size must contain one or two integers."
)
else:
raise ValueError("output_size should be list or int or Tensor")
self._padding = utils.convert_to_list(self._padding, 2, 'padding')
self._groups = 1 if self._groups is None else self._groups
filter_shape = [
self._num_channels,
self._num_filters // self._groups,
] + self._filter_size
self.weight = self.create_parameter(
dtype=self._dtype, shape=filter_shape, attr=self._param_attr
)
self.bias = self.create_parameter(
attr=self._bias_attr,
shape=[self._num_filters],
dtype=self._dtype,
is_bias=True,
)
def forward(self, input):
if _non_static_mode():
op = getattr(_legacy_C_ops, self._op_type)
out = op(
input,
self.weight,
'output_size',
self._output_size,
'strides',
self._stride,
'paddings',
self._padding,
'dilations',
self._dilation,
'groups',
self._groups,
'use_cudnn',
self._use_cudnn,
)
pre_bias = out
pre_act = dygraph_utils._append_bias_in_dygraph(
pre_bias, self.bias, 1
)
return dygraph_utils._append_activation_in_dygraph(
pre_act, act=self._act
)
check_variable_and_dtype(
input, 'input', ['float16', 'float32', 'float64'], "Conv2DTranspose"
)
inputs = {'Input': [input], 'Filter': [self.weight]}
attrs = {
'output_size': self._output_size,
'strides': self._stride,
'paddings': self._padding,
'dilations': self._dilation,
'groups': self._groups,
'use_cudnn': self._use_cudnn,
}
pre_bias = self._helper.create_variable_for_type_inference(
dtype=input.dtype
)
self._helper.append_op(
type=self._op_type,
inputs=inputs,
outputs={'Output': pre_bias},
attrs=attrs,
)
if self.bias is not None:
pre_act = self._helper.create_variable_for_type_inference(
dtype=self._dtype
)
self._helper.append_op(
type='elementwise_add',
inputs={'X': [pre_bias], 'Y': [self.bias]},
outputs={'Out': [pre_act]},
attrs={'axis': 1},
)
else:
pre_act = pre_bias
out = self._helper.append_activation(pre_act, act=self._act)
return out
class SequenceConv(layers.Layer):
"""
This function creates the op for sequence_conv, using the inputs and
other convolutional configurations for the filters and stride as given
in the input parameters to the function.
Parameters:
name_scope(str): The name of this class.
num_filters (int): number of filters.
filter_size (int): the filter size (H and W). Default: 3.
filter_stride (int): stride of the filter. Default: 1.
padding (bool|None): if True, add paddings. Default: None
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of sequence_conv.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, sequence_conv
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of sequence_conv. If it is set to None or one attribute of ParamAttr, sequence_conv
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
act (str): Activation type, if it is set to None, activation is not appended.
Default: None.
Attributes:
weight (Parameter): the learnable weights of filters of this layer.
bias (Parameter|None): the learnable bias of this layer.
Returns:
Variable: output of sequence_conv
"""
def __init__(
self,
name_scope,
num_filters,
filter_size=3,
filter_stride=1,
padding=None,
bias_attr=None,
param_attr=None,
act=None,
):
assert (
not _non_static_mode()
), "SequenceConv is not supported by dynamic graph mode yet!"
super().__init__(name_scope)
self._num_filters = num_filters
self._filter_size = filter_size
self._filter_stride = filter_stride
self._padding = padding
self._bias_attr = bias_attr
self._param_attr = param_attr
self._act = act
def _build_once(self, input):
self._dtype = self._helper.input_dtype(input)
filter_shape = [self._filter_size * input.shape[1], self._num_filters]
self.weight = self.create_parameter(
attr=self._param_attr, shape=filter_shape, dtype=self._dtype
)
self.bias = self.create_parameter(
attr=self._bias_attr,
shape=[self._num_filters],
dtype=self._dtype,
is_bias=True,
)
def forward(self, input):
pre_bias = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type='sequence_conv',
inputs={
'X': [input],
'Filter': [self.weight],
},
outputs={"Out": pre_bias},
attrs={
'contextStride': self._filter_stride,
'contextStart': -int(self._filter_size // 2),
'contextLength': self._filter_size,
},
)
if self.bias is not None:
pre_act = self._helper.create_variable_for_type_inference(
dtype=self._dtype
)
self._helper.append_op(
type='elementwise_add',
inputs={'X': [pre_bias], 'Y': [self.bias]},
outputs={'Out': [pre_act]},
attrs={'axis': 1},
)
else:
pre_act = pre_bias
return self._helper.append_activation(pre_act, act=self._act)
 class RowConv(layers.Layer):
     """
     ***Row-convolution operator***
......
...@@ -113,11 +113,11 @@ class TestDygraphBasicApi_ToVariable(unittest.TestCase):
 # 1. test Apis that inherit from layers.Layer
 def dyfunc_BilinearTensorProduct(layer1, layer2):
-    bilinearTensorProduct = fluid.dygraph.nn.BilinearTensorProduct(
-        input1_dim=5,
-        input2_dim=4,
-        output_dim=1000,
-        param_attr=fluid.ParamAttr(
+    bilinearTensorProduct = paddle.nn.Bilinear(
+        5,
+        4,
+        1000,
+        weight_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.99)
         ),
         bias_attr=fluid.ParamAttr(
...@@ -165,12 +165,11 @@ def dyfunc_Conv3D(input):
 def dyfunc_Conv2DTranspose(input):
-    conv2dTranspose = fluid.dygraph.nn.Conv2DTranspose(
-        num_channels=3,
-        num_filters=12,
-        filter_size=12,
-        use_cudnn=False,
-        param_attr=fluid.ParamAttr(
+    conv2dTranspose = paddle.nn.Conv2DTranspose(
+        3,
+        12,
+        12,
+        weight_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Constant(value=0.99)
         ),
         bias_attr=fluid.ParamAttr(
...@@ -221,11 +220,12 @@ def dyfunc_Pool2D(input):
 def dyfunc_Prelu(input):
-    prelu0 = fluid.PRelu(
-        mode='all',
-        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(1.0)),
+    prelu0 = paddle.nn.PReLU(
+        weight_attr=fluid.ParamAttr(
+            initializer=fluid.initializer.Constant(1.0)
+        ),
     )
-    res = prelu0(input=input)
+    res = prelu0(input)
     return res
......
...@@ -37,7 +37,7 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "1"
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.dygraph import to_variable
-from paddle.fluid.dygraph.nn import BatchNorm, Conv2DTranspose
+from paddle.fluid.dygraph.nn import BatchNorm
 from paddle.jit import ProgramTranslator
 from paddle.jit.api import declarative
...@@ -430,14 +430,13 @@ class DeConv2D(fluid.dygraph.Layer):
                 initializer=fluid.initializer.Constant(0.0)
             )
-        self._deconv = Conv2DTranspose(
+        self._deconv = paddle.nn.Conv2DTranspose(
             num_channels,
             num_filters,
-            filter_size=filter_size,
+            filter_size,
             stride=stride,
             padding=padding,
-            use_cudnn=use_cudnn,
-            param_attr=fluid.ParamAttr(
+            weight_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.NormalInitializer(
                     loc=0.0, scale=stddev
                 )
......
...@@ -24,9 +24,7 @@ import paddle.fluid as fluid
 class TestDygraphBilinearTensorProductAPIError(unittest.TestCase):
     def test_errors(self):
         with fluid.program_guard(fluid.Program(), fluid.Program()):
-            layer = fluid.dygraph.nn.BilinearTensorProduct(
-                input1_dim=5, input2_dim=4, output_dim=1000
-            )
+            layer = paddle.nn.Bilinear(5, 4, 1000)
             # the input must be Variable.
             x0 = fluid.create_lod_tensor(
                 np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()
......
...@@ -1084,86 +1084,5 @@ class TestTensorOutputSize4(TestTensorOutputSize1):
         return out
class TestTensorOutputSize5(TestTensorOutputSize1):
def path_prefix(self):
return 'conv2d_transpose_tensor_output_size5'
def call_func(self, x):
w_var = paddle.randn((3, 6, 3, 3), dtype='float32')
output_size = [17, paddle.assign([17])]
conv2d_trans = paddle.fluid.dygraph.Conv2DTranspose(
num_channels=3,
num_filters=6,
filter_size=3,
output_size=output_size,
stride=2,
)
out = conv2d_trans(x)
return out
class TestTensorOutputSize6(TestTensorOutputSize1):
def path_prefix(self):
return 'conv2d_transpose_tensor_output_size6'
def var_prefix(self):
return "Var["
def call_func(self, x):
w_var = paddle.randn((3, 6, 3, 3), dtype='float32')
output_size = paddle.assign([17, 17])
conv2d_trans = paddle.fluid.dygraph.Conv2DTranspose(
num_channels=3,
num_filters=6,
filter_size=3,
output_size=output_size,
stride=2,
)
out = conv2d_trans(x)
return out
class TestTensorOutputSize7(TestTensorOutputSize1):
def path_prefix(self):
return 'conv2d_transpose_tensor_output_size7'
def var_prefix(self):
return ""
def call_func(self, x):
w_var = paddle.randn((3, 6, 3, 3), dtype='float32')
output_size = 17
conv2d_trans = paddle.fluid.dygraph.Conv2DTranspose(
num_channels=3,
num_filters=6,
filter_size=3,
output_size=output_size,
stride=2,
)
out = conv2d_trans(x)
return out
class TestTensorOutputSize8(TestTensorOutputSize1):
def path_prefix(self):
return 'conv2d_transpose_tensor_output_size8'
def var_prefix(self):
return ""
def call_func(self, x):
w_var = paddle.randn((3, 6, 3, 3), dtype='float32')
output_size = [17, 17]
conv2d_trans = paddle.fluid.dygraph.Conv2DTranspose(
num_channels=3,
num_filters=6,
filter_size=3,
output_size=output_size,
stride=2,
)
out = conv2d_trans(x)
return out
 if __name__ == '__main__':
     unittest.main()
...@@ -12,10 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import unittest
+import numpy as np
 import paddle
 import paddle.fluid as fluid
-import numpy as np
-import unittest
 def infinite_reader():
......
...@@ -21,7 +21,7 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.framework as framework
-from paddle.fluid.dygraph.nn import BatchNorm, Embedding, GroupNorm, PRelu
+from paddle.fluid.dygraph.nn import BatchNorm, Embedding, GroupNorm
 from paddle.nn import Linear
...@@ -212,9 +212,6 @@ class TestDygraphLoadStatic(unittest.TestCase):
         self.layer_norm_1 = paddle.nn.LayerNorm([10])
         self.layer_norm_2 = paddle.nn.LayerNorm(10)
-        self.prelu1 = PRelu("channel", channel=5)
-        self.prelu2 = PRelu("channel", channel=5)
         self.group_norm1 = GroupNorm(8, 4)
         self.gourp_norm2 = GroupNorm(8, 4)
......
...@@ -185,10 +185,10 @@ class Deconv2DLayer(fluid.dygraph.Layer):
     ):
         super().__init__()
-        self._deconv = fluid.dygraph.Conv2DTranspose(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=filter_size,
+        self._deconv = paddle.nn.Conv2DTranspose(
+            num_channels,
+            num_filters,
+            filter_size,
             stride=stride,
             padding=padding,
             bias_attr=None if use_bias else False,
......
...@@ -33,8 +33,6 @@ from paddle.fluid.framework import (
     default_main_program,
     program_guard,
 )
-from paddle.fluid.initializer import Constant
-from paddle.fluid.param_attr import ParamAttr
 from paddle.tensor import random
...@@ -383,54 +381,6 @@ class TestLayer(LayerTest):
         np.testing.assert_allclose(n, min_eager_ret_value, rtol=1e-05)
         np.testing.assert_allclose(n2, max_eager_ret_value, rtol=1e-05)
def test_sequence_conv(self):
inp_np = np.arange(12).reshape([3, 4]).astype('float32')
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
with self.static_graph():
seq = layers.data(
name='seq_in',
shape=[3, 4],
dtype='float32',
lod_level=1,
append_batch_size=False,
)
out = layers.sequence_conv(seq, 2, act='sigmoid')
static_rlt = self.get_static_graph_result(
feed={
"seq_in": fluid.create_lod_tensor(
data=inp_np, recursive_seq_lens=[[1, 1, 1]], place=place
)
},
fetch_list=[out],
with_lod=True,
)[0]
with self.static_graph():
seq = layers.data(
name='seq_in',
shape=[3, 4],
dtype='float32',
lod_level=1,
append_batch_size=False,
)
seq_conv = nn.SequenceConv('seq_conv', num_filters=2, act='sigmoid')
out = seq_conv(seq)
static_rlt2 = self.get_static_graph_result(
feed={
"seq_in": fluid.create_lod_tensor(
data=inp_np, recursive_seq_lens=[[1, 1, 1]], place=place
)
},
fetch_list=[out],
with_lod=True,
)[0]
np.testing.assert_array_equal(
np.array(static_rlt), np.array(static_rlt2)
)
     def test_conv2d_transpose(self):
         inp_np = np.arange(0, 24).reshape([2, 3, 2, 2]).astype('float32')
         with self.static_graph():
...@@ -447,37 +397,37 @@ class TestLayer(LayerTest):
             )[0]
         with self.static_graph():
             img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32')
-            conv2d_transpose = nn.Conv2DTranspose(
-                num_channels=3,
-                num_filters=10,
-                filter_size=27,
-                act='sigmoid',
+            conv2d_transpose = paddle.nn.Conv2DTranspose(
+                3,
+                10,
+                27,
                 bias_attr=fluid.initializer.ConstantInitializer(value=1),
             )
             out = conv2d_transpose(img)
+            out = paddle.nn.functional.sigmoid(out)
             static_rlt2 = self.get_static_graph_result(
                 feed={'pixel': inp_np}, fetch_list=[out]
             )[0]
         with self.dynamic_graph():
             with _test_eager_guard():
-                conv2d_transpose = nn.Conv2DTranspose(
-                    num_channels=3,
-                    num_filters=10,
-                    filter_size=27,
-                    act='sigmoid',
+                conv2d_transpose = paddle.nn.Conv2DTranspose(
+                    3,
+                    10,
+                    27,
                     bias_attr=fluid.initializer.ConstantInitializer(value=1),
                 )
                 dy_eager_rlt = conv2d_transpose(base.to_variable(inp_np))
+                dy_eager_rlt = paddle.nn.functional.sigmoid(dy_eager_rlt)
                 dy_eager_rlt_value = dy_eager_rlt.numpy()
-            conv2d_transpose = nn.Conv2DTranspose(
-                num_channels=3,
-                num_filters=10,
-                filter_size=27,
-                act='sigmoid',
+            conv2d_transpose = paddle.nn.Conv2DTranspose(
+                3,
+                10,
+                27,
                 bias_attr=fluid.initializer.ConstantInitializer(value=1),
             )
             dy_rlt = conv2d_transpose(base.to_variable(inp_np))
+            dy_rlt = paddle.nn.functional.sigmoid(dy_rlt)
             dy_rlt_value = dy_rlt.numpy()
         np.testing.assert_allclose(static_rlt2, static_rlt, rtol=1e-05)
         np.testing.assert_allclose(dy_rlt_value, static_rlt2, rtol=1e-05)
...@@ -492,14 +442,12 @@ class TestLayer(LayerTest):
                     custom_weight
                 )
             )
-            conv2d1 = nn.Conv2DTranspose(
-                num_channels=3, num_filters=3, filter_size=[2, 2]
-            )
-            conv2d2 = nn.Conv2DTranspose(
-                num_channels=3,
-                num_filters=3,
-                filter_size=[2, 2],
-                param_attr=weight_attr,
+            conv2d1 = paddle.nn.Conv2DTranspose(3, 3, [2, 2])
+            conv2d2 = paddle.nn.Conv2DTranspose(
+                3,
+                3,
+                [2, 2],
+                weight_attr=weight_attr,
             )
             dy_ret1 = conv2d1(base.to_variable(images))
             dy_ret2 = conv2d2(base.to_variable(images))
...@@ -537,14 +485,12 @@
                     custom_weight
                 )
             )
-            conv2d1 = nn.Conv2DTranspose(
-                num_channels=3, num_filters=3, filter_size=[2, 2]
-            )
-            conv2d2 = nn.Conv2DTranspose(
-                num_channels=3,
-                num_filters=3,
-                filter_size=[2, 2],
-                param_attr=weight_attr,
+            conv2d1 = paddle.nn.Conv2DTranspose(3, 3, [2, 2])
+            conv2d2 = paddle.nn.Conv2DTranspose(
+                3,
+                3,
+                [2, 2],
+                weight_attr=weight_attr,
             )
             dy_ret1 = conv2d1(base.to_variable(images))
             dy_ret2 = conv2d2(base.to_variable(images))
...@@ -578,9 +524,7 @@ class TestLayer(LayerTest):
             # the input of Conv2DTranspose must be Variable.
             def test_Variable():
                 images = np.ones([2, 3, 5, 5], dtype='float32')
-                conv2d = nn.Conv2DTranspose(
-                    num_channels=3, num_filters=3, filter_size=[2, 2]
-                )
+                conv2d = paddle.nn.Conv2DTranspose(3, 3, [2, 2])
                 conv2d_ret1 = conv2d(images)
             self.assertRaises(TypeError, test_Variable)
...@@ -591,9 +535,7 @@ class TestLayer(LayerTest):
                 images = layers.data(
                     name='pixel', shape=[3, 5, 5], dtype='int32'
                 )
-                conv2d = nn.Conv2DTranspose(
-                    num_channels=3, num_filters=3, filter_size=[2, 2]
-                )
+                conv2d = paddle.nn.Conv2DTranspose(3, 3, [2, 2])
                 conv2d_ret2 = conv2d(images)
             self.assertRaises(TypeError, test_type)
...@@ -628,53 +570,55 @@
             data_y = layers.data(
                 name='y', shape=[1, 3], dtype="float32", append_batch_size=False
             )
-            btp = nn.BilinearTensorProduct(
+            btp = paddle.nn.Bilinear(
                 3,
                 3,
                 6,
                 bias_attr=fluid.initializer.ConstantInitializer(value=1),
-                act='sigmoid',
             )
             out = btp(data_x, data_y)
+            out = paddle.nn.functional.sigmoid(out)
             static_rlt2 = self.get_static_graph_result(
                 feed={'x': inp_np_x, 'y': inp_np_y}, fetch_list=[out]
             )[0]
         with self.dynamic_graph():
             with _test_eager_guard():
-                btp = nn.BilinearTensorProduct(
+                btp = paddle.nn.Bilinear(
                     3,
                     3,
                     6,
                     bias_attr=fluid.initializer.ConstantInitializer(value=1),
-                    act='sigmoid',
                 )
                 dy_eager_rlt = btp(
                     base.to_variable(inp_np_x), base.to_variable(inp_np_y)
                 )
+                dy_eager_rlt = paddle.nn.functional.sigmoid(dy_eager_rlt)
                 dy_eager_rlt_value = dy_eager_rlt.numpy()
-            btp = nn.BilinearTensorProduct(
+            btp = paddle.nn.Bilinear(
                 3,
                 3,
                 6,
                 bias_attr=fluid.initializer.ConstantInitializer(value=1),
-                act='sigmoid',
             )
             dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y))
+            dy_rlt = paddle.nn.functional.sigmoid(dy_rlt)
             dy_rlt_value = dy_rlt.numpy()
         with self.dynamic_graph():
             with _test_eager_guard():
-                btp2 = nn.BilinearTensorProduct(3, 3, 6, act='sigmoid')
+                btp2 = paddle.nn.Bilinear(3, 3, 6)
                 dy_eager_rlt2 = btp2(
                     base.to_variable(inp_np_x), base.to_variable(inp_np_y)
                 )
+                dy_eager_rlt2 = paddle.nn.functional.sigmoid(dy_eager_rlt2)
                 dy_eager_rlt2_value = dy_eager_rlt2.numpy()
-            btp2 = nn.BilinearTensorProduct(3, 3, 6, act='sigmoid')
+            btp2 = paddle.nn.Bilinear(3, 3, 6)
             dy_rlt2 = btp2(
                 base.to_variable(inp_np_x), base.to_variable(inp_np_y)
             )
+            dy_rlt2 = paddle.nn.functional.sigmoid(dy_rlt2)
             dy_rlt2_value = dy_rlt2.numpy()
         with self.static_graph():
...@@ -706,16 +650,16 @@
                     custom_weight
                 )
             )
-            btp1 = nn.BilinearTensorProduct(3, 3, 6, act='sigmoid')
-            btp2 = nn.BilinearTensorProduct(
-                3, 3, 6, act='sigmoid', param_attr=weight_attr
-            )
+            btp1 = paddle.nn.Bilinear(3, 3, 6)
+            btp2 = paddle.nn.Bilinear(3, 3, 6, weight_attr=weight_attr)
             dy_rlt1 = btp1(
                 base.to_variable(inp_np_x), base.to_variable(inp_np_y)
             )
+            dy_rlt1 = paddle.nn.functional.sigmoid(dy_rlt1)
             dy_rlt2 = btp2(
                 base.to_variable(inp_np_x), base.to_variable(inp_np_y)
             )
+            dy_rlt2 = paddle.nn.functional.sigmoid(dy_rlt2)
             self.assertFalse(
                 np.array_equal(dy_rlt1.numpy(), dy_rlt2.numpy())
             )
...@@ -744,16 +688,16 @@
                     custom_weight
                 )
             )
-            btp1 = nn.BilinearTensorProduct(3, 3, 6, act='sigmoid')
-            btp2 = nn.BilinearTensorProduct(
-                3, 3, 6, act='sigmoid', param_attr=weight_attr
-            )
+            btp1 = paddle.nn.Bilinear(3, 3, 6)
+            btp2 = paddle.nn.Bilinear(3, 3, 6, weight_attr=weight_attr)
             dy_rlt1 = btp1(
                 base.to_variable(inp_np_x), base.to_variable(inp_np_y)
             )
+            dy_rlt1 = paddle.nn.functional.sigmoid(dy_rlt1)
             dy_rlt2 = btp2(
                 base.to_variable(inp_np_x), base.to_variable(inp_np_y)
             )
+            dy_rlt2 = paddle.nn.functional.sigmoid(dy_rlt2)
             self.assertFalse(np.array_equal(dy_rlt1.numpy(), dy_rlt2.numpy()))
             btp2.weight.set_value(btp1.weight.numpy())
             btp2.bias.set_value(btp1.bias)
...@@ -772,133 +716,6 @@
             )
             np.testing.assert_array_equal(btp1.bias.numpy(), btp2.bias.numpy())
def prelu_test(self, mode):
inp_np = np.ones([5, 200, 100, 100]).astype('float32')
with self.static_graph():
data_t = layers.data(
name="input",
shape=[5, 200, 100, 100],
dtype="float32",
append_batch_size=False,
)
out = paddle.static.nn.prelu(
data_t, mode, param_attr=ParamAttr(initializer=Constant(1.0))
)
static_rlt = self.get_static_graph_result(
feed={"input": inp_np}, fetch_list=[out]
)[0]
with self.static_graph():
data_t = layers.data(
name="input",
shape=[5, 200, 100, 100],
dtype="float32",
append_batch_size=False,
)
prelu = nn.PRelu(
mode=mode,
channel=inp_np.shape[1],
input_shape=data_t.shape,
param_attr=ParamAttr(initializer=Constant(1.0)),
)
out = prelu(data_t)
static_rlt2 = self.get_static_graph_result(
feed={"input": inp_np}, fetch_list=[out]
)[0]
with self.dynamic_graph():
with _test_eager_guard():
prelu = nn.PRelu(
mode=mode,
channel=inp_np.shape[1],
input_shape=inp_np.shape,
param_attr=ParamAttr(initializer=Constant(1.0)),
)
dy_eager_rlt = prelu(base.to_variable(inp_np))
dy_eager_rlt_value = dy_eager_rlt.numpy()
prelu = nn.PRelu(
mode=mode,
channel=inp_np.shape[1],
input_shape=inp_np.shape,
param_attr=ParamAttr(initializer=Constant(1.0)),
)
dy_rlt = prelu(base.to_variable(inp_np))
dy_rlt_value = dy_rlt.numpy()
np.testing.assert_allclose(static_rlt2, static_rlt, rtol=1e-05)
np.testing.assert_allclose(dy_rlt_value, static_rlt, rtol=1e-05)
np.testing.assert_allclose(dy_eager_rlt_value, static_rlt, rtol=1e-05)
with self.dynamic_graph():
with _test_eager_guard():
inp_np = np.random.randn(5, 200, 100, 100).astype("float32")
inp = base.to_variable(inp_np)
prelu1 = nn.PRelu(
mode=mode,
channel=inp_np.shape[1],
input_shape=inp_np.shape,
param_attr=ParamAttr(initializer=Constant(2.0)),
)
prelu2 = nn.PRelu(
mode=mode,
channel=inp_np.shape[1],
input_shape=inp_np.shape,
param_attr=ParamAttr(initializer=Constant(1.0)),
)
dy_rlt1 = prelu1(inp)
dy_rlt2 = prelu2(inp)
self.assertFalse(
np.array_equal(prelu1.weight.numpy(), prelu2.weight.numpy())
)
self.assertFalse(
np.array_equal(dy_rlt1.numpy(), dy_rlt2.numpy())
)
prelu2.weight.set_value(prelu1.weight.numpy())
dy_rlt1 = prelu1(inp)
dy_rlt2 = prelu2(inp)
np.testing.assert_array_equal(dy_rlt1.numpy(), dy_rlt2.numpy())
prelu2.weight = prelu1.weight
np.testing.assert_array_equal(
prelu1.weight.numpy(), prelu2.weight.numpy()
)
inp_np = np.random.randn(5, 200, 100, 100).astype("float32")
inp = base.to_variable(inp_np)
prelu1 = nn.PRelu(
mode=mode,
channel=inp_np.shape[1],
input_shape=inp_np.shape,
param_attr=ParamAttr(initializer=Constant(2.0)),
)
prelu2 = nn.PRelu(
mode=mode,
channel=inp_np.shape[1],
input_shape=inp_np.shape,
param_attr=ParamAttr(initializer=Constant(1.0)),
)
dy_rlt1 = prelu1(inp)
dy_rlt2 = prelu2(inp)
self.assertFalse(
np.array_equal(prelu1.weight.numpy(), prelu2.weight.numpy())
)
self.assertFalse(np.array_equal(dy_rlt1.numpy(), dy_rlt2.numpy()))
prelu2.weight.set_value(prelu1.weight.numpy())
dy_rlt1 = prelu1(inp)
dy_rlt2 = prelu2(inp)
np.testing.assert_array_equal(dy_rlt1.numpy(), dy_rlt2.numpy())
prelu2.weight = prelu1.weight
np.testing.assert_array_equal(
prelu1.weight.numpy(), prelu2.weight.numpy()
)
def test_prelu(self):
self.prelu_test("channel")
self.prelu_test("element")
self.prelu_test("all")
     def test_embeding(self):
         inp_word = np.array([[[1]]]).astype('int64')
         dict_size = 20
...@@ -1207,56 +1024,6 @@ class TestLayer(LayerTest):
                 conv3d1.bias.numpy(), conv3d2.bias.numpy()
             )
def test_row_conv(self):
input = np.arange(15).reshape([3, 5]).astype('float32')
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
with self.static_graph():
x = layers.data(
name='X',
shape=[3, 5],
dtype='float32',
lod_level=1,
append_batch_size=False,
)
ret = layers.row_conv(input=x, future_context_size=2)
static_ret = self.get_static_graph_result(
feed={
'X': fluid.create_lod_tensor(
data=input, recursive_seq_lens=[[1, 1, 1]], place=place
)
},
fetch_list=[ret],
with_lod=True,
)[0]
with self.static_graph():
x = layers.data(
name='X',
shape=[3, 5],
dtype='float32',
lod_level=1,
append_batch_size=False,
)
rowConv = nn.RowConv('RowConv', future_context_size=2)
ret = rowConv(x)
static_ret2 = self.get_static_graph_result(
feed={
'X': fluid.create_lod_tensor(
data=input, recursive_seq_lens=[[1, 1, 1]], place=place
)
},
fetch_list=[ret],
with_lod=True,
)[0]
# TODO: dygraph can't support LODTensor
np.testing.assert_allclose(static_ret, static_ret2, rtol=1e-05)
     def func_group_norm(self):
         if core.is_compiled_with_cuda():
             place = core.CUDAPlace(0)
......
...@@ -16,7 +16,6 @@ import sys
 import time
 import unittest
-import paddle
 import numpy as np
 from test_multiprocess_dataloader_static import (
     BATCH_SIZE,
......
...@@ -19,9 +19,6 @@ import numpy as np
 sys.path.append("..")
-import paddle
-import paddle.nn.functional as F
 from op_test import OpTest
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
...@@ -31,6 +28,7 @@ from xpu.get_test_cover_info import (
 )
 import paddle
+import paddle.nn.functional as F
 paddle.enable_static()
......
...@@ -12,18 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import paddle
-import paddle.fluid as fluid
-import numpy as np
 import sys
 import unittest
+import numpy as np
+import paddle
+import paddle.fluid as fluid
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
 paddle.enable_static()
......
...@@ -15,16 +15,12 @@
 All layers just related to metric.
 """
-from paddle.fluid.layer_helper import LayerHelper
+from paddle import _legacy_C_ops
+from paddle.fluid.data_feeder import check_variable_and_dtype
+from paddle.fluid.framework import Variable, _non_static_mode, _varbase_creator
 from paddle.fluid.initializer import Constant
-from paddle.fluid.framework import (
-    Variable,
-    _non_static_mode,
-    _varbase_creator,
-)
+from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.layers import tensor
-from paddle.fluid.data_feeder import check_variable_and_dtype
-from paddle import _legacy_C_ops
 __all__ = ['accuracy', 'auc']
......