机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit ec613d90
Authored on Sep 22, 2020 by zhhsplendid
Split PR, test=develop
Parent: f4f5f3f2
Showing 3 changed files with 0 additions and 986 deletions (+0 -986)

python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py  +0 -504
python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py  +0 -314
python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py  +0 -168
python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py (deleted, 100644 → 0)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import reduce

import paddle


class EmbeddingLayer(object):
    """
    Embedding Layer class
    """

    def __init__(self, dict_size, emb_dim, name="emb", padding_idx=None):
        """
        initialize
        """
        self.dict_size = dict_size
        self.emb_dim = emb_dim
        self.name = name
        self.padding_idx = padding_idx

    def ops(self):
        """
        operation
        """
        # TODO(huihuangzheng): The original code set is_sparse=True, but it
        # causes a crash in dy2stat. Set it back to True after fixing it.
        emb = paddle.fluid.dygraph.Embedding(
            size=[self.dict_size, self.emb_dim],
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=paddle.ParamAttr(
                name=self.name, initializer=paddle.nn.initializer.Xavier()))

        return emb


class FCLayer(object):
    """
    Fully Connect Layer class
    """

    def __init__(self, fc_dim, act, name="fc"):
        """
        initialize
        """
        self.fc_dim = fc_dim
        self.act = act
        self.name = name

    def ops(self):
        """
        operation
        """
        fc = FC(size=self.fc_dim,
                param_attr=paddle.ParamAttr(name="%s.w" % self.name),
                bias_attr=paddle.ParamAttr(name="%s.b" % self.name),
                act=self.act)
        return fc


class ConcatLayer(object):
    """
    Connection Layer class
    """

    def __init__(self, axis):
        """
        initialize
        """
        self.axis = axis

    def ops(self, inputs):
        """
        operation
        """
        concat = paddle.concat(x=inputs, axis=self.axis)
        return concat


class ReduceMeanLayer(object):
    """
    Reduce Mean Layer class
    """

    def __init__(self):
        """
        initialize
        """
        pass

    def ops(self, input):
        """
        operation
        """
        mean = paddle.reduce_mean(input)
        return mean


class CosSimLayer(object):
    """
    Cos Similarity Calculate Layer
    """

    def __init__(self):
        """
        initialize
        """
        pass

    def ops(self, x, y):
        """
        operation
        """
        sim = paddle.metric.cos_sim(x, y)
        return sim


class ElementwiseMaxLayer(object):
    """
    Elementwise Max Layer class
    """

    def __init__(self):
        """
        initialize
        """
        pass

    def ops(self, x, y):
        """
        operation
        """
        max = paddle.maximum(x=x, y=y)
        return max


class ElementwiseAddLayer(object):
    """
    Elementwise Add Layer class
    """

    def __init__(self):
        """
        initialize
        """
        pass

    def ops(self, x, y):
        """
        operation
        """
        add = paddle.add(x=x, y=y)
        return add


class ElementwiseSubLayer(object):
    """
    Elementwise Sub Layer class
    """

    def __init__(self):
        """
        initialize
        """
        pass

    def ops(self, x, y):
        """
        operation
        """
        sub = paddle.fluid.layers.elementwise_sub(x, y)
        return sub


class ConstantLayer(object):
    """
    Generate A Constant Layer class
    """

    def __init__(self):
        """
        initialize
        """
        pass

    def ops(self, input, shape, dtype, value):
        """
        operation
        """
        shape = list(shape)
        input_shape = paddle.shape(input)
        shape[0] = input_shape[0]
        constant = paddle.fill_constant(shape, dtype, value)
        return constant


class SoftsignLayer(object):
    """
    Softsign Layer class
    """

    def __init__(self):
        """
        initialize
        """
        pass

    def ops(self, input):
        """
        operation
        """
        softsign = paddle.nn.functional.softsign(input)
        return softsign


class FC(paddle.nn.Layer):
    """
    This interface is used to construct a callable object of the ``FC`` class.
    For more details, refer to code examples.
    It creates a fully connected layer in the network. It can take
    one or multiple ``Tensor`` as its inputs. It creates a Variable called weights
    for each input tensor, which represents a fully connected weight matrix from
    each input unit to each output unit. The fully connected layer multiplies each
    input tensor with its corresponding weight to produce an output Tensor with
    shape [N, `size`], where N is batch size. If multiple input tensors are given,
    the results of multiple output tensors with shape [N, `size`] will be summed up.
    If ``bias_attr`` is not None, a bias variable will be created and added to the
    output. Finally, if ``act`` is not None, it will be applied to the output as well.

    When the input is a single ``Tensor``:

    .. math::

        Out = Act({XW + b})

    When the input consists of multiple ``Tensor``:

    .. math::

        Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})

    In the above equation:

    * :math:`N`: Number of the inputs. N equals len(input) if input is a list of ``Tensor``.
    * :math:`X_i`: The i-th input ``Tensor``.
    * :math:`W_i`: The i-th weights matrix corresponding to the i-th input tensor.
    * :math:`b`: The bias parameter created by this layer (if needed).
    * :math:`Act`: The activation function.
    * :math:`Out`: The output ``Tensor``.

    See below for an example.

    .. code-block:: text

        Given:
            data_1.data = [[[0.1, 0.2]]]
            data_1.shape = (1, 1, 2)  # 1 is batch_size
            data_2.data = [[[0.1, 0.2, 0.3]]]
            data_2.shape = (1, 1, 3)  # 1 is batch_size
            fc = FC("fc", 2, num_flatten_dims=2)
            out = fc(input=[data_1, data_2])
        Then:
            out.data = [[[0.182996 -0.474117]]]
            out.shape = (1, 1, 2)

    Parameters:
        size(int): The number of output units in this layer.
        num_flatten_dims (int, optional): The fc layer can accept an input tensor with more than
            two dimensions. If this happens, the multi-dimension tensor will first be flattened
            into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
            tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
            dimensions will be flattened to form the first dimension of the final matrix (height of
            the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
            form the second dimension of the final matrix (width of the matrix). For example, suppose
            `X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
            Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default: 1
        param_attr (ParamAttr or list of ParamAttr, optional): The parameter attribute for learnable
            weights(Parameter) of this layer. Default: None.
        bias_attr (ParamAttr or list of ParamAttr, optional): The attribute for the bias
            of this layer. If it is set to False, no bias will be added to the output units.
            If it is set to None, the bias is initialized zero. Default: None.
        act (str, optional): Activation to be applied to the output of this layer. Default: None.
        is_test(bool, optional): A flag indicating whether execution is in test phase. Default: False.
        dtype(str, optional): Dtype used for weight, it can be "float32" or "float64". Default: "float32".

    Attribute:
        **weight** (list of Parameter): the learnable weights of this layer.
        **bias** (Parameter or None): the learnable bias of this layer.

    Returns:
        None

    Examples:
        .. code-block:: python

            from paddle.fluid.dygraph.base import to_variable
            import paddle.fluid as fluid
            from paddle.fluid.dygraph import FC
            import numpy as np

            data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
            with fluid.dygraph.guard():
                fc = FC("fc", 64, num_flatten_dims=2)
                data = to_variable(data)
                conv = fc(data)
    """

    def __init__(self,
                 size,
                 num_flatten_dims=1,
                 param_attr=None,
                 bias_attr=None,
                 act=None,
                 is_test=False,
                 dtype="float32"):
        super(FC, self).__init__(dtype)

        self._size = size
        self._num_flatten_dims = num_flatten_dims
        self._dtype = dtype
        self._param_attr = param_attr
        self._bias_attr = bias_attr
        self._act = act
        self.__w = list()

    def _build_once(self, input):
        i = 0
        for inp, param in self._helper.iter_inputs_and_params(
                input, self._param_attr):
            input_shape = inp.shape

            param_shape = [
                reduce(lambda a, b: a * b,
                       input_shape[self._num_flatten_dims:], 1)
            ] + [self._size]
            self.__w.append(
                self.add_parameter(
                    '_w%d' % i,
                    self.create_parameter(
                        attr=param,
                        shape=param_shape,
                        dtype=self._dtype,
                        is_bias=False)))
            i += 1

        size = list([self._size])
        self._b = self.create_parameter(
            attr=self._bias_attr, shape=size, dtype=self._dtype, is_bias=True)

    # TODO(songyouwei): We should remove _w property
    @property
    def _w(self, i=0):
        return self.__w[i]

    @_w.setter
    def _w(self, value, i=0):
        assert isinstance(self.__w[i], Variable)
        self.__w[i].set_value(value)

    @property
    def weight(self):
        if len(self.__w) > 1:
            return self.__w
        else:
            return self.__w[0]

    @weight.setter
    def weight(self, value):
        if len(self.__w) == 1:
            self.__w[0] = value

    @property
    def bias(self):
        return self._b

    @bias.setter
    def bias(self, value):
        self._b = value

    def forward(self, input):
        mul_results = list()
        i = 0
        for inp, param in self._helper.iter_inputs_and_params(
                input, self._param_attr):
            tmp = self._helper.create_variable_for_type_inference(self._dtype)
            self._helper.append_op(
                type="mul",
                inputs={"X": inp,
                        "Y": self.__w[i]},
                outputs={"Out": tmp},
                attrs={
                    "x_num_col_dims": self._num_flatten_dims,
                    "y_num_col_dims": 1
                })
            i += 1
            mul_results.append(tmp)

        if len(mul_results) == 1:
            pre_bias = mul_results[0]
        else:
            pre_bias = self._helper.create_variable_for_type_inference(
                self._dtype)
            self._helper.append_op(
                type="sum",
                inputs={"X": mul_results},
                outputs={"Out": pre_bias},
                attrs={"use_mkldnn": False})

        if self._b is not None:
            pre_activation = self._helper.create_variable_for_type_inference(
                dtype=self._dtype)
            self._helper.append_op(
                type='elementwise_add',
                inputs={'X': [pre_bias],
                        'Y': [self._b]},
                outputs={'Out': [pre_activation]},
                attrs={'axis': self._num_flatten_dims})
        else:
            pre_activation = pre_bias
        # Currently, we don't support inplace in dygraph mode
        return self._helper.append_activation(pre_activation, act=self._act)


class HingeLoss(object):
    """
    Hinge Loss Calculate class
    """

    def __init__(self, conf_dict):
        """
        initialize
        """
        self.margin = conf_dict["loss"]["margin"]

    def compute(self, pos, neg):
        """
        compute loss
        """
        elementwise_max = ElementwiseMaxLayer()
        elementwise_add = ElementwiseAddLayer()
        elementwise_sub = ElementwiseSubLayer()
        constant = ConstantLayer()
        reduce_mean = ReduceMeanLayer()
        loss = reduce_mean.ops(
            elementwise_max.ops(
                constant.ops(neg, neg.shape, "float32", 0.0),
                elementwise_add.ops(
                    elementwise_sub.ops(neg, pos),
                    constant.ops(neg, neg.shape, "float32", self.margin))))
        return loss


class BOW(paddle.nn.Layer):
    """
    BOW
    """

    def __init__(self, conf_dict):
        """
        initialize
        """
        super(BOW, self).__init__()
        self.dict_size = conf_dict["dict_size"]
        self.task_mode = conf_dict["task_mode"]
        self.emb_dim = conf_dict["net"]["emb_dim"]
        self.bow_dim = conf_dict["net"]["bow_dim"]
        self.seq_len = conf_dict["seq_len"]
        self.emb_layer = EmbeddingLayer(self.dict_size, self.emb_dim,
                                        "emb").ops()
        self.bow_layer = paddle.nn.Linear(
            in_features=self.bow_dim, out_features=self.bow_dim)
        self.bow_layer_po = FCLayer(self.bow_dim, None, "fc").ops()
        self.softmax_layer = FCLayer(2, "softmax", "cos_sim").ops()

    @paddle.fluid.dygraph.declarative
    def forward(self, left, right):
        """
        Forward network
        """
        # embedding layer
        left_emb = self.emb_layer(left)
        right_emb = self.emb_layer(right)
        left_emb = paddle.fluid.layers.reshape(
            left_emb, shape=[-1, self.seq_len, self.bow_dim])
        right_emb = paddle.fluid.layers.reshape(
            right_emb, shape=[-1, self.seq_len, self.bow_dim])

        bow_left = paddle.reduce_sum(left_emb, dim=1)
        bow_right = paddle.reduce_sum(right_emb, dim=1)
        softsign_layer = SoftsignLayer()
        left_soft = softsign_layer.ops(bow_left)
        right_soft = softsign_layer.ops(bow_right)

        # matching layer
        if self.task_mode == "pairwise":
            left_bow = self.bow_layer(left_soft)
            right_bow = self.bow_layer(right_soft)
            cos_sim_layer = CosSimLayer()
            pred = cos_sim_layer.ops(left_bow, right_bow)
            return left_bow, pred
        else:
            concat_layer = ConcatLayer(1)
            concat = concat_layer.ops([left_soft, right_soft])
            concat_fc = self.bow_layer_po(concat)
            pred = self.softmax_layer(concat_fc)
            return left_soft, pred
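
For reference, HingeLoss.compute above chains ConstantLayer, ElementwiseSubLayer, ElementwiseAddLayer, ElementwiseMaxLayer and ReduceMeanLayer to evaluate the standard pairwise ranking hinge loss, mean(max(0, neg - pos + margin)). A minimal standalone sketch of the same quantity, assuming the paddle 2.x tensor API (pairwise_hinge_loss is an illustrative helper, not part of the deleted file):

import paddle

def pairwise_hinge_loss(pos, neg, margin=0.1):
    # mean(max(0, neg - pos + margin)), i.e. what HingeLoss.compute builds
    # out of the small wrapper layers defined above.
    zeros = paddle.zeros_like(neg)
    return paddle.mean(paddle.maximum(zeros, neg - pos + margin))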
python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py (deleted, 100644 → 0)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import, division, print_function

import logging
import time
import unittest

import numpy as np

import paddle

PRINT_STEP = 20
SEED = 2020

program_translator = paddle.fluid.dygraph.dygraph_to_static.ProgramTranslator()


class SimpleLSTMRNN(paddle.fluid.Layer):
    def __init__(self,
                 hidden_size,
                 num_steps,
                 num_layers=2,
                 init_scale=0.1,
                 dropout=None):
        super(SimpleLSTMRNN, self).__init__()
        self._hidden_size = hidden_size
        self._num_layers = num_layers
        self._init_scale = init_scale
        self._dropout = dropout
        self._num_steps = num_steps
        self.cell_array = []
        self.hidden_array = []

        self.weight_1_arr = []
        self.weight_2_arr = []
        self.bias_arr = []
        self.mask_array = []

        for i in range(self._num_layers):
            weight_1 = self.create_parameter(
                attr=paddle.ParamAttr(
                    initializer=paddle.nn.initializer.Uniform(
                        low=-self._init_scale, high=self._init_scale)),
                shape=[self._hidden_size * 2, self._hidden_size * 4],
                dtype="float32",
                default_initializer=paddle.nn.initializer.Uniform(
                    low=-self._init_scale, high=self._init_scale))
            self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1))
            bias_1 = self.create_parameter(
                attr=paddle.ParamAttr(
                    initializer=paddle.nn.initializer.Uniform(
                        low=-self._init_scale, high=self._init_scale)),
                shape=[self._hidden_size * 4],
                dtype="float32",
                default_initializer=paddle.nn.initializer.Constant(0.0))
            self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1))

    def forward(self, input_embedding, init_hidden=None, init_cell=None):
        cell_array = []
        hidden_array = []

        for i in range(self._num_layers):
            hidden_array.append(init_hidden[i])
            cell_array.append(init_cell[i])

        res = []
        for index in range(self._num_steps):
            step_input = input_embedding[:, index, :]
            for k in range(self._num_layers):
                pre_hidden = hidden_array[k]
                pre_cell = cell_array[k]
                weight_1 = self.weight_1_arr[k]
                bias = self.bias_arr[k]

                nn = paddle.concat(x=[step_input, pre_hidden], axis=1)
                gate_input = paddle.matmul(x=nn, y=weight_1)

                gate_input = paddle.add(x=gate_input, y=bias)
                i, j, f, o = paddle.split(
                    x=gate_input, num_or_sections=4, axis=-1)
                c = pre_cell * paddle.nn.functional.sigmoid(
                    f) + paddle.nn.functional.sigmoid(i) * paddle.tanh(j)
                m = paddle.tanh(c) * paddle.nn.functional.sigmoid(o)
                hidden_array[k] = m
                cell_array[k] = c
                step_input = m

                if self._dropout is not None and self._dropout > 0.0:
                    step_input = paddle.fluid.layers.dropout(
                        step_input,
                        dropout_prob=self._dropout,
                        dropout_implementation='upscale_in_train')
            res.append(step_input)
        real_res = paddle.concat(x=res, axis=1)
        real_res = paddle.fluid.layers.reshape(
            real_res, [-1, self._num_steps, self._hidden_size])
        last_hidden = paddle.concat(x=hidden_array, axis=1)
        last_hidden = paddle.fluid.layers.reshape(
            last_hidden, shape=[-1, self._num_layers, self._hidden_size])
        last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2])
        last_cell = paddle.concat(x=cell_array, axis=1)
        last_cell = paddle.fluid.layers.reshape(
            last_cell, shape=[-1, self._num_layers, self._hidden_size])
        last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2])
        return real_res, last_hidden, last_cell


class PtbModel(paddle.fluid.Layer):
    def __init__(self,
                 hidden_size,
                 vocab_size,
                 num_layers=2,
                 num_steps=20,
                 init_scale=0.1,
                 dropout=None):
        super(PtbModel, self).__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.init_scale = init_scale
        self.num_layers = num_layers
        self.num_steps = num_steps
        self.dropout = dropout
        self.simple_lstm_rnn = SimpleLSTMRNN(
            hidden_size,
            num_steps,
            num_layers=num_layers,
            init_scale=init_scale,
            dropout=dropout)
        self.embedding = paddle.fluid.dygraph.nn.Embedding(
            size=[vocab_size, hidden_size],
            dtype='float32',
            is_sparse=False,
            param_attr=paddle.ParamAttr(
                name='embedding_para',
                initializer=paddle.nn.initializer.Uniform(
                    low=-init_scale, high=init_scale)))
        self.softmax_weight = self.create_parameter(
            attr=paddle.ParamAttr(),
            shape=[self.hidden_size, self.vocab_size],
            dtype="float32",
            default_initializer=paddle.nn.initializer.Uniform(
                low=-self.init_scale, high=self.init_scale))
        self.softmax_bias = self.create_parameter(
            attr=paddle.ParamAttr(),
            shape=[self.vocab_size],
            dtype="float32",
            default_initializer=paddle.nn.initializer.Uniform(
                low=-self.init_scale, high=self.init_scale))

    def build_once(self, input, label, init_hidden, init_cell):
        pass

    @paddle.fluid.dygraph.jit.declarative
    def forward(self, input, label, init_hidden, init_cell):
        init_h = paddle.fluid.layers.reshape(
            init_hidden, shape=[self.num_layers, -1, self.hidden_size])
        init_c = paddle.fluid.layers.reshape(
            init_cell, shape=[self.num_layers, -1, self.hidden_size])

        x_emb = self.embedding(input)

        x_emb = paddle.fluid.layers.reshape(
            x_emb, shape=[-1, self.num_steps, self.hidden_size])
        if self.dropout is not None and self.dropout > 0.0:
            x_emb = paddle.fluid.layers.dropout(
                x_emb,
                dropout_prob=self.dropout,
                dropout_implementation='upscale_in_train')
        rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h,
                                                               init_c)
        projection = paddle.matmul(x=rnn_out, y=self.softmax_weight)
        projection = paddle.add(x=projection, y=self.softmax_bias)

        loss = paddle.nn.functional.softmax_with_cross_entropy(
            logits=projection, label=label, soft_label=False)
        loss = paddle.fluid.layers.reshape(loss, shape=[-1, self.num_steps])
        loss = paddle.reduce_mean(loss, dim=[0])
        loss = paddle.reduce_sum(loss)

        return loss, last_hidden, last_cell

    def debug_emb(self):
        np.save("emb_grad", self.x_emb.gradient())


def train(place):
    num_layers = 1
    batch_size = 4
    hidden_size = 10
    num_steps = 3
    init_scale = 0.1
    max_epoch = 1
    dropout = 0.0
    vocab_size = 1000
    batch_num = 200

    paddle.disable_static(place)
    paddle.manual_seed(SEED)
    paddle.framework.random._manual_program_seed(SEED)

    ptb_model = PtbModel(
        hidden_size=hidden_size,
        vocab_size=vocab_size,
        num_layers=num_layers,
        num_steps=num_steps,
        init_scale=init_scale,
        dropout=dropout)

    sgd = paddle.optimizer.SGD(learning_rate=1e-3,
                               parameters=ptb_model.parameters())

    for epoch_id in range(max_epoch):

        total_loss = 0.0
        iters = 0.0
        total_sample = 0

        init_hidden_data = np.zeros(
            (num_layers, batch_size, hidden_size), dtype='float32')
        init_cell_data = np.zeros(
            (num_layers, batch_size, hidden_size), dtype='float32')

        init_hidden = paddle.to_tensor(
            data=init_hidden_data, dtype=None, place=None, stop_gradient=True)
        init_cell = paddle.to_tensor(
            data=init_cell_data, dtype=None, place=None, stop_gradient=True)
        for step_id in range(batch_num):
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            y_data = y_data.reshape((-1, 1))

            x_data = x_data.reshape((-1, num_steps, 1))
            y_data = y_data.reshape((-1, num_steps, 1))

            x = paddle.to_tensor(
                data=x_data, dtype=None, place=None, stop_gradient=True)
            y = paddle.to_tensor(
                data=y_data, dtype=None, place=None, stop_gradient=True)

            dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                        init_cell)
            out_loss = dy_loss.numpy()

            dy_loss.backward()
            sgd.minimize(dy_loss)
            ptb_model.clear_gradients()

            total_loss += out_loss
            iters += num_steps
            total_sample += 1
            if step_id % PRINT_STEP == 0:
                if step_id == 0:
                    logging.info("epoch %d | step %d, loss %0.3f" %
                                 (epoch_id, step_id, total_loss / total_sample))
                    avg_batch_time = time.time()
                else:
                    speed = PRINT_STEP / (time.time() - avg_batch_time)
                    logging.info(
                        "epoch %d | step %d, loss %0.3f, speed %.3f steps/s" %
                        (epoch_id, step_id, total_loss / total_sample, speed))
                    avg_batch_time = time.time()

        ret = out_loss, last_hidden.numpy(), last_cell.numpy()
        paddle.enable_static()
        return ret


def train_dygraph(place):
    program_translator.enable(False)
    return train(place)


def train_static(place):
    program_translator.enable(True)
    return train(place)


class TestPtb(unittest.TestCase):
    def setUp(self):
        self.place = paddle.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda() \
            else paddle.CPUPlace()

    def test_check_result(self):
        loss_1, hidden_1, cell_1 = train_static(self.place)
        loss_2, hidden_2, cell_2 = train_dygraph(self.place)

        self.assertTrue(
            np.allclose(loss_1, loss_2),
            msg="static loss: {}\ndygraph loss: {}".format(loss_1, loss_2))
        self.assertTrue(
            np.allclose(hidden_1, hidden_2),
            msg="static hidden: {}\ndygraph acc1: {}".format(hidden_1,
                                                             hidden_2))
        self.assertTrue(
            np.allclose(cell_1, cell_2),
            msg="static cell: {}\ndygraph cell: {}".format(cell_1, cell_2))


if __name__ == '__main__':
    unittest.main()
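
For reference, the inner loop of SimpleLSTMRNN.forward above is the textbook LSTM recurrence computed from one fused gate projection: i, j, f, o are the four equal splits of matmul(concat([step_input, pre_hidden], axis=1), W) + b, after which c = c_prev * sigmoid(f) + sigmoid(i) * tanh(j) and h = tanh(c) * sigmoid(o). A minimal single-step sketch, assuming the paddle 2.x functional API (lstm_step is an illustrative helper, not part of the deleted file):

import paddle
import paddle.nn.functional as F

def lstm_step(step_input, pre_hidden, pre_cell, weight, bias):
    # one matmul produces all four gates at once
    gate_input = paddle.matmul(
        paddle.concat([step_input, pre_hidden], axis=1), weight) + bias
    i, j, f, o = paddle.split(gate_input, num_or_sections=4, axis=-1)
    c = pre_cell * F.sigmoid(f) + F.sigmoid(i) * paddle.tanh(j)  # new cell state
    m = paddle.tanh(c) * F.sigmoid(o)  # new hidden state
    return m, c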
python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py (deleted, 100644 → 0)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import numpy as np
import paddle
import random
import unittest

from simnet_dygraph_model_v2 import BOW, HingeLoss

SEED = 102
random.seed(SEED)


def create_conf_dict():
    conf_dict = {}
    conf_dict["task_mode"] = "pairwise"
    conf_dict["net"] = {"emb_dim": 128, "bow_dim": 128, "hidden_dim": 128}
    conf_dict["loss"] = {"margin": 0.1}
    return conf_dict


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--batch_size",
        type=int,
        default=32,
        help="Total examples' number in batch for training.")
    parser.add_argument(
        "--seq_len", type=int, default=32, help="The length of each sentence.")
    parser.add_argument(
        "--epoch", type=int, default=1, help="The number of training epoch.")
    parser.add_argument(
        "--fake_sample_size",
        type=int,
        default=128,
        help="The number of samples of fake data.")
    args = parser.parse_args([])
    return args


args = parse_args()


def fake_vocabulary():
    vocab = {}
    vocab["<unk>"] = 0
    for i in range(26):
        c = chr(ord('a') + i)
        vocab[c] = i + 1
    return vocab


vocab = fake_vocabulary()


class FakeReaderProcessor(object):
    def __init__(self, args, vocab):
        self.vocab = vocab
        self.seq_len = args.seq_len
        self.sample_size = args.fake_sample_size
        self.data_samples = []
        for i in range(self.sample_size):
            query = [random.randint(0, 26) for i in range(self.seq_len)]
            pos_title = query[:]
            neg_title = [26 - q for q in query]
            self.data_samples.append(
                np.array([query, pos_title, neg_title]).astype(np.int64))

    def get_reader(self, mode, epoch=0):
        def reader_with_pairwise():
            if mode == "train":
                for i in range(self.sample_size):
                    yield self.data_samples[i]

        return reader_with_pairwise


simnet_process = FakeReaderProcessor(args, vocab)


def train(conf_dict, to_static):
    """
    train process
    """
    program_translator = paddle.jit.ProgramTranslator()
    program_translator.enable(to_static)

    # Get device
    if paddle.fluid.is_compiled_with_cuda():
        place = paddle.CUDAPlace(0)
    else:
        place = paddle.CPUPlace()

    paddle.disable_static(place)
    paddle.manual_seed(SEED)
    paddle.framework.random._manual_program_seed(SEED)

    conf_dict['dict_size'] = len(vocab)
    conf_dict['seq_len'] = args.seq_len

    net = BOW(conf_dict)
    loss = HingeLoss(conf_dict)
    optimizer = paddle.optimizer.Adam(
        learning_rate=0.001,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-08,
        parameters=net.parameters())

    metric = paddle.fluid.metrics.Auc(name="auc")

    global_step = 0
    losses = []

    train_loader = paddle.io.DataLoader.from_generator(
        capacity=16, return_list=True, iterable=True, use_double_buffer=True)
    get_train_examples = simnet_process.get_reader("train", epoch=args.epoch)
    train_loader.set_sample_list_generator(
        paddle.batch(
            get_train_examples, batch_size=args.batch_size), place)

    for left, pos_right, neg_right in train_loader():
        left = paddle.fluid.layers.reshape(left, shape=[-1, 1])
        pos_right = paddle.fluid.layers.reshape(pos_right, shape=[-1, 1])
        neg_right = paddle.fluid.layers.reshape(neg_right, shape=[-1, 1])
        net.train()
        global_step += 1
        left_feat, pos_score = net(left, pos_right)
        pred = pos_score
        _, neg_score = net(left, neg_right)
        avg_cost = loss.compute(pos_score, neg_score)
        losses.append(np.mean(avg_cost.numpy()))
        avg_cost.backward()
        optimizer.minimize(avg_cost)
        net.clear_gradients()
    paddle.enable_static()
    return losses


class TestSimnet(unittest.TestCase):
    def test_dygraph_static_same_loss(self):
        if paddle.fluid.is_compiled_with_cuda():
            paddle.fluid.set_flags({"FLAGS_cudnn_deterministic": True})
        conf_dict = create_conf_dict()
        dygraph_loss = train(conf_dict, to_static=False)
        static_loss = train(conf_dict, to_static=True)
        self.assertEqual(len(dygraph_loss), len(static_loss))
        for i in range(len(dygraph_loss)):
            self.assertAlmostEqual(dygraph_loss[i], static_loss[i])


if __name__ == '__main__':
    unittest.main()