diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index b621330579ca647850f1524fa0e5c4fc891aa914..12f07f17bb54793168d5fac772ccf9c717609cec 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -82,6 +82,7 @@ list(REMOVE_ITEM TEST_OPS test_imperative_se_resnext)
 list(REMOVE_ITEM TEST_OPS test_imperative_mnist)
 list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer)
 list(REMOVE_ITEM TEST_OPS test_layers)
+list(REMOVE_ITEM TEST_OPS test_imperative_ocr_attention_model)
 
 # Some ops need to check results when gc is enabled
 # Currently, only ops that register NoNeedBufferVarsInference need to do this test
@@ -137,7 +138,7 @@ py_test_modules(test_imperative_mnist_sorted_gradient MODULES test_imperative_mn
     FLAGS_cudnn_deterministic=1)
 py_test_modules(test_imperative_se_resnext MODULES test_imperative_se_resnext ENVS
     FLAGS_cudnn_deterministic=1)
-
+py_test_modules(test_imperative_ocr_attention_model MODULES test_imperative_ocr_attention_model ENVS FLAGS_cudnn_deterministic=1)
 if(WITH_DISTRIBUTE)
     py_test_modules(test_dist_train MODULES test_dist_train)
     set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..f63c82856bbcc0a0741e563c251f547361432daa
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py
@@ -0,0 +1,600 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
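+
+# This unittest builds an OCR-with-attention model (a CNN feature extractor,
+# a bidirectional GRU encoder and an attention-based GRU decoder), trains it
+# both in dygraph (imperative) mode and as a static graph with the same seed
+# and data, and then checks that the resulting loss and parameter values of
+# the two runs match.
+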
+from __future__ import print_function
+
+import contextlib
+import unittest
+import numpy as np
+import six
+import os
+from PIL import Image
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import core
+from paddle.fluid.optimizer import SGDOptimizer
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, BatchNorm, Embedding, GRUUnit
+from paddle.fluid.dygraph.base import to_variable
+from test_imperative_base import new_program_scope
+
+
+class Config(object):
+    '''
+    config for training
+    '''
+    # decoder size for decoder stage
+    decoder_size = 128
+    # size for word embedding
+    word_vector_dim = 128
+    # max length for label padding
+    max_length = 15
+    # optimizer setting
+    LR = 1.0
+    learning_rate_decay = None
+
+    # batch size to train
+    batch_size = 32
+    # class number to classify
+    num_classes = 481
+
+    use_gpu = False
+    # special label for start and end
+    SOS = 0
+    EOS = 1
+    # settings for ctc data, not used in this unittest
+    DATA_DIR_NAME = "./dataset/ctc_data/data"
+    TRAIN_DATA_DIR_NAME = "train_images"
+    TRAIN_LIST_FILE_NAME = "train.list"
+
+    # data shape for input image
+    DATA_SHAPE = [1, 48, 384]
+
+
+class ConvBNPool(fluid.dygraph.Layer):
+    def __init__(self,
+                 name_scope,
+                 group,
+                 out_ch,
+                 channels,
+                 act="relu",
+                 is_test=False,
+                 pool=True,
+                 use_cudnn=True):
+        super(ConvBNPool, self).__init__(name_scope)
+        self.group = group
+        self.pool = pool
+
+        filter_size = 3
+        conv_std_0 = (2.0 / (filter_size**2 * channels[0]))**0.5
+        conv_param_0 = fluid.ParamAttr(
+            initializer=fluid.initializer.Normal(0.0, conv_std_0))
+
+        conv_std_1 = (2.0 / (filter_size**2 * channels[1]))**0.5
+        conv_param_1 = fluid.ParamAttr(
+            initializer=fluid.initializer.Normal(0.0, conv_std_1))
+
+        self.conv_0_layer = Conv2D(
+            self.full_name(),
+            channels[0],
+            out_ch[0],
+            3,
+            padding=1,
+            param_attr=conv_param_0,
+            bias_attr=None,
+            act=None,
+            use_cudnn=use_cudnn)
+        self.bn_0_layer = BatchNorm(
+            self.full_name(), out_ch[0], act=act, is_test=is_test)
+        self.conv_1_layer = Conv2D(
+            self.full_name(),
+            num_channels=channels[1],
+            num_filters=out_ch[1],
+            filter_size=3,
+            padding=1,
+            param_attr=conv_param_1,
+            bias_attr=None,
+            act=None,
+            use_cudnn=use_cudnn)
+        self.bn_1_layer = BatchNorm(
+            self.full_name(), out_ch[1], act=act, is_test=is_test)
+
+        if self.pool:
+            self.pool_layer = Pool2D(
+                self.full_name(),
+                pool_size=2,
+                pool_type='max',
+                pool_stride=2,
+                use_cudnn=use_cudnn,
+                ceil_mode=True)
+
+    def forward(self, inputs):
+        conv_0 = self.conv_0_layer(inputs)
+        bn_0 = self.bn_0_layer(conv_0)
+        conv_1 = self.conv_1_layer(bn_0)
+        bn_1 = self.bn_1_layer(conv_1)
+        if self.pool:
+            bn_pool = self.pool_layer(bn_1)
+            return bn_pool
+        return bn_1
+
+
+class OCRConv(fluid.dygraph.Layer):
+    def __init__(self, name_scope, is_test=False, use_cudnn=True):
+        super(OCRConv, self).__init__(name_scope)
+        self.conv_bn_pool_1 = ConvBNPool(
+            self.full_name(),
+            2, [16, 16], [1, 16],
+            is_test=is_test,
+            use_cudnn=use_cudnn)
+        self.conv_bn_pool_2 = ConvBNPool(
+            self.full_name(),
+            2, [32, 32], [16, 32],
+            is_test=is_test,
+            use_cudnn=use_cudnn)
+        self.conv_bn_pool_3 = ConvBNPool(
+            self.full_name(),
+            2, [64, 64], [32, 64],
+            is_test=is_test,
+            use_cudnn=use_cudnn)
+        self.conv_bn_pool_4 = ConvBNPool(
+            self.full_name(),
+            2, [128, 128], [64, 128],
+            is_test=is_test,
+            pool=False,
+            use_cudnn=use_cudnn)
+
+    def forward(self, inputs):
+        inputs_1 = self.conv_bn_pool_1(inputs)
+        inputs_2 = self.conv_bn_pool_2(inputs_1)
+        inputs_3 = self.conv_bn_pool_3(inputs_2)
+        inputs_4 = self.conv_bn_pool_4(inputs_3)
+
+        return inputs_4
+
+
+class DynamicGRU(fluid.dygraph.Layer):
+    def __init__(self,
+                 scope_name,
+                 size,
+                 param_attr=None,
+                 bias_attr=None,
+                 is_reverse=False,
+                 gate_activation='sigmoid',
+                 candidate_activation='tanh',
+                 h_0=None,
+                 origin_mode=False):
+        super(DynamicGRU, self).__init__(scope_name)
+
+        self.gru_unit = GRUUnit(
+            self.full_name(),
+            size * 3,
+            param_attr=param_attr,
+            bias_attr=bias_attr,
+            activation=candidate_activation,
+            gate_activation=gate_activation,
+            origin_mode=origin_mode)
+
+        self.h_0 = h_0
+        self.is_reverse = is_reverse
+
+    def forward(self, inputs):
+        hidden = self.h_0
+        res = []
+        for i in range(inputs.shape[1]):
+            if self.is_reverse:
+                i = inputs.shape[1] - 1 - i
+            input_ = fluid.layers.slice(
+                inputs, axes=[1], starts=[i], ends=[i + 1])
+            input_ = fluid.layers.reshape(
+                input_, [-1, input_.shape[2]], inplace=False)
+            hidden, reset, gate = self.gru_unit(input_, hidden)
+            hidden_ = fluid.layers.reshape(
+                hidden, [-1, 1, hidden.shape[1]], inplace=False)
+            if self.is_reverse:
+                res = [hidden_] + res
+            else:
+                res.append(hidden_)
+        res = fluid.layers.concat(res, axis=1)
+        return res
+
+
+class EncoderNet(fluid.dygraph.Layer):
+    def __init__(self,
+                 scope_name,
+                 rnn_hidden_size=200,
+                 is_test=False,
+                 use_cudnn=True):
+        super(EncoderNet, self).__init__(scope_name)
+        self.rnn_hidden_size = rnn_hidden_size
+        para_attr = fluid.ParamAttr(initializer=fluid.initializer.Normal(0.0,
+                                                                         0.02))
+        bias_attr = fluid.ParamAttr(
+            initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0)
+        if fluid.framework.in_dygraph_mode():
+            h_0 = np.zeros(
+                (Config.batch_size, rnn_hidden_size), dtype="float32")
+            h_0 = to_variable(h_0)
+        else:
+            h_0 = fluid.layers.fill_constant(
+                shape=[Config.batch_size, rnn_hidden_size],
+                dtype='float32',
+                value=0)
+        self.ocr_convs = OCRConv(
+            self.full_name(), is_test=is_test, use_cudnn=use_cudnn)
+
+        self.fc_1_layer = FC(self.full_name(),
+                             rnn_hidden_size * 3,
+                             param_attr=para_attr,
+                             bias_attr=False,
+                             num_flatten_dims=2)
+        self.fc_2_layer = FC(self.full_name(),
+                             rnn_hidden_size * 3,
+                             param_attr=para_attr,
+                             bias_attr=False,
+                             num_flatten_dims=2)
+        self.gru_forward_layer = DynamicGRU(
+            self.full_name(),
+            size=rnn_hidden_size,
+            h_0=h_0,
+            param_attr=para_attr,
+            bias_attr=bias_attr,
+            candidate_activation='relu')
+        self.gru_backward_layer = DynamicGRU(
+            self.full_name(),
+            size=rnn_hidden_size,
+            h_0=h_0,
+            param_attr=para_attr,
+            bias_attr=bias_attr,
+            candidate_activation='relu',
+            is_reverse=True)
+
+        self.encoded_proj_fc = FC(self.full_name(),
+                                  Config.decoder_size,
+                                  bias_attr=False,
+                                  num_flatten_dims=2)
+
+    def forward(self, inputs):
+        conv_features = self.ocr_convs(inputs)
+        #sliced_feature = fluid.layers.im2sequence(
+        #    input=conv_features,
+        #    stride=[1, 1],
+        #    filter_size=[conv_features.shape[2], 1])
+
+        transpose_conv_features = fluid.layers.transpose(
+            conv_features, perm=[0, 3, 1, 2])
+
+        sliced_feature = fluid.layers.reshape(
+            transpose_conv_features, [
+                -1, 48, transpose_conv_features.shape[2] *
+                transpose_conv_features.shape[3]
+            ],
+            inplace=False)
+        fc_1 = self.fc_1_layer(sliced_feature)
+        fc_2 = self.fc_2_layer(sliced_feature)
+        gru_forward = self.gru_forward_layer(fc_1)
+
+        gru_backward = self.gru_backward_layer(fc_2)
+
+        encoded_vector = fluid.layers.concat(
+            input=[gru_forward, gru_backward], axis=2)
+
+        encoded_proj = self.encoded_proj_fc(encoded_vector)
+
+        return gru_backward, encoded_vector, encoded_proj
+
+
+class SimpleAttention(fluid.dygraph.Layer):
+    def __init__(self, scope_name, decoder_size):
+        super(SimpleAttention, self).__init__(scope_name)
+
+        self.fc_1 = FC(self.full_name(),
+                       decoder_size,
+                       act=None,
+                       bias_attr=False)
+        self.fc_2 = FC(self.full_name(), 1, act=None, bias_attr=False)
+
+    def _build_once(self, encoder_vec, encoder_proj, decoder_state):
+        pass
+
+    def forward(self, encoder_vec, encoder_proj, decoder_state):
+
+        decoder_state_fc = self.fc_1(decoder_state)
+        decoder_state_proj_reshape = fluid.layers.reshape(
+            decoder_state_fc, [-1, 1, decoder_state_fc.shape[1]], inplace=False)
+        decoder_state_expand = fluid.layers.expand(
+            decoder_state_proj_reshape, [1, encoder_proj.shape[1], 1])
+        concated = fluid.layers.elementwise_add(encoder_proj,
+                                                decoder_state_expand)
+        concated = fluid.layers.tanh(x=concated)
+        attention_weight = self.fc_2(concated)
+        weights_reshape = fluid.layers.reshape(
+            x=attention_weight, shape=[-1], inplace=False)
+        scaled = fluid.layers.elementwise_mul(
+            x=encoder_vec, y=weights_reshape, axis=0)
+        scaled = fluid.layers.transpose(scaled, [0, 2, 1])
+        scaled = fluid.layers.reshape(
+            scaled, [-1, scaled.shape[1], scaled.shape[2], 1], inplace=False)
+        context = fluid.layers.pool2d(
+            input=scaled,
+            pool_size=[scaled.shape[2], scaled.shape[3]],
+            pool_type='avg')
+        context = fluid.layers.reshape(
+            context, [-1, context.shape[1]], inplace=False)
+        return context
+
+
+class GRUDecoderWithAttention(fluid.dygraph.Layer):
+    def __init__(self, scope_name, decoder_size, num_classes):
+        super(GRUDecoderWithAttention, self).__init__(scope_name)
+        self.simple_attention = SimpleAttention(self.full_name(), decoder_size)
+
+        self.fc_1_layer = FC(self.full_name(),
+                             size=decoder_size * 3,
+                             bias_attr=False)
+        self.fc_2_layer = FC(self.full_name(),
+                             size=decoder_size * 3,
+                             bias_attr=False)
+        self.gru_unit = GRUUnit(
+            self.full_name(),
+            size=decoder_size * 3,
+            param_attr=None,
+            bias_attr=None)
+        self.out_layer = FC(self.full_name(),
+                            size=num_classes + 2,
+                            bias_attr=None,
+                            act='softmax')
+
+        self.decoder_size = decoder_size
+
+    def _build_once(self, target_embedding, encoder_vec, encoder_proj,
+                    decoder_boot):
+        pass
+
+    def forward(self, target_embedding, encoder_vec, encoder_proj,
+                decoder_boot):
+        res = []
+        hidden_mem = decoder_boot
+        for i in range(target_embedding.shape[1]):
+            current_word = fluid.layers.slice(
+                target_embedding, axes=[1], starts=[i], ends=[i + 1])
+            current_word = fluid.layers.reshape(
+                current_word, [-1, current_word.shape[2]], inplace=False)
+
+            context = self.simple_attention(encoder_vec, encoder_proj,
+                                            hidden_mem)
+            fc_1 = self.fc_1_layer(context)
+            fc_2 = self.fc_2_layer(current_word)
+            decoder_inputs = fluid.layers.elementwise_add(x=fc_1, y=fc_2)
+
+            h, _, _ = self.gru_unit(decoder_inputs, hidden_mem)
+            hidden_mem = h
+            out = self.out_layer(h)
+            res.append(out)
+
+        res1 = fluid.layers.concat(res, axis=0)
+
+        return res1
+
+
+class OCRAttention(fluid.dygraph.Layer):
+    def __init__(self, scope_name):
+        super(OCRAttention, self).__init__(scope_name)
+        self.encoder_net = EncoderNet(self.full_name())
+        self.fc = FC(self.full_name(),
+                     size=Config.decoder_size,
+                     bias_attr=False,
+                     act='relu')
+        self.embedding = Embedding(
+            self.full_name(), [Config.num_classes + 2, Config.word_vector_dim],
+            dtype='float32')
+        self.gru_decoder_with_attention = GRUDecoderWithAttention(
+            self.full_name(), Config.decoder_size, Config.num_classes)
+
+    def _build_once(self, inputs, label_in):
+        pass
+
+    def forward(self, inputs, label_in):
+        gru_backward, encoded_vector, encoded_proj = self.encoder_net(inputs)
+
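+        # The first timestep of the backward GRU output is projected through a
+        # ReLU FC below to form the decoder's initial hidden state (decoder_boot).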
+        backward_first = fluid.layers.slice(
+            gru_backward, axes=[1], starts=[0], ends=[1])
+        backward_first = fluid.layers.reshape(
+            backward_first, [-1, backward_first.shape[2]], inplace=False)
+        decoder_boot = self.fc(backward_first)
+        label_in = fluid.layers.reshape(label_in, [-1, 1], inplace=False)
+        trg_embedding = self.embedding(label_in)
+
+        trg_embedding = fluid.layers.reshape(
+            trg_embedding, [-1, Config.max_length, trg_embedding.shape[1]],
+            inplace=False)
+
+        prediction = self.gru_decoder_with_attention(
+            trg_embedding, encoded_vector, encoded_proj, decoder_boot)
+
+        return prediction
+
+
+class TestDygraphOCRAttention(unittest.TestCase):
+    def test_ocr_attention(self):
+        seed = 90
+        epoch_num = 2
+        batch_num = 20
+        np.random.seed(seed)
+        image_np = np.random.randn(Config.batch_size, Config.DATA_SHAPE[0],
+                                   Config.DATA_SHAPE[1],
+                                   Config.DATA_SHAPE[2]).astype('float32')
+        label_in_np = np.arange(
+            0, Config.max_length,
+            dtype='int64').reshape([1, Config.max_length])
+        for i in range(2, Config.batch_size + 1):
+            label_in_np = np.vstack((label_in_np, np.arange(
+                (i - 1) * Config.max_length,
+                i * Config.max_length,
+                dtype='int64').reshape([1, Config.max_length])))
+
+        print(label_in_np.shape)
+        label_out_np = np.arange(
+            0, Config.max_length,
+            dtype='int64').reshape([1, Config.max_length])
+        for i in range(2, Config.batch_size + 1):
+            label_out_np = np.vstack((label_out_np, np.arange(
+                (i - 1) * Config.max_length,
+                i * Config.max_length,
+                dtype='int64').reshape([1, Config.max_length])))
+        print(label_out_np.shape)
+        #if Config.use_gpu:
+        #    place = fluid.CUDAPlace(0)
+        #else:
+        #    place = fluid.CPUPlace()
+        with fluid.dygraph.guard():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+            backward_strategy = fluid.dygraph.BackwardStrategy()
+            backward_strategy.sort_sum_gradient = True
+            ocr_attention = OCRAttention("ocr_attention")
+
+            if Config.learning_rate_decay == "piecewise_decay":
+                learning_rate = fluid.layers.piecewise_decay(
+                    [50000], [Config.LR, Config.LR * 0.01])
+            else:
+                learning_rate = Config.LR
+            #optimizer = fluid.optimizer.Adadelta(learning_rate=learning_rate,
+            #    epsilon=1.0e-6, rho=0.9)
+            optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+            # place = fluid.CPUPlace()
+            dy_param_init_value = {}
+            for param in ocr_attention.parameters():
+                dy_param_init_value[param.name] = param.numpy()
+            for epoch in range(epoch_num):
+                for batch_id in range(batch_num):
+                    label_in = to_variable(label_in_np)
+                    label_out = to_variable(label_out_np)
+                    label_out._stop_gradient = True
+                    label_out.trainable = False
+                    img = to_variable(image_np)
+                    dy_prediction = ocr_attention(img, label_in)
+                    label_out = fluid.layers.reshape(
+                        label_out, [-1, 1], inplace=False)
+                    loss = fluid.layers.cross_entropy(
+                        input=dy_prediction, label=label_out)
+                    avg_loss = fluid.layers.reduce_sum(loss)
+
+                    dy_out = avg_loss.numpy()
+
+                    if epoch == 0 and batch_id == 0:
+                        for param in ocr_attention.parameters():
+                            if param.name not in dy_param_init_value:
+                                dy_param_init_value[param.name] = param.numpy()
+                    avg_loss.backward(backward_strategy)
+                    dy_grad_value = {}
+                    for param in ocr_attention.parameters():
+                        if param.trainable:
+                            np_array = np.array(param._ivar._grad_ivar().value()
+                                                .get_tensor())
+                            dy_grad_value[param.name + core.grad_var_suffix(
+                            )] = np_array
+
+                    optimizer.minimize(avg_loss)
+                    ocr_attention.clear_gradients()
+                    dy_param_value = {}
+                    for param in ocr_attention.parameters():
+                        dy_param_value[param.name] = param.numpy()
+
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+            # print("static start")
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
+            ocr_attention = OCRAttention("ocr_attention")
+
+            if Config.learning_rate_decay == "piecewise_decay":
+                learning_rate = fluid.layers.piecewise_decay(
+                    [50000], [Config.LR, Config.LR * 0.01])
+            else:
+                learning_rate = Config.LR
+
+            optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+
+            images = fluid.layers.data(
+                name='pixel', shape=Config.DATA_SHAPE, dtype='float32')
+            static_label_in = fluid.layers.data(
+                name='label_in', shape=[1], dtype='int64', lod_level=0)
+            static_label_out = fluid.layers.data(
+                name='label_out', shape=[1], dtype='int64', lod_level=0)
+            static_label_out._stop_gradient = True
+            static_label_out.trainable = False
+
+            static_prediction = ocr_attention(images, static_label_in)
+
+            cost = fluid.layers.cross_entropy(
+                input=static_prediction, label=static_label_out)
+            static_avg_loss = fluid.layers.reduce_sum(cost)
+            # param_grad_list = fluid.backward.append_backward(static_avg_loss)
+            optimizer.minimize(static_avg_loss)
+
+            static_param_init_value = {}
+            static_param_name_list = []
+            static_grad_name_list = []
+            for param in ocr_attention.parameters():
+                static_param_name_list.append(param.name)
+                if param.trainable:
+                    static_grad_name_list.append(param.name +
+                                                 core.grad_var_suffix())
+
+            out = exe.run(fluid.default_startup_program(),
+                          fetch_list=static_param_name_list)
+
+            for i in range(len(static_param_name_list)):
+                static_param_init_value[static_param_name_list[i]] = out[i]
+
+            fetch_list = [static_avg_loss.name]
+            # print(static_test.name)
+            # fetch_list = [static_avg_loss.name, static_test.name]
+            fetch_list.extend(static_param_name_list)
+            fetch_list.extend(static_grad_name_list)
+            for epoch in range(epoch_num):
+                for batch_id in range(batch_num):
+                    static_label_in = label_in_np
+                    static_label_out = label_out_np
+                    static_label_out = static_label_out.reshape((-1, 1))
+                    out = exe.run(fluid.default_main_program(),
+                                  feed={
+                                      "pixel": image_np,
+                                      "label_in": static_label_in,
+                                      "label_out": static_label_out
+                                  },
+                                  fetch_list=fetch_list)
+                    static_param_value = {}
+                    static_grad_value = {}
+                    static_out = out[0]
+                    # static_test_grad = out[1]
+                    for i in range(1, len(static_param_name_list) + 1):
+                        static_param_value[static_param_name_list[i - 1]] = out[
+                            i]
+                    grad_start_pos = len(static_param_name_list) + 1
+                    for i in range(grad_start_pos,
+                                   len(static_grad_name_list) + grad_start_pos):
+                        static_grad_value[static_grad_name_list[
+                            i - grad_start_pos]] = out[i]
+
+        self.assertTrue(np.array_equal(static_out, dy_out))
+
+        for key, value in six.iteritems(static_param_init_value):
+            self.assertTrue(np.array_equal(value, dy_param_init_value[key]))
+
+        for key, value in six.iteritems(static_param_value):
+            self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-20))
+
+
+if __name__ == '__main__':
+    unittest.main()