Fix unittests which takes too long time (#16713)

* fix too long unittest recommit test=develop * add fake_reader.py test=develop

Fix unittests which takes too long time (#16713)
* fix too long unittest recommit test=develop * add fake_reader.py test=develop
674aed6a · Zeng Jinle · GitHub · bf606bce · 674aed6a · 674aed6a
4 changed file
--- a/python/paddle/fluid/tests/unittests/fake_reader.py
+++ b/python/paddle/fluid/tests/unittests/fake_reader.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import six
+
+
+def fake_imdb_reader(word_dict_size,
+                     sample_num,
+                     lower_seq_len=100,
+                     upper_seq_len=200,
+                     class_dim=2):
+    def __reader__():
+        for _ in six.moves.range(sample_num):
+            length = np.random.random_integers(
+                low=lower_seq_len, high=upper_seq_len, size=[1])[0]
+            ids = np.random.random_integers(
+                low=0, high=word_dict_size - 1, size=[length]).astype('int64')
+            label = np.random.random_integers(
+                low=0, high=class_dim - 1, size=[1]).astype('int64')[0]
+            yield ids, label
+
+    return __reader__
--- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py
+++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py
@@ -19,7 +19,7 @@ import time
 import six
 import unittest

-EPOCH_NUM = 60
+EPOCH_NUM = 20
 BATCH_SIZE = 32
 CLASS_NUM = 10


--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
@@ -22,6 +22,8 @@ import paddle
 import paddle.fluid.core as core
 import paddle.fluid as fluid
 from paddle.fluid import compiler
+import numpy as np
+from fake_reader import fake_imdb_reader


 def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
@@ -35,16 +37,16 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
        )
        return

-    word_dict = paddle.dataset.imdb.word_dict()
-    train_reader = paddle.batch(
-        paddle.dataset.imdb.train(word_dict), batch_size=batch_size)
+    word_dict_size = 5147
+    reader = fake_imdb_reader(word_dict_size, batch_size * 40)
+    train_reader = paddle.batch(reader, batch_size=batch_size)

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)

    label = fluid.layers.data(name="label", shape=[1], dtype="int64")

-    cost = network(data, label, len(word_dict))
+    cost = network(data, label, word_dict_size)
    cost.persistable = True
    optimizer = fluid.optimizer.Adagrad(learning_rate=0.2)
    optimizer.minimize(cost)

--- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py
+++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py
@@ -19,6 +19,8 @@ import numpy as np
 import paddle
 import paddle.fluid.core as core
 import paddle.fluid as fluid
+import six
+from fake_reader import fake_imdb_reader


 def bow_net(data,
@@ -48,11 +50,10 @@ def bow_net(data,

 class TestGradientClip(unittest.TestCase):
    def setUp(self):
-        self.word_dict = paddle.dataset.imdb.word_dict()
+        self.word_dict_len = 5147
        self.BATCH_SIZE = 2
-        self.train_data = paddle.batch(
-            paddle.dataset.imdb.train(self.word_dict),
-            batch_size=self.BATCH_SIZE)
+        reader = fake_imdb_reader(self.word_dict_len, self.BATCH_SIZE * 100)
+        self.train_data = paddle.batch(reader, batch_size=self.BATCH_SIZE)

    def get_places(self):
        places = [core.CPUPlace()]
@@ -131,7 +132,7 @@ class TestGradientClip(unittest.TestCase):
            data = fluid.layers.data(
                name="words", shape=[1], dtype="int64", lod_level=1)
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")
-            cost = bow_net(data, label, len(self.word_dict))
+            cost = bow_net(data, label, self.word_dict_len)

            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))