Unverified commit 485d3210, authored by chengduo, committed by GitHub

Open fetch_feed_op test (#15266)

* open fetch_feed_op test
test=develop

* code refine
test=develop

* reset timeout for test_parallel_executor_fetch_feed
test=develop

* disable test_parallel_executor_fetch_feed for windows
test=develop

* refine unit test
test=develop
Parent 3f687765
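Orientation for the diff below: the change merges the old TestFetchOp and TestFeedParallel cases into a single TestFetchAndFeed class that drives one ParallelExecutor through two paths — fetching program variables by name with an explicit feed dict, and feeding batches through a DataFeeder-decorated reader. The snippet that follows is an illustrative sketch only, not part of the commit: it swaps the Lenet/flowers setup in the test for a tiny fully connected network so it runs in seconds on CPU, assumes a PaddlePaddle 1.x fluid install, and uses made-up names (img, pred, batch_reader) for the example.

import os
import numpy as np
import paddle.fluid as fluid

os.environ['CPU_NUM'] = '4'  # ParallelExecutor on CPU needs an explicit device count

main_program, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main_program, startup):
    img = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    pred = fluid.layers.fc(img, size=10, act='softmax')
    loss = fluid.layers.mean(fluid.layers.cross_entropy(input=pred, label=label))
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

place = fluid.CPUPlace()
fluid.Executor(place).run(startup)
pe = fluid.ParallelExecutor(
    use_cuda=False, loss_name=loss.name, main_program=main_program)

# Fetch path: pass a feed dict directly and fetch variables by name.
feed = {'image': np.random.rand(8, 784).astype('float32'),
        'label': np.random.randint(0, 10, size=(8, 1)).astype('int64')}
print(pe.run(fetch_list=[loss.name], feed=feed, return_numpy=True)[0])

# Feed path: let a DataFeeder split a batched reader across devices.
def batch_reader(batch_size=8):
    while True:
        yield [(np.random.rand(784).astype('float32'),
                np.random.randint(0, 10, size=(1, )).astype('int64'))
               for _ in range(batch_size)]

feeder = fluid.DataFeeder(place=place, feed_list=[img, label])
reader = feeder.decorate_reader(batch_reader, multi_devices=True)
for batch_id, data in enumerate(reader()):
    print(batch_id, pe.run(feed=data, fetch_list=[loss.name])[0])
    if batch_id == 2:
        break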
@@ -107,7 +107,7 @@ if(WITH_DISTRIBUTE)
 endif()
 py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL)
 py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL)
-set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 150)
+set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450)
 py_test_modules(test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL)
 if(NOT APPLE)
   py_test_modules(test_image_classification_resnet MODULES test_image_classification_resnet SERIAL)
@@ -14,13 +14,11 @@
 
 from __future__ import print_function
 
-import paddle.dataset.flowers as flowers
 import math
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 import unittest
 import numpy as np
-import paddle
 import os
 
 
@@ -38,101 +36,82 @@ def Lenet(data, class_dim):
     return fc2
 
 
-class TestFetchOp(unittest.TestCase):
-    def parallel_exe(self, train_inputs, seed, use_cuda):
-        main = fluid.Program()
+class TestFetchAndFeed(unittest.TestCase):
+    def parallel_exe(self, use_cuda, run_parallel_exe, seed=1):
+        main_program = fluid.Program()
         startup = fluid.Program()
         startup.random_seed = seed
-        with fluid.program_guard(main, startup):
+        with fluid.program_guard(main_program, startup):
             data = fluid.layers.data(
                 name='image', shape=[3, 224, 224], dtype='float32')
             label = fluid.layers.data(name='label', shape=[1], dtype='int64')
             out = Lenet(data, class_dim=102)
             loss = fluid.layers.cross_entropy(input=out, label=label)
             loss = fluid.layers.mean(loss)
-
             opt = fluid.optimizer.Momentum(
                 learning_rate=0.1,
                 momentum=0.9,
                 regularization=fluid.regularizer.L2Decay(1e-4))
-
             opt.minimize(loss)
 
-        # TODO(zcd): I found that onece the memory optimizer is open,
-        # parallel_exe doesn't fetch some variable, such as conv2d_0.b_0@GRAD,
-        # conv2d_1.b_0@GRAD. Those variables should not be pruned.
-        # fluid.memory_optimize(main)
-
         place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
         exe = fluid.Executor(place)
         exe.run(startup)
 
-        feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
         pe = fluid.ParallelExecutor(
-            use_cuda=use_cuda, loss_name=loss.name, main_program=main)
+            use_cuda=use_cuda, loss_name=loss.name, main_program=main_program)
 
+        run_parallel_exe(main_program, pe, use_cuda, data, label, loss)
+
+    def run_parallel_exe_with_fetch(self, main, pe, use_cuda, data, label,
+                                    loss):
+        def get_data(batch_size=8):
+            np.random.seed(5)
+            while True:
+                img = np.random.random(
+                    size=[batch_size, 3, 224, 224]).astype(np.float32)
+                l = (np.random.random(size=[batch_size, 1]) *
+                     10).astype(np.int64)
+                yield img, l
+
+        # TODO(zcd): I found that onece the memory optimizer is open,
+        # parallel_exe doesn't fetch some variable, such as conv2d_0.b_0@GRAD,
+        # conv2d_1.b_0@GRAD. Those variables should not be pruned.
+        # fluid.memory_optimize(main)
         fetch_list = []
         all_vars = main.global_block().vars
+
         for k, v in all_vars.items():
-            if 'tmp' not in k and k[0] is not '_' or v.persistable:
+            if ('tmp' not in k) and (
+                    k[0] is not '_' or v.persistable
+            ) and v.type == core.VarDesc.VarType.LOD_TENSOR:
                 fetch_list.append(k)
 
-        for data in train_inputs:
-            ret = pe.run(fetch_list,
-                         feed=feeder.feed(data),
-                         return_numpy=True)
+        for batch_id, img_label in enumerate(get_data()):
+            img, l = img_label
+            train_inputs = {data.name: img, label.name: l}
+            ret = pe.run(fetch_list, feed=train_inputs, return_numpy=True)
             for i in range(len(fetch_list)):
                 assert not math.isnan(np.sum(ret[i])) and \
                        not math.isinf(np.sum(ret[i]))
+            if batch_id == 2:
+                break
 
-    @unittest.skip(reason="CI timeout")
-    def test_fetch_op(self):
-        tst_reader = paddle.batch(flowers.test(use_xmap=False), batch_size=16)
-        tst_reader_iter = tst_reader()
-
-        iters = 3
-        train_inputs = []
-        for i in range(iters):
-            train_inputs.append(next(tst_reader_iter))
-
-        os.environ['CPU_NUM'] = str(4)
-        if core.is_compiled_with_cuda():
-            self.parallel_exe(train_inputs, seed=1, use_cuda=True)
-        self.parallel_exe(train_inputs, seed=1, use_cuda=False)
-
-
-class TestFeedParallel(unittest.TestCase):
-    def parallel_exe(self, use_cuda, seed):
-        main = fluid.Program()
-        startup = fluid.Program()
-        startup.random_seed = seed
-        with fluid.scope_guard(fluid.core.Scope()):
-            with fluid.program_guard(main, startup):
-                data = fluid.layers.data(
-                    name='image', shape=[3, 224, 224], dtype='float32')
-                label = fluid.layers.data(
-                    name='label', shape=[1], dtype='int64')
-                out = Lenet(data, class_dim=102)
-                loss = fluid.layers.cross_entropy(input=out, label=label)
-                loss = fluid.layers.mean(loss)
-                opt = fluid.optimizer.Momentum(
-                    learning_rate=0.1,
-                    momentum=0.9,
-                    regularization=fluid.regularizer.L2Decay(1e-4))
-
-                opt.minimize(loss)
-
+    def run_parallel_exe_with_feed(self, main, pe, use_cuda, data, label, loss):
+        def get_data(batch_size=8):
+            np.random.seed(5)
+            while True:
+                train_data = []
+                for _ in range(batch_size):
+                    img = np.random.random(
+                        size=[1, 3, 224, 224]).astype(np.float32)
+                    label = (np.random.random(size=[1, 1]) *
+                             10).astype(np.int64)
+                    train_data.append([img, label])
+                yield train_data
+
         place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
         feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
-        reader = feeder.decorate_reader(
-            paddle.batch(
-                flowers.train(), batch_size=16), multi_devices=True)
-
-        exe = fluid.Executor(place)
-        exe.run(startup)
-
-        pe = fluid.ParallelExecutor(
-            use_cuda=use_cuda, loss_name=loss.name, main_program=main)
-
+        reader = feeder.decorate_reader(get_data, multi_devices=True)
         for batch_id, data in enumerate(reader()):
             loss_np = pe.run(feed=data, fetch_list=[loss.name])[0]
@@ -140,12 +119,22 @@ class TestFeedParallel(unittest.TestCase):
             if batch_id == 2:
                 break
 
-    @unittest.skip(reason="CI timeout")
-    def test_feed_op(self):
+    def test_fetch(self):
+        os.environ['CPU_NUM'] = str(4)
+        if core.is_compiled_with_cuda():
+            self.parallel_exe(
+                use_cuda=True,
+                run_parallel_exe=self.run_parallel_exe_with_fetch)
+        self.parallel_exe(
+            use_cuda=False, run_parallel_exe=self.run_parallel_exe_with_fetch)
+
+    def test_feed(self):
         os.environ['CPU_NUM'] = str(4)
         if core.is_compiled_with_cuda():
-            self.parallel_exe(use_cuda=True, seed=1)
-        self.parallel_exe(use_cuda=False, seed=1)
+            self.parallel_exe(
+                use_cuda=True, run_parallel_exe=self.run_parallel_exe_with_feed)
+        self.parallel_exe(
+            use_cuda=False, run_parallel_exe=self.run_parallel_exe_with_feed)
 
 
 if __name__ == '__main__':