diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 4f5e05c4a16754e3c33c8e179c6f78a411dbc2a8..f62a69625f57bcab91c8753d5edb320dc9a78b34 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -458,8 +458,6 @@ if(WITH_DISTRIBUTE) list(REMOVE_ITEM DIST_TEST_OPS "test_dist_train") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_word2vec") - # FIXME(seiriosX) will fix this - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_sparse_embedding_ctr") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_gloo") py_test_modules(test_recv_save_op MODULES test_recv_save_op ENVS ${dist_ENVS}) @@ -636,9 +634,6 @@ if(NOT WIN32 AND NOT APPLE) set_tests_properties(test_multiprocess_dataloader_static PROPERTIES TIMEOUT 120) endif() -# setting timeout value for old unittests -# set_tests_properties(test_dist_fleet_sparse_embedding_ctr PROPERTIES TIMEOUT 200) - if (NOT WIN32) set_tests_properties(test_multiprocess_reader_exception PROPERTIES TIMEOUT 120) set_tests_properties(test_layers PROPERTIES TIMEOUT 120) @@ -651,6 +646,8 @@ endif() if (WITH_DISTRIBUTE) set_tests_properties(test_communicator_half_async PROPERTIES TIMEOUT 120) + set_tests_properties(test_dist_fleet_ctr2 PROPERTIES TIMEOUT 120) + set_tests_properties(test_dist_fleet_sparse_embedding_ctr PROPERTIES TIMEOUT 120) endif() if (WITH_DISTRIBUTE AND NOT APPLE) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py index 1dfbdef392fb385b1bef160fc5bdb6437fe8474c..92dbf9f2c8ce783133af9f5480a80489a87a037b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py @@ -72,10 +72,6 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): self.assertEqual(sends, 0) self.assertEqual(sgds, 0) - fleet.init_worker() - time.sleep(8) - fleet.stop_worker() - def test_a_sync_optimizer_pserver(self): os.environ["TRAINING_ROLE"] = "PSERVER" import paddle.distributed.fleet as fleet diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py index 364077ebde833ce5584552c8446da781c34cfcb0..3d35d424bdd88e1d7cb18297d64ed82eec01777e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py @@ -32,6 +32,8 @@ import tempfile import unittest import paddle +paddle.enable_static() + import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py index dec281180683eedce8ea6fa81bf8582ed38c6a64..1a3ef2b3fda539acb33db6f79bd75b36a0f79b07 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py @@ -20,72 +20,6 @@ import tempfile from test_dist_fleet_base import TestFleetBase -class TestDistMnistSync2x2(TestFleetBase): - def _setup_config(self): - self._mode = "sync" - self._reader = "pyreader" - self._need_test = 1 - - def check_with_place(self, - model_file, - delta=1e-3, - check_error_log=False, - need_envs={}): - required_envs = { - "PATH": os.getenv("PATH", ""), - "PYTHONPATH": os.getenv("PYTHONPATH", ""), - "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), - "FLAGS_rpc_deadline": "5000", # 5sec to fail fast - "http_proxy": "", - "CPU_NUM": "2" - } - - required_envs.update(need_envs) - - if check_error_log: - required_envs["GLOG_v"] = "3" - required_envs["GLOG_logtostderr"] = "1" - - tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) - - def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) - - -@unittest.skip(reason="Skip unstable ut, open it when geo fixed") -class TestDistMnistAuto2x2(TestFleetBase): - def _setup_config(self): - self._mode = "auto" - self._reader = "pyreader" - - def check_with_place(self, - model_file, - delta=1e-3, - check_error_log=False, - need_envs={}): - required_envs = { - "PATH": os.getenv("PATH", ""), - "PYTHONPATH": os.getenv("PYTHONPATH", ""), - "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), - "FLAGS_rpc_deadline": "5000", # 5sec to fail fast - "http_proxy": "", - "CPU_NUM": "2" - } - - required_envs.update(need_envs) - - if check_error_log: - required_envs["GLOG_v"] = "3" - required_envs["GLOG_logtostderr"] = "1" - - tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) - - def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) - - class TestDistMnistAsync2x2(TestFleetBase): def _setup_config(self): self._mode = "async" @@ -115,44 +49,7 @@ class TestDistMnistAsync2x2(TestFleetBase): def test_dist_train(self): self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) - - -# @unittest.skip(reason="Skip unstable ut, reader need to be rewrite") -class TestDistMnistAsyncDataset2x2(TestFleetBase): - def _setup_config(self): - self._mode = "async" - self._reader = "dataset" - - def check_with_place(self, - model_file, - delta=1e-3, - check_error_log=False, - need_envs={}): - required_envs = { - "PATH": os.getenv("PATH", ""), - "PYTHONPATH": os.getenv("PYTHONPATH", ""), - "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), - "FLAGS_rpc_deadline": "5000", # 5sec to fail fast - "http_proxy": "", - "SAVE_MODEL": "1", - "dump_param": "concat_0.tmp_0", - "dump_fields": "dnn-fc-3.tmp_0,dnn-fc-3.tmp_0@GRAD", - "dump_fields_path": tempfile.mkdtemp(), - "Debug": "1" - } - - required_envs.update(need_envs) - - if check_error_log: - required_envs["GLOG_v"] = "3" - required_envs["GLOG_logtostderr"] = "1" - - tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) - - def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) + "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) class TestDistCtrHalfAsync2x2(TestFleetBase): @@ -187,7 +84,7 @@ class TestDistCtrHalfAsync2x2(TestFleetBase): def test_dist_train(self): self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) + "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py new file mode 100644 index 0000000000000000000000000000000000000000..7cec9c936908629188eda3dfbdb51bd1aa88b331 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py @@ -0,0 +1,94 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +import unittest +import tempfile +from test_dist_fleet_base import TestFleetBase + + +class TestDistMnistSync2x2(TestFleetBase): + def _setup_config(self): + self._mode = "sync" + self._reader = "pyreader" + self._need_test = 1 + + def check_with_place(self, + model_file, + delta=1e-3, + check_error_log=False, + need_envs={}): + required_envs = { + "PATH": os.getenv("PATH", ""), + "PYTHONPATH": os.getenv("PYTHONPATH", ""), + "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), + "FLAGS_rpc_deadline": "5000", # 5sec to fail fast + "http_proxy": "", + "CPU_NUM": "2" + } + + required_envs.update(need_envs) + + if check_error_log: + required_envs["GLOG_v"] = "3" + required_envs["GLOG_logtostderr"] = "1" + + tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) + + def test_dist_train(self): + self.check_with_place( + "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) + + +# @unittest.skip(reason="Skip unstable ut, reader need to be rewrite") +class TestDistMnistAsyncDataset2x2(TestFleetBase): + def _setup_config(self): + self._mode = "async" + self._reader = "dataset" + + def check_with_place(self, + model_file, + delta=1e-3, + check_error_log=False, + need_envs={}): + required_envs = { + "PATH": os.getenv("PATH", ""), + "PYTHONPATH": os.getenv("PYTHONPATH", ""), + "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), + "FLAGS_rpc_deadline": "5000", # 5sec to fail fast + "http_proxy": "", + "SAVE_MODEL": "1", + "dump_param": "concat_0.tmp_0", + "dump_fields": "dnn-fc-3.tmp_0,dnn-fc-3.tmp_0@GRAD", + "dump_fields_path": tempfile.mkdtemp(), + "Debug": "1" + } + + required_envs.update(need_envs) + + if check_error_log: + required_envs["GLOG_v"] = "3" + required_envs["GLOG_logtostderr"] = "1" + + tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) + + def test_dist_train(self): + self.check_with_place( + "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py index 7c7253c3745c15662f0fa1125de8d9357de983af..4546c0024b887844ef249032e369c6bc2022c181 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py @@ -19,6 +19,9 @@ import shutil import tempfile import unittest import paddle + +paddle.enable_static() + import paddle.fluid as fluid from test_dist_fleet_base import TestFleetBase @@ -110,7 +113,7 @@ class TestDistMnistAsync2x2WithDecay(TestFleetBase): "FLAGS_rpc_deadline": "5000", # 5sec to fail fast "http_proxy": "", "CPU_NUM": "2", - "DECAY": "1" + "DECAY": "0" } required_envs.update(need_envs) @@ -163,6 +166,7 @@ class TestDistMnistAsync2x2WithUnifrom(TestFleetBase): check_error_log=True) +@unittest.skip(reason="Skip unstable ut, need tensor table to enhance") class TestDistMnistAsync2x2WithGauss(TestFleetBase): def _setup_config(self): self._mode = "async"