diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index af1348b765e404426597c82591fbfd2bff6e3400..7488f28f545b93c30eabc871c70a7c7847d6f6f5 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -8,7 +8,11 @@ if(NOT WITH_DISTRIBUTE) list(REMOVE_ITEM TEST_OPS test_simple_dist_transpiler) list(REMOVE_ITEM TEST_OPS test_listen_and_serv_op) LIST(REMOVE_ITEM TEST_OPS test_dist_mnist) - LIST(REMOVE_ITEM TEST_OPS test_dist_mnist_nccl) + LIST(REMOVE_ITEM TEST_OPS test_dist_mnist_dgc_nccl) + LIST(REMOVE_ITEM TEST_OPS test_dist_mnist_hallreduce) + LIST(REMOVE_ITEM TEST_OPS test_dist_mnist_multi_comm) + LIST(REMOVE_ITEM TEST_OPS test_dist_mnist_ring_allreduce) + LIST(REMOVE_ITEM TEST_OPS test_dist_mnist_backward_deps) LIST(REMOVE_ITEM TEST_OPS test_dist_mnist_lars) LIST(REMOVE_ITEM TEST_OPS test_dist_word2vec) LIST(REMOVE_ITEM TEST_OPS test_dist_ctr) @@ -215,7 +219,11 @@ if(WITH_DISTRIBUTE) endif() if(NOT APPLE) set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 350) - set_tests_properties(test_dist_mnist_nccl PROPERTIES TIMEOUT 350) + set_tests_properties(test_dist_mnist_dgc_nccl PROPERTIES TIMEOUT 350) + set_tests_properties(test_dist_mnist_hallreduce PROPERTIES TIMEOUT 350) + set_tests_properties(test_dist_mnist_multi_comm PROPERTIES TIMEOUT 350) + set_tests_properties(test_dist_mnist_ring_allreduce PROPERTIES TIMEOUT 350) + set_tests_properties(test_dist_mnist_backward_deps PROPERTIES TIMEOUT 350) set_tests_properties(test_dist_mnist_lars PROPERTIES TIMEOUT 350) set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 350) py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py index 68ffe64ced9cca4acccfb5f509a51ad9db8c263d..5d3c0fbdd0c9aebf7b229f77aadafea5fb8a23c6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py @@ -46,7 +46,7 @@ class TestDistMnist2x2(TestFleetBase): def test_dist_train(self): self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) + "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py new file mode 100644 index 0000000000000000000000000000000000000000..1f6274ec16488323c9f7e6b14a94e0d9182d7aca --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py @@ -0,0 +1,35 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +from test_dist_base import TestDistBase + + +class TestDistMnistNCCL2BackWardDeps(TestDistBase): + def _setup_config(self): + self._sync_mode = True + self._use_reduce = False + self._use_reader_alloc = False + self._nccl2_mode = True + self._enable_backward_deps = True + + def test_dist_train(self): + import paddle.fluid as fluid + if fluid.core.is_compiled_with_cuda(): + self.check_with_place("dist_mnist.py", delta=1e-5) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py new file mode 100644 index 0000000000000000000000000000000000000000..529bd330ac92a6f06a06db3629f518ba4026b6bd --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py @@ -0,0 +1,35 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +from test_dist_base import TestDistBase + + +class TestDistMnistNCCL2DGC(TestDistBase): + def _setup_config(self): + self._sync_mode = True + self._use_reduce = False + self._use_reader_alloc = False + self._nccl2_mode = True + self._use_dgc = True + + def test_dist_train(self): + import paddle.fluid as fluid + if fluid.core.is_compiled_with_cuda(): + self.check_with_place("dist_mnist.py", delta=1e-5) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py new file mode 100644 index 0000000000000000000000000000000000000000..247e4c0500f160588261c525f90a404741635170 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py @@ -0,0 +1,35 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +from test_dist_base import TestDistBase + + +class TestDistMnistNCCL2HAllreduce(TestDistBase): + def _setup_config(self): + self._sync_mode = True + self._use_reduce = False + self._use_reader_alloc = False + self._nccl2_mode = True + self._use_hallreduce = True + + def test_dist_train(self): + import paddle.fluid as fluid + if fluid.core.is_compiled_with_cuda(): + self.check_with_place("dist_mnist.py", delta=1e-5) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py new file mode 100644 index 0000000000000000000000000000000000000000..d0a21fe0dca6024ec2a061749560e2d51358687f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py @@ -0,0 +1,35 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +from test_dist_base import TestDistBase + + +class TestDistMnistNCCL2MultiNCCLComm(TestDistBase): + def _setup_config(self): + self._sync_mode = True + self._use_reduce = False + self._use_reader_alloc = False + self._nccl2_mode = True + self._nccl_comm_num = 3 + + def test_dist_train(self): + import paddle.fluid as fluid + if fluid.core.is_compiled_with_cuda(): + self.check_with_place("dist_mnist.py", delta=1e-5) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_nccl.py deleted file mode 100644 index 8718dce5ee53c2234eba41b635ffc8609cc962fe..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_nccl.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function -import unittest -from test_dist_base import TestDistBase - - -class TestDistMnistNCCL2(TestDistBase): - def _setup_config(self): - self._sync_mode = True - self._use_reduce = False - self._use_reader_alloc = False - self._nccl2_mode = True - - def test_dist_train(self): - import paddle.fluid as fluid - if fluid.core.is_compiled_with_cuda(): - self.check_with_place("dist_mnist.py", delta=1e-5) - - -class TestDistMnistNCCL2MultiNCCLComm(TestDistBase): - def _setup_config(self): - self._sync_mode = True - self._use_reduce = False - self._use_reader_alloc = False - self._nccl2_mode = True - self._nccl_comm_num = 3 - - def test_dist_train(self): - import paddle.fluid as fluid - if fluid.core.is_compiled_with_cuda(): - self.check_with_place("dist_mnist.py", delta=1e-5) - - -class TestDistMnistNCCL2DGC(TestDistBase): - def _setup_config(self): - self._sync_mode = True - self._use_reduce = False - self._use_reader_alloc = False - self._nccl2_mode = True - self._use_dgc = True - - def test_dist_train(self): - import paddle.fluid as fluid - if fluid.core.is_compiled_with_cuda(): - self.check_with_place("dist_mnist.py", delta=1e-5) - - -class TestDistMnistNCCL2BackWardDeps(TestDistBase): - def _setup_config(self): - self._sync_mode = True - self._use_reduce = False - self._use_reader_alloc = False - self._nccl2_mode = True - self._enable_backward_deps = True - - def test_dist_train(self): - import paddle.fluid as fluid - if fluid.core.is_compiled_with_cuda(): - self.check_with_place("dist_mnist.py", delta=1e-5) - - -class TestDistMnistNCCL2HAllreduce(TestDistBase): - def _setup_config(self): - self._sync_mode = True - self._use_reduce = False - self._use_reader_alloc = False - self._nccl2_mode = True - self._use_hallreduce = True - - def test_dist_train(self): - import paddle.fluid as fluid - if fluid.core.is_compiled_with_cuda(): - self.check_with_place("dist_mnist.py", delta=1e-5) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py new file mode 100644 index 0000000000000000000000000000000000000000..fd15020275bdce1a6424f3134ff089bd761ee1b1 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py @@ -0,0 +1,34 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +from test_dist_base import TestDistBase + + +class TestDistMnistNCCL2(TestDistBase): + def _setup_config(self): + self._sync_mode = True + self._use_reduce = False + self._use_reader_alloc = False + self._nccl2_mode = True + + def test_dist_train(self): + import paddle.fluid as fluid + if fluid.core.is_compiled_with_cuda(): + self.check_with_place("dist_mnist.py", delta=1e-5) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py index 38f7bb80d2f9144800ef8f8fb1402dcf86925067..3e55efb633d77bd9d93b890e60e844b55a4522ed 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py +++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py @@ -56,6 +56,7 @@ class TestDistSeResneXtNCCLMP(TestDistBase): self.check_with_place( "dist_se_resnext.py", delta=1e-5, + check_error_log=True, need_envs={"NCCL_P2P_DISABLE": "1"})