diff --git a/paddle/fluid/imperative/all_reduce.cc b/paddle/fluid/imperative/all_reduce.cc index 42922aa6f3a75de2539d03104a188e400e207fc7..0a601417de1478103acec3d5a0a00f1e37b0d599 100644 --- a/paddle/fluid/imperative/all_reduce.cc +++ b/paddle/fluid/imperative/all_reduce.cc @@ -53,7 +53,7 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst, static void AllReduce(const framework::SelectedRows &src, framework::SelectedRows *dst, const ParallelStrategy &strategy, cudaStream_t stream) { - VLOG(0) << "SelectedRows AllReduce start"; + VLOG(3) << "SelectedRows AllReduce start"; const auto &src_tensor = src.value(); const auto &place = src_tensor.place(); PADDLE_ENFORCE_EQ( @@ -87,18 +87,10 @@ static void AllReduce(const framework::SelectedRows &src, static_cast(0)); dst->set_height(src.height()); - VLOG(0) << "Gather rows: " << string::join_strings(rows_num_vector, ',') + VLOG(3) << "Gather rows: " << string::join_strings(rows_num_vector, ',') << ", total rows number: " << rows_num << ", height: " << src.height(); - PADDLE_ENFORCE_LE( - rows_num, src.height(), - platform::errors::Unimplemented( - "The gathered SelectedRows's rows number should less than or equal " - "to the SelectedRows's height, but the actual rows number is %d, the " - "SelectedRows's height is %d.", - rows_num, src.height())); - auto *dst_rows = dst->mutable_rows(); dst_rows->resize(rows_num); auto *dst_rows_ptr = dst_rows->CUDAMutableData(place); @@ -130,9 +122,9 @@ static void AllReduce(const framework::SelectedRows &src, } } - VLOG(0) << "Original SelectedRows rows: " + VLOG(3) << "Original SelectedRows rows: " << string::join_strings(src_rows, ','); - VLOG(0) << "Result SelectedRows rows: " + VLOG(3) << "Result SelectedRows rows: " << string::join_strings(*dst_rows, ','); } #endif diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 
52950a4d92a71a061431e989842395753f9a4c23..de6912e76ddaf43e2591272308d1c6084e5fbace 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -12,6 +12,7 @@ string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}") list(APPEND DIST_TEST_OPS test_parallel_dygraph_mnist) list(APPEND DIST_TEST_OPS test_parallel_dygraph_se_resnext) list(APPEND DIST_TEST_OPS test_parallel_dygraph_sparse_embedding) +list(APPEND DIST_TEST_OPS test_parallel_dygraph_sparse_embedding_over_height) list(APPEND DIST_TEST_OPS test_parallel_dygraph_transformer) list(APPEND DIST_TEST_OPS test_listen_and_serv_op) list(APPEND DIST_TEST_OPS test_fleet_graph_execution_meta_optimizer) @@ -127,6 +128,7 @@ if (NOT ${WITH_GPU}) LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_mnist) # TODO(Yancey1989): parallel dygraph support CPU device in future list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_se_resnext) LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding) + LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding_over_height) LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_transformer) LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sync_batch_norm) LIST(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision) @@ -139,6 +141,7 @@ endif() if (WITH_NCCL) if (${NCCL_VERSION} VERSION_LESS 2212) LIST(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_sparse_embedding) + LIST(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_sparse_embedding_over_height) LIST(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_transformer) endif() endif() diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_over_height.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_over_height.py new file mode 100644 index 0000000000000000000000000000000000000000..61749a24c98216c91e03c23cf38eb09b78865bed --- /dev/null +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_over_height.py 
@@ -0,0 +1,70 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import paddle
+import paddle.fluid as fluid
+from parallel_dygraph_sparse_embedding import SimpleNet, fake_sample_reader, TestSparseEmbedding
+
+from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase
+
+# global configs
+# A small `vocab_size` is used on purpose so that this test exercises a rows
+# number over the height.
+batch_size = 4
+# NOTE(review): `batch_num` is not referenced anywhere else in this file.
+batch_num = 200
+hidden_size = 10
+vocab_size = 10
+num_steps = 3
+init_scale = 0.1
+
+
+class TestSparseEmbeddingOverHeight(TestSparseEmbedding):
+    """Runner for the sparse-embedding test with the small configs above.
+
+    Only `get_model` is overridden; everything else comes from
+    `TestSparseEmbedding`.
+    """
+
+    def get_model(self):
+        """Build the model, the data reader and the optimizer.
+
+        Returns:
+            tuple: `(model, train_reader, optimizer)` where `model` is a
+            `SimpleNet` created with `is_sparse=True`, `train_reader` is
+            `fake_sample_reader()` batched by `batch_size` with
+            `drop_last=True`, and `optimizer` is SGD with learning rate 0.001
+            over `model.parameters()`.
+        """
+        model = SimpleNet(
+            hidden_size=hidden_size,
+            vocab_size=vocab_size,
+            num_steps=num_steps,
+            init_scale=init_scale,
+            is_sparse=True)
+
+        train_reader = paddle.batch(
+            fake_sample_reader(), batch_size=batch_size, drop_last=True)
+
+        optimizer = fluid.optimizer.SGD(learning_rate=0.001,
+                                        parameter_list=model.parameters())
+
+        return model, train_reader, optimizer
+
+
+if __name__ == "__main__":
+    # Entry point used when this file is launched as a script.
+    runtime_main(TestSparseEmbeddingOverHeight)
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height.py
new file mode 100644
index 0000000000000000000000000000000000000000..9aca448f16121c05249bc3d07f9cadb490ba2d89
--- /dev/null
+++ 
b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import os
+import sys
+import unittest
+
+import paddle.fluid as fluid
+from test_dist_base import TestDistBase
+from spawn_runner_base import TestDistSpawnRunner
+from parallel_dygraph_sparse_embedding_over_height import TestSparseEmbeddingOverHeight
+
+# Path of this test file without its extension; used as `log_name` below.
+flag_name = os.path.splitext(__file__)[0]
+
+
+class TestParallelDygraphSparseEmdeddingOverHeight(TestDistBase):
+    """Checks `parallel_dygraph_sparse_embedding_over_height.py` through
+    `check_with_place`."""
+
+    def _setup_config(self):
+        # Sets `_sync_mode` to False and enables `_nccl2_mode` and `_dygraph`.
+        self._sync_mode = False
+        self._nccl2_mode = True
+        self._dygraph = True
+
+    def test_sparse_embedding(self):
+        # The check only runs when Paddle is compiled with CUDA; otherwise
+        # this test does nothing.
+        if fluid.core.is_compiled_with_cuda():
+            self.check_with_place(
+                "parallel_dygraph_sparse_embedding_over_height.py",
+                delta=1e-5,
+                check_error_log=True,
+                log_name=flag_name)
+
+
+class TestParallelDygraphSparseEmdeddingOverHeightSpawn(TestDistSpawnRunner):
+    """Checks `TestSparseEmbeddingOverHeight` through
+    `check_dist_result_with_spawn`."""
+
+    def test_sparse_embedding_with_spawn(self):
+        # The check only runs with a CUDA build on Python >= 3.4; otherwise
+        # this test does nothing.
+        if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4):
+            self.check_dist_result_with_spawn(
+                test_class=TestSparseEmbeddingOverHeight, delta=1e-5)
+
+
+if __name__ == "__main__":
+    unittest.main()