未验证 提交 155b4f9b 编写于 作者: C Chen Weihang 提交者: GitHub

Remove selected rows all reduce over height check (#28460)

* remove selected rows all reduce over height check

* polish unittest
上级 b5e662f8
......@@ -53,7 +53,7 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst,
static void AllReduce(const framework::SelectedRows &src,
framework::SelectedRows *dst,
const ParallelStrategy &strategy, cudaStream_t stream) {
VLOG(0) << "SelectedRows AllReduce start";
VLOG(3) << "SelectedRows AllReduce start";
const auto &src_tensor = src.value();
const auto &place = src_tensor.place();
PADDLE_ENFORCE_EQ(
......@@ -87,18 +87,10 @@ static void AllReduce(const framework::SelectedRows &src,
static_cast<int64_t>(0));
dst->set_height(src.height());
VLOG(0) << "Gather rows: " << string::join_strings(rows_num_vector, ',')
VLOG(3) << "Gather rows: " << string::join_strings(rows_num_vector, ',')
<< ", total rows number: " << rows_num
<< ", height: " << src.height();
PADDLE_ENFORCE_LE(
rows_num, src.height(),
platform::errors::Unimplemented(
"The gathered SelectedRows's rows number should less than or equal "
"to the SelectedRows's height, but the actual rows number is %d, the "
"SelectedRows's height is %d.",
rows_num, src.height()));
auto *dst_rows = dst->mutable_rows();
dst_rows->resize(rows_num);
auto *dst_rows_ptr = dst_rows->CUDAMutableData(place);
......@@ -130,9 +122,9 @@ static void AllReduce(const framework::SelectedRows &src,
}
}
VLOG(0) << "Original SelectedRows rows: "
VLOG(3) << "Original SelectedRows rows: "
<< string::join_strings(src_rows, ',');
VLOG(0) << "Result SelectedRows rows: "
VLOG(3) << "Result SelectedRows rows: "
<< string::join_strings(*dst_rows, ',');
}
#endif
......
......@@ -12,6 +12,7 @@ string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}")
# Append the following tests to the DIST_TEST_OPS list.
list(APPEND DIST_TEST_OPS test_parallel_dygraph_mnist)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_se_resnext)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_sparse_embedding)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_sparse_embedding_over_height)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_transformer)
list(APPEND DIST_TEST_OPS test_listen_and_serv_op)
list(APPEND DIST_TEST_OPS test_fleet_graph_execution_meta_optimizer)
......@@ -127,6 +128,7 @@ if (NOT ${WITH_GPU})
LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_mnist) # TODO(Yancey1989): parallel dygraph support CPU device in future
list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_se_resnext)
LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding)
LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding_over_height)
LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_transformer)
LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sync_batch_norm)
LIST(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision)
......@@ -139,6 +141,7 @@ endif()
# When building with NCCL and NCCL_VERSION compares lower than 2212, drop the
# parallel dygraph sparse-embedding (both variants) and transformer tests from
# DIST_TEST_OPS.
# NOTE(review): presumably these tests need features of newer NCCL releases —
# confirm the exact requirement behind the 2212 threshold.
if (WITH_NCCL)
if (${NCCL_VERSION} VERSION_LESS 2212)
LIST(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_sparse_embedding)
LIST(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_sparse_embedding_over_height)
LIST(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_transformer)
endif()
endif()
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
from parallel_dygraph_sparse_embedding import SimpleNet, fake_sample_reader, TestSparseEmbedding
from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase
# Global configs used by TestSparseEmbeddingOverHeight.get_model below.
# `vocab_size` is deliberately small so that this test exercises the
# "rows number over height" case.
# NOTE(review): presumably the SelectedRows height of the embedding gradient
# equals `vocab_size` — confirm against SimpleNet.
# NOTE(review): `batch_num` is not referenced by the code visible in this
# file — verify whether anything else reads it.
batch_size = 4
batch_num = 200
hidden_size = 10
vocab_size = 10
num_steps = 3
init_scale = 0.1
class TestSparseEmbeddingOverHeight(TestSparseEmbedding):
    """Sparse-embedding runner configured from this module's global configs."""

    def get_model(self):
        """Create the network, the batched sample reader and the optimizer.

        Returns:
            tuple: ``(model, train_reader, optimizer)`` in that order.
        """
        # Sparse SimpleNet sized by the module-level settings.
        net = SimpleNet(
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_steps=num_steps,
            init_scale=init_scale,
            is_sparse=True)

        # Batches of `batch_size` samples; the incomplete trailing batch is
        # dropped (drop_last=True).
        batched_reader = paddle.batch(
            fake_sample_reader(), batch_size=batch_size, drop_last=True)

        # Plain SGD over all parameters of the network.
        sgd = fluid.optimizer.SGD(
            learning_rate=0.001, parameter_list=net.parameters())

        return net, batched_reader, sgd
if __name__ == "__main__":
    # Executed as a script: pass the runner class to runtime_main.
    runtime_main(TestSparseEmbeddingOverHeight)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import sys
import unittest
import paddle.fluid as fluid
from test_dist_base import TestDistBase
from spawn_runner_base import TestDistSpawnRunner
from parallel_dygraph_sparse_embedding_over_height import TestSparseEmbeddingOverHeight
# This file's path without its extension; passed as `log_name` to
# check_with_place in the test below.
flag_name = os.path.splitext(__file__)[0]
class TestParallelDygraphSparseEmdeddingOverHeight(TestDistBase):
    # Runs parallel_dygraph_sparse_embedding_over_height.py through
    # TestDistBase.check_with_place when Paddle is compiled with CUDA.

    def _setup_config(self):
        # Flags consumed by the base class: non-sync mode, NCCL2 mode, dygraph.
        self._sync_mode = False
        self._nccl2_mode = True
        self._dygraph = True

    def test_sparse_embedding(self):
        # Nothing to check on builds without CUDA.
        if not fluid.core.is_compiled_with_cuda():
            return
        self.check_with_place(
            "parallel_dygraph_sparse_embedding_over_height.py",
            delta=1e-5,
            check_error_log=True,
            log_name=flag_name)
class TestParallelDygraphSparseEmdeddingOverHeightSpawn(TestDistSpawnRunner):
    # Spawn-based variant of the over-height sparse-embedding check.

    def test_sparse_embedding_with_spawn(self):
        # Only runs on CUDA builds ...
        if not fluid.core.is_compiled_with_cuda():
            return
        # ... and on Python 3.4 or newer.
        if sys.version_info < (3, 4):
            return
        self.check_dist_result_with_spawn(
            test_class=TestSparseEmbeddingOverHeight, delta=1e-5)
if __name__ == "__main__":
    # Executed as a script: run the unittest cases defined in this module.
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册