From c60f312d1b6692032345bd903e652e715702e96d Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Sun, 24 Mar 2019 23:24:34 +0800 Subject: [PATCH] add trick --- .../distributed/async_sparse_param_update_recorder.h | 2 ++ paddle/fluid/operators/distributed/parameter_recv.cc | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/operators/distributed/async_sparse_param_update_recorder.h b/paddle/fluid/operators/distributed/async_sparse_param_update_recorder.h index 037187ea9..49e7c07e5 100644 --- a/paddle/fluid/operators/distributed/async_sparse_param_update_recorder.h +++ b/paddle/fluid/operators/distributed/async_sparse_param_update_recorder.h @@ -57,6 +57,8 @@ class ConcurrentSet { std::future GetAndClear(std::vector* result) { auto task = [this, &result] { result->clear(); + // FIXME(qiao): use a trick to avoid the bug when receiving selected rows + result->push_back(0); for (auto& id : set_) { result->push_back(id); } diff --git a/paddle/fluid/operators/distributed/parameter_recv.cc b/paddle/fluid/operators/distributed/parameter_recv.cc index a5983593c..7e44bfc82 100644 --- a/paddle/fluid/operators/distributed/parameter_recv.cc +++ b/paddle/fluid/operators/distributed/parameter_recv.cc @@ -108,7 +108,9 @@ void ParameterRecv::operator()(const RpcContext &rpc_ctx, VLOG(3) << "recv_slr size: " << recv_slr.rows().size() << " " << sstream.str(); } - for (auto i = 0; i < recv_slr.rows().size(); ++i) { + + // FIXME(qiao): use a trick to avoid the bug when receiving selected rows + for (auto i = 1; i < recv_slr.rows().size(); ++i) { auto row_id = recv_slr.rows()[i] + row_offset; PADDLE_ENFORCE_LT(row_id, recv_dims[1]); memcpy(recv_tensor->data() + row_id * width, -- GitLab