From 02842cfc2508240cca89bac59d1beaac2f5da2b6 Mon Sep 17 00:00:00 2001
From: chengduoZH
Date: Fri, 13 Apr 2018 13:46:17 +0800
Subject: [PATCH] enhance broadcast_op_handle and gather_op_handle

---
 .../framework/details/broadcast_op_handle.cc  |  71 +++++---
 .../details/broadcast_op_handle_test.cc       | 151 +++++++++---------
 .../framework/details/gather_op_handle.cc     | 131 +++++++--------
 .../details/gather_op_handle_test.cc          | 129 ++++++++-------
 .../fluid/framework/details/op_handle_base.cc |  15 --
 .../fluid/framework/details/op_handle_base.h  |   8 -
 6 files changed, 266 insertions(+), 239 deletions(-)

diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc
index 53e8f9f3665..24115cae819 100644
--- a/paddle/fluid/framework/details/broadcast_op_handle.cc
+++ b/paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -18,45 +18,74 @@ namespace paddle {
 namespace framework {
 namespace details {
 
+Tensor *GetTensorFromVar(Variable *in_var) {
+  if (in_var->IsType<LoDTensor>()) {
+    return in_var->GetMutable<LoDTensor>();
+  } else if (in_var->IsType<SelectedRows>()) {
+    return in_var->GetMutable<SelectedRows>()->mutable_value();
+  } else {
+    PADDLE_THROW("Var should be LoDTensor or SelectedRows");
+  }
+  return nullptr;
+}
+
 BroadcastOpHandle::BroadcastOpHandle(const std::vector<Scope *> &local_scopes,
                                      const std::vector<platform::Place> &places)
     : local_scopes_(local_scopes), places_(places) {}
 
 void BroadcastOpHandle::RunImpl() {
-  PADDLE_ENFORCE_EQ(this->inputs_.size(), 1,
+  // the input may have dummy var.
+  std::vector<VarHandle *> in_var_handle;
+  for (auto *in : inputs_) {
+    auto *out_handle = dynamic_cast<VarHandle *>(in);
+    if (out_handle) {
+      in_var_handle.push_back(out_handle);
+    }
+  }
+  PADDLE_ENFORCE_EQ(in_var_handle.size(), 1,
                     "The number of input should be one.");
+
+  // the output may have dummy var.
+  std::vector<VarHandle *> out_var_handles;
+  for (auto *out : outputs_) {
+    auto *out_handle = dynamic_cast<VarHandle *>(out);
+    if (out_handle) {
+      out_var_handles.push_back(out_handle);
+    }
+  }
+
   PADDLE_ENFORCE_EQ(
-      this->outputs_.size(), places_.size(),
+      out_var_handles.size(), places_.size(),
       "The number of output should equal to the number of places.");
 
   // Wait input done, this Wait is asynchronous operation
-  auto in_var_handle = static_cast<VarHandle *>(this->inputs_[0]);
-  auto &in_place = in_var_handle->place_;
-  if (inputs_[0]->generated_op_) {
-    inputs_[0]->generated_op_->Wait(dev_ctxes_[in_place]);
-    for (auto *out : outputs_) {
-      auto out_handle = static_cast<VarHandle *>(out);
-      auto &out_p = out_handle->place_;
-      inputs_[0]->generated_op_->Wait(dev_ctxes_[out_p]);
+  auto &in_place = in_var_handle[0]->place_;
+  if (in_var_handle[0]->generated_op_) {
+    in_var_handle[0]->generated_op_->Wait(dev_ctxes_[in_place]);
+    for (auto *out : out_var_handles) {
+      auto &out_p = out->place_;
+      if (platform::is_same_place(in_place, out_p)) continue;
+      in_var_handle[0]->generated_op_->Wait(dev_ctxes_[out_p]);
     }
   }
 
-  auto in_scope_idx = in_var_handle->scope_idx_;
+  //
+  auto in_scope_idx = in_var_handle[0]->scope_idx_;
   PADDLE_ENFORCE_LT(in_scope_idx, local_scopes_.size(),
                     "The input(%s) is not in the local_scopes.",
-                    in_var_handle->name_);
-  auto in_var = local_scopes_[in_scope_idx]->FindVar(in_var_handle->name_);
-
+                    in_var_handle[0]->name_);
+  auto in_var = local_scopes_[in_scope_idx]->FindVar(in_var_handle[0]->name_);
   Tensor *in_tensor = GetTensorFromVar(in_var);
 
-  for (auto *out : outputs_) {
-    auto out_handle = static_cast<VarHandle *>(out);
-    auto &out_p = out_handle->place_;
-    auto out_scope_idx = out_handle->scope_idx_;
+  for (auto *out : out_var_handles) {
+    auto &out_p = out->place_;
+
+    auto out_scope_idx = out->scope_idx_;
     PADDLE_ENFORCE_LT(out_scope_idx, local_scopes_.size(),
-                      "%s is not in the local_scopes ", out_handle->name_);
+                      "%s is not in the local_scopes ", out->name_);
+
     auto *s = local_scopes_[out_scope_idx];
-    auto out_var = s->FindVar(out_handle->name_);
+    auto out_var = s->FindVar(out->name_);
 
     PADDLE_ENFORCE_EQ(out_p.which(), in_place.which(),
                       "The place of input and output should be the same.");
@@ -89,7 +118,7 @@ void BroadcastOpHandle::RunImpl() {
       auto dst_gpu_place = boost::get<platform::CUDAPlace>(out_p);
       void *dst_ptr = out_tensor->mutable_data(out_p);
       void *src_ptr = in_tensor->data<void>();
-      int64_t size = in_tensor->numel();
+      int64_t size = in_tensor->numel() * SizeOfType(in_tensor->type());
       memory::Copy(
           dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
           reinterpret_cast<platform::CUDADeviceContext *>(dev_ctxes_[out_p])
diff --git a/paddle/fluid/framework/details/broadcast_op_handle_test.cc b/paddle/fluid/framework/details/broadcast_op_handle_test.cc
index 9bf72f03602..dfc52b012f8 100644
--- a/paddle/fluid/framework/details/broadcast_op_handle_test.cc
+++ b/paddle/fluid/framework/details/broadcast_op_handle_test.cc
@@ -27,8 +27,20 @@ namespace p = paddle::platform;
 // test data amount
 const f::DDim kDims = {20, 20};
 
-class BroadcastTester : public ::testing::Test {
- public:
+struct TestBroadcastOpHandle {
+  std::vector<std::unique_ptr<p::DeviceContext>> ctxs_;
+  std::vector<Scope*> local_scopes_;
+  Scope g_scope_;
+  std::unique_ptr<OpHandleBase> op_handle_;
+  std::vector<std::unique_ptr<VarHandleBase>> vars_;
+  std::vector<p::Place> gpu_list_;
+
+  void WaitAll() {
+    for (size_t j = 0; j < ctxs_.size(); ++j) {
+      ctxs_[j]->Wait();
+    }
+  }
+
   void InitCtxOnGpu(bool use_gpu) {
     if (use_gpu) {
 #ifdef PADDLE_WITH_CUDA
@@ -57,61 +69,56 @@ class BroadcastTester : public ::testing::Test {
     }
   }
 
-  void BroadcastInitOp(int input_scope_idx) {
+  void InitBroadcastOp(size_t input_scope_idx) {
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      local_scope_.push_back(&g_scope_.NewScope());
-      local_scope_[j]->Var("out");
+      local_scopes_.push_back(&(g_scope_.NewScope()));
+      local_scopes_[j]->Var("out");
     }
-    local_scope_[input_scope_idx]->Var("input");
+    local_scopes_[input_scope_idx]->Var("input");
 
-    bc_op_handle_ = new f::details::BroadcastOpHandle(local_scope_, gpu_list_);
+    op_handle_.reset(new BroadcastOpHandle(local_scopes_, gpu_list_));
 
-    f::details::VarHandle* in_var_handle = new f::details::VarHandle();
+    vars_.emplace_back(new VarHandle());
+    VarHandle* in_var_handle = static_cast<VarHandle*>(vars_.back().get());
     in_var_handle->place_ = gpu_list_[input_scope_idx];
     in_var_handle->name_ = "input";
     in_var_handle->version_ = 1;
     in_var_handle->scope_idx_ = input_scope_idx;
     in_var_handle->generated_op_ = nullptr;
-    bc_op_handle_->AddInput(in_var_handle);
+    op_handle_->AddInput(in_var_handle);
+
+    // add dummy var
+    vars_.emplace_back(new DummyVarHandle());
+    DummyVarHandle* dummy_var_handle =
+        static_cast<DummyVarHandle*>(vars_.back().get());
+    dummy_var_handle->generated_op_ = nullptr;
+    op_handle_->AddInput(dummy_var_handle);
 
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      bc_op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j];
-      f::details::VarHandle* out_var_handle = new f::details::VarHandle();
+      op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
+      vars_.emplace_back(new VarHandle());
+      VarHandle* out_var_handle = static_cast<VarHandle*>(vars_.back().get());
       out_var_handle->place_ = gpu_list_[j];
       out_var_handle->name_ = "out";
       out_var_handle->version_ = 2;
       out_var_handle->scope_idx_ = j;
-      bc_op_handle_->AddOutput(out_var_handle);
-    }
-  }
-  void BroadcastOpDestroy() {
-    for (auto in : bc_op_handle_->inputs_) {
-      delete in;
-    }
-    for (auto out : bc_op_handle_->outputs_) {
-      delete out;
+      op_handle_->AddOutput(out_var_handle);
     }
-    delete bc_op_handle_;
-    for (size_t j = 0; j < ctxs_.size(); ++j) {
-      delete ctxs_[j];
-    }
-  }
 
-  void WaitAll() {
-    for (size_t j = 0; j < ctxs_.size(); ++j) {
-      ctxs_[j]->Wait();
-    }
+    // add dummy var
+    vars_.emplace_back(new DummyVarHandle());
+    DummyVarHandle* out_dummy_var_handle =
+        static_cast<DummyVarHandle*>(vars_.back().get());
+    out_dummy_var_handle->generated_op_ = nullptr;
+    op_handle_->AddOutput(out_dummy_var_handle);
   }
 
-  void TestBroadcastLodTensor() {
-    int input_scope_idx = 0;
-    BroadcastInitOp(input_scope_idx);
-
-    auto in_var = local_scope_[input_scope_idx]->Var("input");
+  void TestBroadcastLodTensor(size_t input_scope_idx) {
+    auto in_var = local_scopes_[input_scope_idx]->Var("input");
     auto in_lod_tensor = in_var->GetMutable<f::LoDTensor>();
     in_lod_tensor->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
 
-    std::vector<float> send_vector(f::product(kDims), input_scope_idx + 12);
+    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
     for (size_t k = 0; k < send_vector.size(); ++k) {
       send_vector[k] = k;
     }
@@ -120,13 +127,13 @@ class BroadcastTester : public ::testing::Test {
         send_vector, *(ctxs_[input_scope_idx]), in_lod_tensor);
     in_lod_tensor->set_lod(lod);
 
-    bc_op_handle_->Run(false);
+    op_handle_->Run(false);
 
     WaitAll();
 
     p::CPUPlace cpu_place;
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      auto out_var = local_scope_[j]->Var("out");
+      auto out_var = local_scopes_[j]->Var("out");
       auto out_tensor = out_var->Get<f::LoDTensor>();
       PADDLE_ENFORCE_EQ(out_tensor.lod(), lod, "lod is not equal.");
 
@@ -134,42 +141,37 @@ class BroadcastTester : public ::testing::Test {
       f::TensorCopy(out_tensor, cpu_place, *(ctxs_[j]), &result_tensor);
       float* ct = result_tensor.mutable_data<float>(cpu_place);
 
-      for (int64_t j = 0; j < f::product(kDims); ++j) {
-        ASSERT_NEAR(ct[j], send_vector[j], 1e-5);
+      for (int64_t i = 0; i < f::product(kDims); ++i) {
+        ASSERT_NEAR(ct[i], send_vector[i], 1e-5);
       }
     }
-
-    BroadcastOpDestroy();
   }
 
-  void TestBroadcastSelectedRows() {
-    int input_scope_idx = 0;
-    BroadcastInitOp(input_scope_idx);
-
-    auto in_var = local_scope_[input_scope_idx]->Var("input");
+  void TestBroadcastSelectedRows(size_t input_scope_idx) {
+    auto in_var = local_scopes_[input_scope_idx]->Var("input");
     auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
     auto value = in_selected_rows->mutable_value();
     value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
 
-    int height = kDims[0] * 2;
+    int height = static_cast<int>(kDims[0]) * 2;
     std::vector<int64_t> rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1,
                               2, 4, 6, 3, 1, 1, 1, 1, 3, 7};
     in_selected_rows->set_height(height);
     in_selected_rows->set_rows(rows);
 
-    std::vector<float> send_vector(f::product(kDims));
+    std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
     for (size_t k = 0; k < send_vector.size(); ++k) {
       send_vector[k] = k;
    }
     paddle::framework::TensorFromVector(
         send_vector, *(ctxs_[input_scope_idx]), value);
 
-    bc_op_handle_->Run(false);
+    op_handle_->Run(false);
 
     WaitAll();
 
     p::CPUPlace cpu_place;
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      auto out_var = local_scope_[j]->Var("out");
+      auto out_var = local_scopes_[j]->Var("out");
       auto& out_select_rows = out_var->Get<f::SelectedRows>();
       auto rt = out_select_rows.value();
 
@@ -183,41 +185,44 @@ class BroadcastTester : public ::testing::Test {
       f::TensorCopy(rt, cpu_place, *(ctxs_[j]), &result_tensor);
       float* ct = result_tensor.data<float>();
 
-      for (int64_t j = 0; j < f::product(kDims); ++j) {
-        ASSERT_NEAR(ct[j], send_vector[j], 1e-5);
+      for (int64_t i = 0; i < f::product(kDims); ++i) {
+        ASSERT_NEAR(ct[i], send_vector[i], 1e-5);
       }
     }
-
-    BroadcastOpDestroy();
   }
-
- public:
-  f::Scope g_scope_;
-  std::vector<p::DeviceContext*> ctxs_;
-  std::vector<f::Scope*> local_scope_;
-  std::vector<p::Place> gpu_list_;
-  f::details::BroadcastOpHandle* bc_op_handle_;
 };
 
-TEST_F(BroadcastTester, TestCPUBroadcastTestLodTensor) {
-  InitCtxOnGpu(false);
-  TestBroadcastLodTensor();
+TEST(BroadcastTester, TestCPUBroadcastTestLodTensor) {
+  TestBroadcastOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(false);
+  test_op.InitBroadcastOp(input_scope_idx);
+  test_op.TestBroadcastLodTensor(input_scope_idx);
 }
 
-TEST_F(BroadcastTester, TestCPUBroadcastTestSelectedRows) {
-  InitCtxOnGpu(false);
-  TestBroadcastSelectedRows();
+TEST(BroadcastTester, TestCPUBroadcastTestSelectedRows) {
+  TestBroadcastOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(false);
+  test_op.InitBroadcastOp(input_scope_idx);
+  test_op.TestBroadcastSelectedRows(input_scope_idx);
 }
 
 #ifdef PADDLE_WITH_CUDA
-TEST_F(BroadcastTester, TestGPUBroadcastTestLodTensor) {
-  InitCtxOnGpu(true);
-  TestBroadcastLodTensor();
+TEST(BroadcastTester, TestGPUBroadcastTestLodTensor) {
+  TestBroadcastOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(true);
+  test_op.InitBroadcastOp(input_scope_idx);
+  test_op.TestBroadcastLodTensor(input_scope_idx);
 }
 
-TEST_F(BroadcastTester, TestGPUBroadcastTestSelectedRows) {
-  InitCtxOnGpu(true);
-  TestBroadcastSelectedRows();
+TEST(BroadcastTester, TestGPUBroadcastTestSelectedRows) {
+  TestBroadcastOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(true);
+  test_op.InitBroadcastOp(input_scope_idx);
+  test_op.TestBroadcastSelectedRows(input_scope_idx);
 }
 #endif
diff --git a/paddle/fluid/framework/details/gather_op_handle.cc b/paddle/fluid/framework/details/gather_op_handle.cc
index f9dfb2f5c69..3c3054c03d9 100644
--- a/paddle/fluid/framework/details/gather_op_handle.cc
+++ b/paddle/fluid/framework/details/gather_op_handle.cc
@@ -23,32 +23,54 @@ GatherOpHandle::GatherOpHandle(const std::vector<Scope *> &local_scopes,
     : local_scopes_(local_scopes), places_(places) {}
 
 void GatherOpHandle::RunImpl() {
+  // the input may have dummy var.
+  std::vector<VarHandle *> in_var_handles;
+  for (auto *in : inputs_) {
+    auto *in_handle = dynamic_cast<VarHandle *>(in);
+    if (in_handle) {
+      in_var_handles.push_back(in_handle);
+    }
+  }
   PADDLE_ENFORCE_EQ(
-      this->inputs_.size(), places_.size(),
-      "The number of inputs should be equal to the number of place.");
-  PADDLE_ENFORCE_EQ(this->outputs_.size(), 1,
+      in_var_handles.size(), places_.size(),
+      "The number of input should be equal to the number of places.");
+
+  // the output may have dummy var.
+  std::vector<VarHandle *> out_var_handles;
+  for (auto *out : outputs_) {
+    auto *out_handle = dynamic_cast<VarHandle *>(out);
+    if (out_handle) {
+      out_var_handles.push_back(out_handle);
+    }
+  }
+  PADDLE_ENFORCE_EQ(out_var_handles.size(), 1,
                     "The number of output should be one.");
 
-  auto in_0_handle = static_cast<VarHandle *>(inputs_[0]);
+  auto in_0_handle = static_cast<VarHandle *>(in_var_handles[0]);
   auto pre_in_var =
       local_scopes_[in_0_handle->scope_idx_]->FindVar(in_0_handle->name_);
+  auto pre_place = in_0_handle->place_;
+
   PADDLE_ENFORCE(pre_in_var->IsType<SelectedRows>(),
                  "Currently, gather_op only can gather SelectedRows.");
-  auto pre_place = in_0_handle->place_;
+
+  PADDLE_ENFORCE_EQ(out_var_handles[0]->place_.which(), pre_place.which(),
+                    "The place of input and output should be the same.");
 
   // Wait input done, this Wait is asynchronous operation
-  for (auto *in : inputs_) {
-    if (inputs_[0]->generated_op_) {
-      auto &p = static_cast<VarHandle *>(in)->place_;
-      in->generated_op_->Wait(dev_ctxes_[p]);
+  for (auto *in : in_var_handles) {
+    if (in->generated_op_) {
+      in->generated_op_->Wait(dev_ctxes_[in->place_]);
     }
   }
 
   std::vector<int64_t> out_rows;
-  std::vector<Tensor *> in_tensors;
+  std::vector<Tensor> in_tensors;
   std::vector<platform::Place> in_places;
 
+  auto &pre_in = pre_in_var->Get<SelectedRows>();
   // gather the inputs
-  for (auto *in : inputs_) {
+  for (auto *in : in_var_handles) {
     auto in_handle = static_cast<VarHandle *>(in);
     auto in_p = in_handle->place_;
     in_places.push_back(in_p);
@@ -58,63 +80,46 @@ void GatherOpHandle::RunImpl() {
                       "The place of input should be the same.");
     auto *s = local_scopes_[in_handle->scope_idx_];
     auto in_var = s->FindVar(in_handle->name_);
-    PADDLE_ENFORCE_EQ(in_var->Type(), pre_in_var->Type(),
+
+    auto &in_sr = in_var->Get<SelectedRows>();
+
+    PADDLE_ENFORCE_EQ(in_sr.value().type(), pre_in.value().type(),
                       "The type of input is not consistent.");
+    PADDLE_ENFORCE_EQ(pre_in.height(), in_sr.height(),
+                      "The height of inputs is not consistent.");
+    PADDLE_ENFORCE_EQ(pre_in.GetCompleteDims(), in_sr.GetCompleteDims(),
+                      "The dims of inputs is not consistent.");
 
-    if (in_var->IsType<SelectedRows>()) {
-      auto &pre_in = pre_in_var->Get<SelectedRows>();
-      auto &in_sr = in_var->Get<SelectedRows>();
-      auto in_sr_rows = in_sr.rows();
-      out_rows.insert(out_rows.begin(), in_sr_rows.begin(), in_sr_rows.end());
-      PADDLE_ENFORCE_EQ(pre_in.height(), in_sr.height(),
-                        "The height of inputs is not consistent.");
-      PADDLE_ENFORCE_EQ(pre_in.GetCompleteDims(), in_sr.GetCompleteDims(),
-                        "The dims of inputs is not consistent.");
-    } else if (in_var->IsType<LoDTensor>()) {
-      auto &pre_in = pre_in_var->Get<LoDTensor>();
-      auto &in_lodtensor = in_var->Get<LoDTensor>();
-      PADDLE_ENFORCE_EQ(in_lodtensor.lod(), pre_in.lod(),
-                        "The lod of inputs is not consistent.");
-      PADDLE_ENFORCE_EQ(in_lodtensor.dims(), pre_in.dims(),
-                        "The dims of inputs is not consistent.");
dims of inputs is not consistent."); - } else { - PADDLE_THROW("Var should be LoDTensor or SelectedRows."); - } - in_tensors.push_back(GetTensorFromVar(in_var)); - pre_in_var = in_var; + auto in_sr_rows = in_sr.rows(); + out_rows.insert(out_rows.end(), in_sr_rows.begin(), in_sr_rows.end()); + + in_tensors.emplace_back(in_sr.value()); } // write the output - auto out_handle = static_cast(this->outputs_[0]); - auto &out_place = out_handle->place_; - auto out_scope_idx = out_handle->scope_idx_; - auto out_var = local_scopes_[out_scope_idx]->FindVar(out_handle->name_); - PADDLE_ENFORCE_EQ(out_place.which(), pre_place.which(), - "The place of input and output should be the same."); - if (pre_in_var->IsType()) { - auto &pre_in = pre_in_var->Get(); - auto out = out_var->GetMutable(); - out->set_height(pre_in.height()); - out->set_rows(out_rows); - size_t rows = out_rows.size(); - DDim out_dim = pre_in.GetCompleteDims(); - out_dim[0] = static_cast(rows); - out->mutable_value()->Resize(out_dim); - out->mutable_value()->mutable_data(out_place, pre_in.value().type()); - auto out_tensor = out->mutable_value(); - // copy - int s = 0, e = 0; - for (size_t j = 0; j < in_tensors.size(); ++j) { - e += in_tensors[j]->dims()[0]; - auto sub_out = out_tensor->Slice(s, e); - paddle::framework::TensorCopy(*(in_tensors[j]), out_place, - *(dev_ctxes_[in_places[j]]), &sub_out); - s = e; - } - } else if (pre_in_var->IsType()) { - PADDLE_THROW("Currently, Var only can be SelectedRows."); - } else { - PADDLE_THROW("Var should be SelectedRows."); + auto &out_place = out_var_handles[0]->place_; + auto out_scope_idx = out_var_handles[0]->scope_idx_; + auto out_var = + local_scopes_[out_scope_idx]->FindVar(out_var_handles[0]->name_); + + auto out = out_var->GetMutable(); + out->set_height(pre_in.height()); + out->set_rows(out_rows); + size_t rows = out_rows.size(); + DDim out_dim = pre_in.GetCompleteDims(); + out_dim[0] = static_cast(rows); + out->mutable_value()->Resize(out_dim); + out->mutable_value()->mutable_data(out_place, pre_in.value().type()); + Tensor *out_tensor = out->mutable_value(); + + // copy + int s = 0, e = 0; + for (size_t j = 0; j < in_tensors.size(); ++j) { + e += in_tensors[j].dims()[0]; + auto sub_out = out_tensor->Slice(s, e); + paddle::framework::TensorCopy(in_tensors[j], out_place, + *(dev_ctxes_[in_places[j]]), &sub_out); + s = e; } } diff --git a/paddle/fluid/framework/details/gather_op_handle_test.cc b/paddle/fluid/framework/details/gather_op_handle_test.cc index 3cf21553207..10839f239d5 100644 --- a/paddle/fluid/framework/details/gather_op_handle_test.cc +++ b/paddle/fluid/framework/details/gather_op_handle_test.cc @@ -26,14 +26,26 @@ namespace p = paddle::platform; // test data amount const f::DDim kDims = {20, 20}; -class GatherTester : public ::testing::Test { - public: +struct TestGatherOpHandle { + std::vector> ctxs_; + std::vector local_scopes_; + Scope g_scope_; + std::unique_ptr op_handle_; + std::vector> vars_; + std::vector gpu_list_; + + void WaitAll() { + for (size_t j = 0; j < ctxs_.size(); ++j) { + ctxs_[j]->Wait(); + } + } + void InitCtxOnGpu(bool use_gpu) { if (use_gpu) { #ifdef PADDLE_WITH_CUDA int count = p::GetCUDADeviceCount(); if (count <= 1) { - LOG(WARNING) << "Cannot test multi-gpu Gather, because the CUDA " + LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA " "device count is " << count; exit(0); @@ -56,57 +68,51 @@ class GatherTester : public ::testing::Test { } } - void InitGatherOp(int input_scope_idx) { + void InitGatherOp(size_t input_scope_idx) 
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      local_scope_.push_back(&g_scope_.NewScope());
-      local_scope_[j]->Var("input");
+      local_scopes_.push_back(&(g_scope_.NewScope()));
+      local_scopes_[j]->Var("out");
     }
-    local_scope_[input_scope_idx]->Var("out");
-
-    gather_op_handle_ = new f::details::GatherOpHandle(local_scope_, gpu_list_);
-
-    f::details::VarHandle* out_var_handle = new f::details::VarHandle();
-    out_var_handle->place_ = gpu_list_[input_scope_idx];
-    out_var_handle->name_ = "out";
-    out_var_handle->version_ = 2;
-    out_var_handle->scope_idx_ = input_scope_idx;
-    out_var_handle->generated_op_ = gather_op_handle_;
-    gather_op_handle_->AddOutput(out_var_handle);
+    local_scopes_[input_scope_idx]->Var("input");
 
+    op_handle_.reset(new GatherOpHandle(local_scopes_, gpu_list_));
+
+    // add input
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      gather_op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j];
-      f::details::VarHandle* in_var_handle = new f::details::VarHandle();
+      op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
+      vars_.emplace_back(new VarHandle());
+      VarHandle* in_var_handle = static_cast<VarHandle*>(vars_.back().get());
       in_var_handle->place_ = gpu_list_[j];
       in_var_handle->name_ = "input";
       in_var_handle->version_ = 1;
       in_var_handle->scope_idx_ = j;
       in_var_handle->generated_op_ = nullptr;
-      gather_op_handle_->AddInput(in_var_handle);
-    }
-  }
-  void GatherOpDestroy() {
-    for (auto in : gather_op_handle_->inputs_) {
-      delete in;
-    }
-    for (auto out : gather_op_handle_->outputs_) {
-      delete out;
-    }
-    delete gather_op_handle_;
-    for (size_t j = 0; j < ctxs_.size(); ++j) {
-      delete ctxs_[j];
+      op_handle_->AddInput(in_var_handle);
     }
-  }
 
-  void WaitAll() {
-    for (size_t j = 0; j < ctxs_.size(); ++j) {
-      ctxs_[j]->Wait();
-    }
-  }
+    // add dummy var
+    vars_.emplace_back(new DummyVarHandle());
+    DummyVarHandle* in_dummy_var_handle =
+        static_cast<DummyVarHandle*>(vars_.back().get());
+    in_dummy_var_handle->generated_op_ = nullptr;
+    op_handle_->AddInput(in_dummy_var_handle);
+
+    // add output
+    vars_.emplace_back(new VarHandle());
+    VarHandle* out_var_handle = static_cast<VarHandle*>(vars_.back().get());
+    out_var_handle->place_ = gpu_list_[input_scope_idx];
+    out_var_handle->name_ = "out";
+    out_var_handle->version_ = 2;
+    out_var_handle->scope_idx_ = input_scope_idx;
+    op_handle_->AddOutput(out_var_handle);
 
-  void TestGatherSelectedRows() {
-    int output_scope_idx = 0;
-    InitGatherOp(output_scope_idx);
+    // add dummy var
+    vars_.emplace_back(new DummyVarHandle());
+    DummyVarHandle* dummy_var_handle =
+        static_cast<DummyVarHandle*>(vars_.back().get());
+    op_handle_->AddOutput(dummy_var_handle);
+  }
 
+  void TestGatherSelectedRows(size_t output_scope_idx) {
     int height = kDims[0] * 2;
     std::vector<int64_t> rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1,
                               2, 4, 6, 3, 1, 1, 1, 1, 3, 7};
@@ -117,7 +123,7 @@ class GatherTester : public ::testing::Test {
     for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size();
          ++input_scope_idx) {
-      auto in_var = local_scope_[input_scope_idx]->Var("input");
+      auto in_var = local_scopes_[input_scope_idx]->Var("input");
       auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
       auto value = in_selected_rows->mutable_value();
       value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
@@ -130,13 +136,21 @@ class GatherTester : public ::testing::Test {
       value->Resize(kDims);
     }
 
-    gather_op_handle_->Run(false);
+    auto out_var = local_scopes_[output_scope_idx]->Var("out");
+    auto out_selected_rows = out_var->GetMutable<f::SelectedRows>();
+
+    auto in_var = local_scopes_[output_scope_idx]->Var("input");
+    auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
+
+    out_selected_rows->mutable_value()->ShareDataWith(
+        in_selected_rows->value());
+
+    op_handle_->Run(false);
 
     WaitAll();
 
     p::CPUPlace cpu_place;
 
-    auto out_var = local_scope_[output_scope_idx]->Var("out");
     auto& out_select_rows = out_var->Get<f::SelectedRows>();
     auto rt = out_select_rows.value();
 
@@ -152,28 +166,25 @@ class GatherTester : public ::testing::Test {
     for (int64_t j = 0; j < f::product(kDims); ++j) {
       ASSERT_NEAR(ct[j], send_vector[j % send_vector.size()], 1e-5);
     }
-
-    GatherOpDestroy();
   }
-
- public:
-  f::Scope g_scope_;
-  std::vector<p::DeviceContext*> ctxs_;
-  std::vector<f::Scope*> local_scope_;
-  std::vector<p::Place> gpu_list_;
-  f::details::GatherOpHandle* gather_op_handle_;
 };
 
-TEST_F(GatherTester, TestCPUGatherTestSelectedRows) {
-  InitCtxOnGpu(false);
-  TestGatherSelectedRows();
+TEST(GatherTester, TestCPUGatherTestSelectedRows) {
+  TestGatherOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(false);
+  test_op.InitGatherOp(input_scope_idx);
+  test_op.TestGatherSelectedRows(input_scope_idx);
 }
 
 #ifdef PADDLE_WITH_CUDA
-TEST_F(GatherTester, TestGPUGatherTestSelectedRows) {
-  InitCtxOnGpu(true);
-  TestGatherSelectedRows();
+TEST(GatherTester, TestGPUGatherTestSelectedRows) {
+  TestGatherOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(true);
+  test_op.InitGatherOp(input_scope_idx);
+  test_op.TestGatherSelectedRows(input_scope_idx);
 }
 #endif
 }  // namespace details
diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc
index 0d7fbdfeab4..e4194a7442f 100644
--- a/paddle/fluid/framework/details/op_handle_base.cc
+++ b/paddle/fluid/framework/details/op_handle_base.cc
@@ -17,21 +17,6 @@
 namespace paddle {
 namespace framework {
 namespace details {
-
-// GetTensorFromVar is used in broadcast_op handle and gather_op handle, so it
-// should be placed in a commonplace. I don't find an appropriate place, so I
-// temporarily place it in op_handle_base.
-Tensor *GetTensorFromVar(Variable *in_var) {
-  if (in_var->IsType<LoDTensor>()) {
-    return in_var->GetMutable<LoDTensor>();
-  } else if (in_var->IsType<SelectedRows>()) {
-    return in_var->GetMutable<SelectedRows>()->mutable_value();
-  } else {
-    PADDLE_THROW("Var should be LoDTensor or SelectedRows");
-  }
-  return nullptr;
-}
-
 std::string OpHandleBase::DebugString() const {
   std::stringstream ss;
   ss << "(";
diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h
index b733817dcd8..fbdb54ba8d9 100644
--- a/paddle/fluid/framework/details/op_handle_base.h
+++ b/paddle/fluid/framework/details/op_handle_base.h
@@ -17,9 +17,6 @@
 #include <string>
 
 #include "paddle/fluid/framework/details/var_handle.h"
-#include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/selected_rows.h"
-#include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/macros.h"
 
@@ -27,11 +24,6 @@ namespace paddle {
 namespace framework {
 namespace details {
 
-// GetTensorFromVar is used in broadcast_op handle and gather_op handle, so it
-// should be placed in a commonplace. I don't find an appropriate place, so I
-// temporarily place it in op_handle.
-Tensor *GetTensorFromVar(Variable *in_var);
-
 constexpr char kLocalExecScopeName[] = "@LCOAL_SCOPE@";
 
 class OpHandleBase {
-- 
GitLab