From 35074963e359ba9ce5e38279fc1205bcee67157d Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Sun, 27 Sep 2020 12:17:50 +0800 Subject: [PATCH] Refine error msg in paddle/fluid/framework/details [part 2] (#27429) * refine broadcast_op_handle * refine some error messages * refine some files * fix bug * fix bug * fix bug * follow comments * follow comments --- .../framework/details/all_reduce_op_handle.cc | 4 +- .../framework/details/broadcast_op_handle.cc | 32 ++++-- .../details/broadcast_op_handle_test.h | 41 ++++++-- .../fluid/framework/details/build_strategy.cc | 3 +- .../details/eager_deletion_op_handle.cc | 40 +++++--- .../details/fused_all_reduce_op_handle.cc | 85 +++++++++++++--- .../details/fused_broadcast_op_handle.cc | 11 ++- .../details/fused_broadcast_op_handle_test.cc | 5 +- .../framework/details/gather_op_handle.cc | 39 ++++++-- .../details/gather_op_handle_test.cc | 28 +++++- .../fluid/framework/details/nccl_op_handle.h | 61 +++++++----- .../fluid/framework/details/op_handle_base.cc | 49 +++++---- paddle/fluid/framework/details/op_registry.h | 14 +-- .../framework/details/reduce_and_gather.h | 28 +++++- .../framework/details/reduce_op_handle.cc | 57 ++++++++--- .../details/reduce_op_handle_test.cc | 42 ++++++-- .../details/share_tensor_buffer_functor.cc | 9 +- .../details/sparse_all_reduce_op_handle.cc | 99 ++++++++++++++----- 18 files changed, 475 insertions(+), 172 deletions(-) diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index 939a2fc8fc..78887f3ac5 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -76,7 +76,7 @@ void AllReduceOpHandle::AllReduceImpl( platform::errors::InvalidArgument( "The NoDummyInputSize should be equal " "to the number of places, but got NoDummyInputSize is " - "%d and the number of place is %d.", + "%d and the number of places is %d.", in_var_handles.size(), num_places)); PADDLE_ENFORCE_EQ( in_var_handles.size(), out_var_handles.size(), @@ -89,7 +89,7 @@ void AllReduceOpHandle::AllReduceImpl( platform::errors::InvalidArgument( "The number of local scopes should be equal " "to the number of places, but got the number of local scopes is " - "%d and the number of place is %d.", + "%d and the number of places is %d.", in_var_handles.size(), num_places)); std::vector lod_tensor_data; diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc index 4c3b0a7c6a..35b1066067 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/broadcast_op_handle.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/details/broadcast_op_handle.h" + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/platform/profiler.h" @@ -31,10 +32,15 @@ void BroadcastOpHandle::RunImpl() { auto out_var_handles = DynamicCast(outputs_); PADDLE_ENFORCE_EQ(in_var_handles.size(), 1UL, - "The number of input should be one."); - PADDLE_ENFORCE_EQ( - out_var_handles.size(), places_.size(), - "The number of output should equal to the number of places."); + platform::errors::PreconditionNotMet( + "The number of inputs should be 1, but got %d.", + in_var_handles.size())); + PADDLE_ENFORCE_EQ(out_var_handles.size(), places_.size(), + platform::errors::PreconditionNotMet( + "The number of outputs and the number of places should " + "be equal, but got the number of outputs is %d and the " + "number of places is %d.", + out_var_handles.size(), places_.size())); VarHandle *in_var_handle = in_var_handles[0]; @@ -47,7 +53,9 @@ void BroadcastOpHandle::BroadcastOneVar( const std::vector &var_scopes) { auto *in_var = var_scopes.at(in_var_handle.scope_idx())->FindVar(in_var_handle.name()); - PADDLE_ENFORCE_NOT_NULL(in_var); + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound("Variable %s is not found in scopes.", + in_var_handle.name())); Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var); if (UNLIKELY(!in_tensor.IsInitialized())) { VLOG(3) << "in var " << in_var_handle.name() << "not inited, return!"; @@ -103,7 +111,7 @@ void BroadcastOpHandle::BroadcastOneVar( broadcast_calls.emplace_back( [send_recv_buffer, numel, type, root_id, &nccl_ctx] { - PADDLE_ENFORCE(platform::dynload::ncclBcast( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast( send_recv_buffer, numel, static_cast(type), root_id, nccl_ctx.comm_, nccl_ctx.stream())); }); @@ -131,7 +139,8 @@ void BroadcastOpHandle::BroadcastOneVar( nccl_ctxs_->DevCtx(p)->Wait(); } #else - PADDLE_THROW("CUDA is not enabled."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with NCLL.")); #endif } } @@ -154,10 +163,13 @@ void BroadcastOpHandle::InitOutputValue( auto t_out_p = out_var_handle->place(); auto *out_var = var_scopes.at(out_var_handle->scope_idx()) ->FindVar(out_var_handle->name()); - PADDLE_ENFORCE_NOT_NULL(out_var); + PADDLE_ENFORCE_NOT_NULL(out_var, platform::errors::NotFound( + "Variable %s is not found in scopes.", + out_var_handle->name())); if (is_gpu_place(in_tensor.place())) { - PADDLE_ENFORCE(platform::is_gpu_place(t_out_p), - "Places of input and output must be all on GPU."); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(t_out_p), true, + platform::errors::PreconditionNotMet( + "Places of input and output must be all on GPU.")); } else { t_out_p = platform::CPUPlace(); } diff --git a/paddle/fluid/framework/details/broadcast_op_handle_test.h b/paddle/fluid/framework/details/broadcast_op_handle_test.h index e455879a68..4fdc420e1e 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle_test.h +++ b/paddle/fluid/framework/details/broadcast_op_handle_test.h @@ -79,7 +79,8 @@ struct TestBroadcastOpHandle { } nccl_ctxs_.reset(new platform::NCCLContextMap(place_list_)); #else - PADDLE_THROW("CUDA is not support."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with NCLL.")); #endif } else { int count = 8; @@ -113,7 +114,8 @@ struct TestBroadcastOpHandle { op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get()); #else - PADDLE_THROW("CUDA is not support."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with NCLL.")); #endif } else { #if defined(PADDLE_WITH_NCCL) @@ -171,7 +173,9 @@ struct TestBroadcastOpHandle { float val_scalar = 0.0) { auto var = param_scopes_[input_scope_idx]->FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found in scope.", + varname)); auto lod_tensor = var->GetMutable(); std::vector send_vector(static_cast(f::product(kDims))); for (size_t k = 0; k < send_vector.size(); ++k) { @@ -194,7 +198,9 @@ struct TestBroadcastOpHandle { } auto var = param_scopes_[input_scope_idx]->FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found in scope.", + varname)); auto selected_rows = var->GetMutable(); auto value = selected_rows->mutable_value(); value->mutable_data(kDims, place_list_[input_scope_idx]); @@ -211,13 +217,24 @@ struct TestBroadcastOpHandle { const std::vector& send_vector, const std::vector& rows, int height) { auto var = param_scopes_[input_scope_idx]->FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found in scope.", + varname)); auto& selected_rows = var->Get(); auto rt = selected_rows.value(); - PADDLE_ENFORCE_EQ(selected_rows.height(), height, "height is not equal."); + PADDLE_ENFORCE_EQ(selected_rows.height(), height, + platform::errors::InvalidArgument( + "The height of SelectedRows is not equal to " + "the expected, expect %d, but got %ld.", + height, selected_rows.height())); for (size_t k = 0; k < selected_rows.rows().size(); ++k) { - PADDLE_ENFORCE_EQ(selected_rows.rows()[k], rows[k]); + PADDLE_ENFORCE_EQ( + selected_rows.rows()[k], rows[k], + platform::errors::InvalidArgument( + "The item at position %zu of rows of SelectedRows " + "is not equal to the expected, expect %ld, but got %ld.", + k, rows[k], selected_rows.rows()[k])); } p::CPUPlace cpu_place; @@ -235,9 +252,15 @@ struct TestBroadcastOpHandle { framework::Scope* scope) { p::CPUPlace cpu_place; auto var = scope->FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found in scope.", + varname)); auto tensor = var->Get(); - PADDLE_ENFORCE_EQ(tensor.lod(), lod, "lod is not equal."); + PADDLE_ENFORCE_EQ(tensor.lod(), lod, + platform::errors::InvalidArgument( + "The LoD of tensor is not equal to " + "the expected, expect %s, but got %s.", + lod, tensor.lod())); f::Tensor result_tensor; f::TensorCopySync(tensor, cpu_place, &result_tensor); float* ct = result_tensor.mutable_data(cpu_place); diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index ecdb8cc9b8..962f968c84 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -235,7 +235,8 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { AppendPass("reduce_mode_multi_devices_pass").get(); break; default: - PADDLE_THROW("Unknown reduce strategy."); + PADDLE_THROW( + platform::errors::Unimplemented("Unknown reduce strategy.")); } } multi_devices_pass->SetNotOwned("strategy", diff --git a/paddle/fluid/framework/details/eager_deletion_op_handle.cc b/paddle/fluid/framework/details/eager_deletion_op_handle.cc index 7735f9720c..266557cb85 100644 --- a/paddle/fluid/framework/details/eager_deletion_op_handle.cc +++ b/paddle/fluid/framework/details/eager_deletion_op_handle.cc @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/details/eager_deletion_op_handle.h" + #include #include #include -#include "paddle/fluid/framework/details/eager_deletion_op_handle.h" #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/scope.h" @@ -47,15 +48,19 @@ EagerDeletionOpHandle::EagerDeletionOpHandle( if (dynamic_cast(gc_)) { platform::CUDADeviceGuard guard( BOOST_GET_CONST(platform::CUDAPlace, place).device); - PADDLE_ENFORCE(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming)); - PADDLE_ENFORCE_NOT_NULL(event_); + PADDLE_ENFORCE_CUDA_SUCCESS( + cudaEventCreateWithFlags(&event_, cudaEventDisableTiming)); + PADDLE_ENFORCE_NOT_NULL(event_, platform::errors::InvalidArgument( + "The cuda envet created is NULL.")); } } #endif - PADDLE_ENFORCE_NE(vars.empty(), true, platform::errors::InvalidArgument( - "Variable names are empty.")); + PADDLE_ENFORCE_NE(vars.empty(), true, + platform::errors::InvalidArgument( + "The variables to be deleted are empty.")); for (auto *var : var_infos_) { - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL(var, platform::errors::InvalidArgument( + "The memory optimization info is NULL.")); } } @@ -64,7 +69,7 @@ EagerDeletionOpHandle::~EagerDeletionOpHandle() { if (event_) { auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dev_ctx_->GetPlace()); platform::CUDADeviceGuard guard(gpu_place.device); - PADDLE_ENFORCE(cudaEventDestroy(event_)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(event_)); } #endif } @@ -78,12 +83,17 @@ void EagerDeletionOpHandle::InitCUDA() { } void EagerDeletionOpHandle::CallOnce() { - PADDLE_ENFORCE(vars_.empty(), "vars_ must be initialized here"); + PADDLE_ENFORCE_EQ( + vars_.empty(), true, + platform::errors::InvalidArgument( + "The variables to be deleted should be initialized here.")); Scope *exec_scope = local_exec_scopes_[0]; for (auto *var_info : var_infos_) { auto *var = exec_scope->FindVar(var_info->Name()); - PADDLE_ENFORCE_NOT_NULL(var, "Variable %s should not be nullptr", - var_info->Name()); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound( + "The variable(%s) to be inplaced is not found in scope.", + var_info->Name())); vars_.emplace_back(var); } } @@ -119,8 +129,9 @@ void EagerDeletionOpHandle::RunImpl() { garbages.emplace_back(t.MoveMemoryHolder()); } } else { - PADDLE_THROW("Type %s of %s is not supported eager deletion", - framework::ToTypeName(var->Type()), var_info->Name()); + PADDLE_THROW(platform::errors::Unimplemented( + "The variable(%s) of type %s is not supported in eager deletion.", + framework::ToTypeName(var->Type()), var_info->Name())); } } @@ -137,8 +148,9 @@ void EagerDeletionOpHandle::ClearGarbages( auto callback_stream = reinterpret_cast(gc_)->stream(); auto callback_func = [=]() { - PADDLE_ENFORCE(cudaEventRecord(event_, compute_stream)); - PADDLE_ENFORCE(cudaStreamWaitEvent(callback_stream, event_, 0)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventRecord(event_, compute_stream)); + PADDLE_ENFORCE_CUDA_SUCCESS( + cudaStreamWaitEvent(callback_stream, event_, 0)); }; gc_->Add(std::move(*garbages), callback_func); } else { diff --git a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc index c67e21d5c4..c538811669 100644 --- a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h" + #include #include + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -56,10 +58,20 @@ void FusedAllReduceOpHandle::RunImpl() { size_t place_num = places_.size(); PADDLE_ENFORCE_EQ( in_var_handles.size(), place_num * num_of_all_reduce_, - "The NoDummyInputSize should be equal to the number of places."); + platform::errors::PreconditionNotMet( + "The number of input variable handles should be equal to the number " + "of places plus the number of all reduce handles, " + "but got the number of input variable handles is %d, the " + "number of places is %d, and the number of all reduce handles " + "is %d.", + in_var_handles.size(), place_num, num_of_all_reduce_)); PADDLE_ENFORCE_EQ( in_var_handles.size(), out_var_handles.size(), - "The NoDummyInputSize and NoDummyOutputSize should be equal."); + platform::errors::PreconditionNotMet( + "The number of input variable handles should be equal to the number " + "of output variable handles, but got the number of input variable " + "handles is %d, and the number of output variable handles is %d.", + in_var_handles.size(), out_var_handles.size())); // Note: some gradient op doesn't have CUDAKernel, so the gradients of // those op are in CPUPlace, in this case, the all reduce should not be fused. @@ -106,7 +118,13 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc( dtype = ele_dtype; } - PADDLE_ENFORCE_EQ(ele_dtype, dtype); + PADDLE_ENFORCE_EQ( + ele_dtype, dtype, + platform::errors::InvalidArgument( + "The DataType of grad tensors of fused_all_reduce_op_handle " + "must be consistent. The current dtype is %s, but the " + "previous dtype is %s.", + DataTypeToString(ele_dtype), DataTypeToString(dtype))); // Check whether the address space is contiguous. std::sort( @@ -130,16 +148,29 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc( "input[%d] address: 0X%02x. The offset: %d", k - 1, g_tensor.at(k - 1).first, cur_address, g_tensor.at(k).first, k, next_address, k, infer_next_address, offset); - PADDLE_ENFORCE_EQ(infer_next_address, next_address, - "The address is not consistent."); + PADDLE_ENFORCE_EQ( + infer_next_address, next_address, + platform::errors::InvalidArgument( + "The infered address of the next tensor should be equal to the " + "real address of the next tensor. But got infered address is %p " + "and real address is %p.", + infer_next_address, next_address)); } } if (!FLAGS_skip_fused_all_reduce_check) { for (size_t scope_idx = 0; scope_idx < place_num; ++scope_idx) { for (size_t j = 1; j < num_of_all_reduce_; ++j) { - PADDLE_ENFORCE_EQ(grads_tensor.at(0).at(j).first, - grads_tensor.at(scope_idx).at(j).first); + PADDLE_ENFORCE_EQ( + grads_tensor.at(0).at(j).first, + grads_tensor.at(scope_idx).at(j).first, + platform::errors::InvalidArgument( + "The variable name of grad tensors of " + "fused_all_reduce_op_handle " + "must be consistent. The current name is %s, but the " + "previous name is %s.", + grads_tensor.at(0).at(j).first, + grads_tensor.at(scope_idx).at(j).first)); } } } @@ -167,7 +198,9 @@ bool FusedAllReduceOpHandle::InputIsInDifferentPlace( for (size_t j = 0; j < in_var_handles.size(); j += place_num) { auto var_name = in_var_handles[j]->name(); auto var = local_scope->FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL(var, "%s is not found in local scope.", var_name); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound( + "The variable '%s' is not found in local scope.", var_name)); auto &lod_tensor = var->Get(); if (!is_same_place(lod_tensor.place(), places_.at(scope_idx))) { return true; @@ -185,14 +218,24 @@ void FusedAllReduceOpHandle::GetGradLoDTensor( size_t place_num = places_.size(); for (size_t j = 0; j < in_var_handles.size(); j += place_num) { auto var_name = in_var_handles[j]->name(); - PADDLE_ENFORCE_EQ(var_name, out_var_handles[j]->name()); + PADDLE_ENFORCE_EQ( + var_name, out_var_handles[j]->name(), + platform::errors::InvalidArgument( + "The name of input variable should be equal " + "to the name of output variable. But got the name of input " + "variable is %s and the name of output variable is %s.", + var_name, out_var_handles[j]->name())); auto var = local_scope->FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL(var, "%s is not found in local scope.", var_name); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound( + "The variable '%s' is not found in local scope.", var_name)); auto &lod_tensor = var->Get(); PADDLE_ENFORCE_EQ( platform::is_same_place(lod_tensor.place(), places_.at(scope_idx)), - true, "%s(%d) is not in the right place.", var_name, scope_idx); + true, platform::errors::InvalidArgument( + "The variable '%s' at scope %d is not in the right place.", + var_name, scope_idx)); grad_tensor->emplace_back(std::make_pair(var_name, &lod_tensor)); } } @@ -204,16 +247,26 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel( size_t size_of_dtype = 0; for (size_t i = 0; i < grad_tensor.size(); ++i) { // Get dtype - auto ele_type = grad_tensor.at(i).second->type(); + auto ele_dtype = grad_tensor.at(i).second->type(); if (i == 0) { - *dtype = ele_type; - size_of_dtype = framework::SizeOfType(ele_type); + *dtype = ele_dtype; + size_of_dtype = framework::SizeOfType(ele_dtype); } - PADDLE_ENFORCE_EQ(ele_type, *dtype); + PADDLE_ENFORCE_EQ( + ele_dtype, *dtype, + platform::errors::InvalidArgument( + "The DataType of grad tensors of fused_all_reduce_op_handle " + "must be consistent. The current dtype is %s, but the " + "previous dtype is %s.", + DataTypeToString(ele_dtype), DataTypeToString(*dtype))); // Get element number int64_t len = grad_tensor.at(i).second->numel(); - PADDLE_ENFORCE_GT(len, 0); + PADDLE_ENFORCE_GT( + len, 0, platform::errors::InvalidArgument( + "The size of grad tensors of fused_all_reduce_op_handle " + "must be > 0, but got %d.", + len)); *numel += platform::Alignment(len * size_of_dtype, places_[0]) / size_of_dtype; } diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle.cc index 59c5da0de8..1ae09dcde9 100644 --- a/paddle/fluid/framework/details/fused_broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/fused_broadcast_op_handle.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/details/fused_broadcast_op_handle.h" + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/platform/profiler.h" @@ -32,7 +33,15 @@ void FusedBroadcastOpHandle::RunImpl() { WaitInputVarGenerated(); size_t place_num = places_.size(); - PADDLE_ENFORCE_EQ(in_var_handles.size() * place_num, out_var_handles.size()); + PADDLE_ENFORCE_EQ( + in_var_handles.size() * place_num, out_var_handles.size(), + platform::errors::PreconditionNotMet( + "The number of input variable handles plus the number " + "of places should be equal to the number of output variable handles, " + "but got the number of input variable handles is %d, the " + "number of places is %d, and the number of output variable handles " + "is %d.", + in_var_handles.size(), place_num, out_var_handles.size())); for (size_t i = 0; i < in_var_handles.size(); ++i) { BroadcastOneVar( diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc index 761a5b5a30..ce7621d4e3 100644 --- a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc +++ b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/framework/details/fused_broadcast_op_handle.h" + #include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/details/broadcast_op_handle_test.h" #include "paddle/fluid/framework/details/op_handle_base.h" @@ -58,7 +60,8 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle { op_handle_ = new FusedBroadcastOpHandle( nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get()); #else - PADDLE_THROW("CUDA is not supported."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } else { #if defined(PADDLE_WITH_NCCL) diff --git a/paddle/fluid/framework/details/gather_op_handle.cc b/paddle/fluid/framework/details/gather_op_handle.cc index a039c6200e..2d3b2fb39a 100644 --- a/paddle/fluid/framework/details/gather_op_handle.cc +++ b/paddle/fluid/framework/details/gather_op_handle.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/details/gather_op_handle.h" + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -32,13 +33,20 @@ void GatherOpHandle::RunImpl() { PADDLE_ENFORCE_EQ( in_var_handles.size(), places_.size(), - "The number of output should equal to the number of places."); + platform::errors::InvalidArgument( + "The number of input variables should be equal " + "to the number of places, but got the number of input variables is " + "%d and the number of places is %d.", + in_var_handles.size(), places_.size())); VarHandle *out_var_handle; { auto out_var_handles = DynamicCast(this->Outputs()); - PADDLE_ENFORCE_EQ(out_var_handles.size(), 1, - "The number of output should be one."); + PADDLE_ENFORCE_EQ( + out_var_handles.size(), 1, + platform::errors::InvalidArgument( + "The number of output variables should be 1, but got %d.", + out_var_handles.size())); out_var_handle = out_var_handles.front(); } @@ -47,10 +55,14 @@ void GatherOpHandle::RunImpl() { auto in_0_handle = in_var_handles[0]; auto pre_in_var = var_scopes.at(in_0_handle->scope_idx())->FindVar(in_0_handle->name()); - PADDLE_ENFORCE_NOT_NULL(pre_in_var); + PADDLE_ENFORCE_NOT_NULL( + pre_in_var, + platform::errors::NotFound("The variable '%s' is not found in the scope.", + in_0_handle->name())); - PADDLE_ENFORCE(pre_in_var->IsType(), - "Currently, gather_op only can gather SelectedRows."); + PADDLE_ENFORCE_EQ(pre_in_var->IsType(), true, + platform::errors::Unimplemented( + "Currently, gather_op only supports SelectedRows.")); // Wait input done, this Wait is asynchronous operation WaitInputVarGenerated(); @@ -63,7 +75,10 @@ void GatherOpHandle::RunImpl() { for (auto *in_handle : in_var_handles) { auto *in_var = var_scopes.at(in_handle->scope_idx())->FindVar(in_handle->name()); - PADDLE_ENFORCE_NOT_NULL(in_var); + PADDLE_ENFORCE_NOT_NULL( + in_var, + platform::errors::NotFound( + "The variable '%s' is not found in the scope.", in_handle->name())); VariableVisitor::EnforceShapeAndDTypeEQ(*in_var, *pre_in_var); auto &in_sr_value = in_var->Get(); @@ -76,15 +91,19 @@ void GatherOpHandle::RunImpl() { // NOTE: The Places of all input tensor must be all on CPU or all on GPU. platform::Place t_out_p = out_var_handle->place(); if (platform::is_gpu_place(pre_in_value.place())) { - PADDLE_ENFORCE(platform::is_gpu_place(t_out_p), - "Places of input and output must be all on GPU."); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(t_out_p), true, + platform::errors::PreconditionNotMet( + "Places of input and output must be all on GPU.")); } else { t_out_p = platform::CPUPlace(); } auto out_var = var_scopes.at(out_var_handle->scope_idx()) ->FindVar(out_var_handle->name()); - PADDLE_ENFORCE_NOT_NULL(out_var); + PADDLE_ENFORCE_NOT_NULL( + out_var, + platform::errors::NotFound("The variable '%s' is not found in the scope.", + out_var_handle->name())); auto out_value = out_var->GetMutable(); out_value->set_height(pre_in_value.height()); out_value->set_rows(out_rows); diff --git a/paddle/fluid/framework/details/gather_op_handle_test.cc b/paddle/fluid/framework/details/gather_op_handle_test.cc index f3fcc1a436..60c1d0d39a 100644 --- a/paddle/fluid/framework/details/gather_op_handle_test.cc +++ b/paddle/fluid/framework/details/gather_op_handle_test.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/framework/details/gather_op_handle.h" + #include #include + #include "gtest/gtest.h" namespace paddle { @@ -60,7 +62,8 @@ struct TestGatherOpHandle { ctxs_.emplace_back(new p::CUDADeviceContext(p)); } #else - PADDLE_THROW("CUDA is not support."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } else { int count = 8; @@ -141,7 +144,9 @@ struct TestGatherOpHandle { for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size(); ++input_scope_idx) { auto in_var = param_scopes_.at(input_scope_idx)->FindVar("input"); - PADDLE_ENFORCE_NOT_NULL(in_var); + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound( + "The variable '%s' is not found in the scope.", "input")); auto in_selected_rows = in_var->GetMutable(); auto value = in_selected_rows->mutable_value(); value->mutable_data(kDims, gpu_list_[input_scope_idx]); @@ -155,7 +160,9 @@ struct TestGatherOpHandle { } auto out_var = param_scopes_.at(output_scope_idx)->FindVar("out"); - PADDLE_ENFORCE_NOT_NULL(out_var); + PADDLE_ENFORCE_NOT_NULL( + out_var, platform::errors::NotFound( + "The variable '%s' is not found in the scope.", "out")); auto out_selected_rows = out_var->GetMutable(); auto in_var = param_scopes_.at(output_scope_idx)->FindVar("input"); @@ -173,9 +180,19 @@ struct TestGatherOpHandle { auto& out_select_rows = out_var->Get(); auto rt = out_select_rows.value(); - PADDLE_ENFORCE_EQ(out_select_rows.height(), height, "height is not equal."); + PADDLE_ENFORCE_EQ(out_select_rows.height(), height, + platform::errors::InvalidArgument( + "The height of SelectedRows is not equal to " + "the expected, expect %d, but got %d.", + height, out_select_rows.height())); + for (size_t k = 0; k < out_select_rows.rows().size(); ++k) { - PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k % rows.size()]); + PADDLE_ENFORCE_EQ( + out_select_rows.rows()[k], rows[k % rows.size()], + platform::errors::InvalidArgument( + "The item at position %d of rows of SelectedRows is not equal to " + "the expected, expect %d, but got %d.", + k, rows[k % rows.size()], out_select_rows.rows()[k])); } f::Tensor result_tensor; @@ -207,6 +224,7 @@ TEST(GatherTester, TestGPUGatherTestSelectedRows) { test_op.TestGatherSelectedRows(input_scope_idx); } #endif + } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/nccl_op_handle.h b/paddle/fluid/framework/details/nccl_op_handle.h index 2d4d4122a3..22a059773f 100644 --- a/paddle/fluid/framework/details/nccl_op_handle.h +++ b/paddle/fluid/framework/details/nccl_op_handle.h @@ -46,14 +46,17 @@ class NCCLOpHandleBase : public OpHandleBase { } virtual ~NCCLOpHandleBase() { for (auto& ev : inter_events_) { - PADDLE_ENFORCE(cudaEventDestroy(ev.second)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(ev.second)); } for (auto& ev : exter_events_) { - PADDLE_ENFORCE(cudaEventDestroy(ev.second)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(ev.second)); } } void SetRunEnv(int run_order, bool use_hierarchical_allreduce) { - PADDLE_ENFORCE(run_order >= 0, "run_order must >= 0"); + PADDLE_ENFORCE_GE( + run_order, 0, + platform::errors::InvalidArgument( + "The argument run_order must be >= 0, but got %d.", run_order)); run_order_ = run_order; use_hierarchical_allreduce_ = use_hierarchical_allreduce; @@ -74,8 +77,11 @@ class NCCLOpHandleBase : public OpHandleBase { return; } - PADDLE_ENFORCE(places_.size() == 1, - "HierarchicalAllReduce run one proc with one card mode."); + PADDLE_ENFORCE_EQ(places_.size(), 1, + platform::errors::InvalidArgument( + "HierarchicalAllReduce can only run " + "one proccess with one card mode, but got %d cards.", + places_.size())); for (auto& p : places_) { auto ctxs = nccl_ctxs_->GetHierarchicalInterCtx(run_order); @@ -88,11 +94,11 @@ class NCCLOpHandleBase : public OpHandleBase { continue; } - PADDLE_ENFORCE(cudaSetDevice(dev_id)); - PADDLE_ENFORCE(cudaEventCreateWithFlags(&inter_events_[dev_id], - cudaEventDisableTiming)); - PADDLE_ENFORCE(cudaEventCreateWithFlags(&exter_events_[dev_id], - cudaEventDisableTiming)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventCreateWithFlags( + &inter_events_[dev_id], cudaEventDisableTiming)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventCreateWithFlags( + &exter_events_[dev_id], cudaEventDisableTiming)); VLOG(10) << "Create events on dev_id:" << dev_id << ", inter_event:" << &inter_events_[dev_id] << ", exter_event:" << &exter_events_[dev_id]; @@ -102,7 +108,10 @@ class NCCLOpHandleBase : public OpHandleBase { void FlatNCCLAllReduce(platform::Place place, const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype, ncclRedOp_t op) { - PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0"); + PADDLE_ENFORCE_GE( + run_order_, 0, + platform::errors::InvalidArgument( + "The argument run_order_ must be >= 0, but got %d.", run_order_)); auto flat_nccl_ctxs = nccl_ctxs_->GetFlatCtx(run_order_); int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; auto& nccl_ctx = flat_nccl_ctxs->at(dev_id); @@ -113,14 +122,17 @@ class NCCLOpHandleBase : public OpHandleBase { << ", dev_id:" << dev_id << ", dtype:" << datatype << ", place:" << place; - PADDLE_ENFORCE(platform::dynload::ncclAllReduce( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclAllReduce( sendbuff, recvbuff, count, datatype, op, comm, stream)); } void NCCLAllReduce(platform::Place place, const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype, ncclRedOp_t op) { - PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0"); + PADDLE_ENFORCE_GE( + run_order_, 0, + platform::errors::InvalidArgument( + "The argument run_order_ must be >= 0, but got %d.", run_order_)); if (!use_hierarchical_allreduce_) { FlatNCCLAllReduce(place, sendbuff, recvbuff, count, datatype, op); return; @@ -132,7 +144,10 @@ class NCCLOpHandleBase : public OpHandleBase { void HierarchicalAllReduce(platform::Place place, const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype, ncclRedOp_t op) { - PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0"); + PADDLE_ENFORCE_GE( + run_order_, 0, + platform::errors::InvalidArgument( + "The argument run_order_ must be >= 0, but got %d.", run_order_)); InterReduce(place, sendbuff, recvbuff, count, datatype, op); // When a trainer is not in exter allreduce ring // they need not to call this. @@ -157,14 +172,13 @@ class NCCLOpHandleBase : public OpHandleBase { << ", dtype:" << datatype << ", place:" << place << ", stream:" << stream; - PADDLE_ENFORCE(platform::dynload::ncclReduce( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclReduce( sendbuff, recvbuff, count, datatype, ncclSum, 0, comm, stream)); cudaEventRecord(inter_events_.at(dev_id), stream); if (FLAGS_sync_nccl_allreduce) { - PADDLE_ENFORCE(cudaStreamSynchronize(stream), - "sync HierarchicalAllReduce inter stream error"); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream)); } } @@ -172,7 +186,9 @@ class NCCLOpHandleBase : public OpHandleBase { void* recvbuff, size_t count, ncclDataType_t datatype, ncclRedOp_t op) { auto nccl_ctxs = nccl_ctxs_->GetHierarchicalExterCtx(run_order_); - PADDLE_ENFORCE(nccl_ctxs_, "can't get exter %d nccl_ctxs", run_order_); + PADDLE_ENFORCE_NOT_NULL( + nccl_ctxs_, platform::errors::NotFound( + "Can't get exter %d nccl contexts.", run_order_)); int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; auto& nccl_ctx = nccl_ctxs->at(dev_id); auto stream = nccl_ctx.stream(); @@ -185,14 +201,13 @@ class NCCLOpHandleBase : public OpHandleBase { cudaStreamWaitEvent(stream, inter_events_.at(dev_id), 0); - PADDLE_ENFORCE(platform::dynload::ncclAllReduce( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclAllReduce( sendbuff, recvbuff, count, datatype, op, comm, stream)); cudaEventRecord(exter_events_.at(dev_id), stream); if (FLAGS_sync_nccl_allreduce) { - PADDLE_ENFORCE(cudaStreamSynchronize(stream), - "sync HierarchicalAllReduce exter stream error"); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream)); } } @@ -210,8 +225,8 @@ class NCCLOpHandleBase : public OpHandleBase { << ", stream:" << stream; cudaStreamWaitEvent(stream, exter_events_.at(dev_id), 0); - PADDLE_ENFORCE(platform::dynload::ncclBcast(sendbuff, count, datatype, 0, - comm, stream)); + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast( + sendbuff, count, datatype, 0, comm, stream)); } protected: diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index 459bcff5c0..105c37192f 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -47,8 +47,8 @@ void OpHandleBase::InitCUDA() { #ifdef PADDLE_WITH_CUDA for (auto &p : dev_ctxes_) { int dev_id = BOOST_GET_CONST(platform::CUDAPlace, p.first).device; - PADDLE_ENFORCE(cudaSetDevice(dev_id)); - PADDLE_ENFORCE( + PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id)); + PADDLE_ENFORCE_CUDA_SUCCESS( cudaEventCreateWithFlags(&events_[dev_id], cudaEventDisableTiming)); } if (IsMultiDeviceTransfer() && dev_ctxes_.size() > 0) { @@ -62,17 +62,22 @@ void OpHandleBase::InitCUDA() { } } } else { - PADDLE_ENFORCE_EQ(dev_ctxes_.size(), 1UL, - "%s should have only one dev_ctx.", Name()); + PADDLE_ENFORCE_EQ( + dev_ctxes_.size(), 1UL, + platform::errors::InvalidArgument( + "Operator %s should have only one dev_ctx, but got %d.", Name(), + dev_ctxes_.size())); auto &place = dev_ctxes_.begin()->first; int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; for (auto &out_var : outputs_) { auto *out_var_handle = dynamic_cast(out_var); if (out_var_handle) { - PADDLE_ENFORCE(platform::is_same_place(place, out_var_handle->place()), - "The place of output(%s) is not consistent with the " - "place of current op(%s).", - out_var_handle->Name(), Name()); + PADDLE_ENFORCE_EQ( + platform::is_same_place(place, out_var_handle->place()), true, + platform::errors::InvalidArgument( + "The place of output(%s) is not consistent with the " + "place of current op(%s).", + out_var_handle->Name(), Name())); out_var_handle->SetGenerateEvent(events_.at(dev_id)); } } @@ -86,7 +91,10 @@ void OpHandleBase::Run(bool use_cuda) { InitCUDA(); } #else - PADDLE_ENFORCE(!use_cuda); + PADDLE_ENFORCE_EQ(use_cuda, false, + platform::errors::InvalidArgument( + "Argument use_cuda should be false when Paddle is not " + "compiled with CUDA.")); #endif // skip running current op, used with inplace_addto_op_pass @@ -100,17 +108,20 @@ void OpHandleBase::Run(bool use_cuda) { void OpHandleBase::RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) { #ifdef PADDLE_WITH_CUDA - PADDLE_ENFORCE_NOT_NULL(waited_ctx); + PADDLE_ENFORCE_NOT_NULL(waited_ctx, platform::errors::InvalidArgument( + "Argument waited_ctx is NULL.")); if (platform::is_cpu_place(waited_ctx->GetPlace()) || events_.empty()) { for (auto &dev_ctx : dev_ctxes_) { - PADDLE_ENFORCE_NOT_NULL(dev_ctx.second); + PADDLE_ENFORCE_NOT_NULL( + dev_ctx.second, + platform::errors::InvalidArgument("The device context is NULL.")); dev_ctx.second->Wait(); } } else { auto stream = static_cast(waited_ctx)->stream(); for (auto &ev : events_) { - PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev.second, 0)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamWaitEvent(stream, ev.second, 0)); } } #else @@ -145,10 +156,11 @@ void OpHandleBase::WaitInputVarGenerated() { auto stream = static_cast(dev_ctxes_.at(place)) ->stream(); - PADDLE_ENFORCE( + PADDLE_ENFORCE_CUDA_SUCCESS( cudaStreamWaitEvent(stream, in_var_handle->GetEvent(), 0)); #else - PADDLE_THROW("Doesn't compile the GPU."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } // There are nothing to do when the place is CPUPlace. @@ -169,10 +181,11 @@ void OpHandleBase::WaitInputVarGenerated(const platform::Place &place) { auto stream = static_cast( dev_ctxes_.at(in_var_handle->place())) ->stream(); - PADDLE_ENFORCE( + PADDLE_ENFORCE_CUDA_SUCCESS( cudaStreamWaitEvent(stream, in_var_handle->GetEvent(), 0)); #else - PADDLE_THROW("Doesn't compile the GPU."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } // There are nothing to do when the place is CPUPlace. @@ -242,7 +255,9 @@ void OpHandleBase::SetLocalExecScopes( auto scopes = GetLocalScopes(); for (auto *scope : scopes) { auto iter = scope_map.find(scope); - PADDLE_ENFORCE(iter != scope_map.end(), "Local scope not found"); + PADDLE_ENFORCE_NE( + iter, scope_map.end(), + platform::errors::NotFound("Local scope not found in scope map.")); local_exec_scopes_.emplace_back(iter->second); } } diff --git a/paddle/fluid/framework/details/op_registry.h b/paddle/fluid/framework/details/op_registry.h index 1e608000e0..453a25166b 100644 --- a/paddle/fluid/framework/details/op_registry.h +++ b/paddle/fluid/framework/details/op_registry.h @@ -21,6 +21,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/grad_op_desc_maker.h" #include "paddle/fluid/framework/inplace_op_inference.h" #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" @@ -186,19 +187,20 @@ struct OpInfoFiller { void operator()(const char* op_type, OpInfo* info) const { PADDLE_ENFORCE_EQ(info->proto_, nullptr, platform::errors::AlreadyExists( - "OpProto of %s has been registered", op_type)); + "OpProto of %s has been registered.", op_type)); PADDLE_ENFORCE_EQ(info->checker_, nullptr, platform::errors::AlreadyExists( - "OpAttrChecker of %s has been registered", op_type)); + "OpAttrChecker of %s has been registered.", op_type)); info->proto_ = new proto::OpProto; info->checker_ = new OpAttrChecker(); T maker; maker(info->proto_, info->checker_); info->proto_->set_type(op_type); - PADDLE_ENFORCE( - info->proto_->IsInitialized(), - "Fail to initialize %s's OpProto, because %s is not initialized", - op_type, info->proto_->InitializationErrorString()); + PADDLE_ENFORCE_EQ( + info->proto_->IsInitialized(), true, + platform::errors::PreconditionNotMet( + "Fail to initialize %s's OpProto, because %s is not initialized.", + op_type, info->proto_->InitializationErrorString())); } }; diff --git a/paddle/fluid/framework/details/reduce_and_gather.h b/paddle/fluid/framework/details/reduce_and_gather.h index 11c4621fde..9ecb2d8dbd 100644 --- a/paddle/fluid/framework/details/reduce_and_gather.h +++ b/paddle/fluid/framework/details/reduce_and_gather.h @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows.h" @@ -32,9 +33,13 @@ struct ReduceLoDTensor { template void apply() const { - PADDLE_ENFORCE(!src_tensors_.empty()); + PADDLE_ENFORCE_NE(src_tensors_.empty(), true, + platform::errors::InvalidArgument( + "The number of tensors to be reduced is 0.")); auto &t0 = *src_tensors_[0]; - PADDLE_ENFORCE_NE(t0.numel(), 0); + PADDLE_ENFORCE_NE(t0.numel(), 0, + platform::errors::InvalidArgument( + "The size of first tensor to be reduced is 0.")); dst_tensor_.Resize(t0.dims()); T *dst = dst_tensor_.mutable_data(platform::CPUPlace()); @@ -45,8 +50,19 @@ struct ReduceLoDTensor { continue; } - PADDLE_ENFORCE_EQ(t.dims(), t0.dims()); - PADDLE_ENFORCE_EQ(t.type(), t0.type()); + PADDLE_ENFORCE_EQ(t.dims(), t0.dims(), + platform::errors::InvalidArgument( + "The shape of tensors to be reduced must be " + "consistent. The shape of current tensor is %s, " + "but the shape of the first tensor is %s.", + t.dims(), t0.dims())); + + PADDLE_ENFORCE_EQ(t.type(), t0.type(), + platform::errors::InvalidArgument( + "The type of tensors to be reduced must be " + "consistent. The type of current tensor is %s, " + "but the type of the first tensor is %s.", + t.type(), t0.type())); std::transform(t.data(), t.data() + t.numel(), dst, dst, [](T a, T b) -> T { return a + b; }); } @@ -88,7 +104,9 @@ struct GatherLocalSelectedRowsFunctor { in_places_(in_places), out_place_(out_place), dst_selected_rows_(dst_selected_rows) { - PADDLE_ENFORCE_EQ(src_selected_rows.empty(), false); + PADDLE_ENFORCE_NE(src_selected_rows.empty(), true, + platform::errors::InvalidArgument( + "The number of selected_rows to be gathered is 0.")); std::vector out_rows; diff --git a/paddle/fluid/framework/details/reduce_op_handle.cc b/paddle/fluid/framework/details/reduce_op_handle.cc index d8f8cc994c..d7f13f79f6 100644 --- a/paddle/fluid/framework/details/reduce_op_handle.cc +++ b/paddle/fluid/framework/details/reduce_op_handle.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/details/reduce_op_handle.h" + #include + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -116,8 +118,15 @@ void ReduceOpHandle::GatherSelectedRows( merged_dev_ctx->Wait(); scope->EraseVars(std::vector{gathered_var_name}); - PADDLE_ENFORCE(client->Gather(vars, &remote, *merged_dev_ctx, scope)); - PADDLE_ENFORCE(remote.size() == vars.size()); + PADDLE_ENFORCE_EQ( + client->Gather(vars, &remote, *merged_dev_ctx, scope), true, + platform::errors::PreconditionNotMet("Gather SelectedRows failed.")); + PADDLE_ENFORCE_EQ(remote.size(), vars.size(), + platform::errors::PreconditionNotMet( + "The number of remotes should be equal to the number " + "of variables to be gathered, but got the number of " + "remotes is %d and the number of variables is %d.", + remote.size(), vars.size())); // 4. merged local selected rows. std::vector all; @@ -151,14 +160,19 @@ void ReduceOpHandle::RunImpl() { PADDLE_ENFORCE_EQ( in_var_handles.size(), places_.size(), - "The number of output should equal to the number of places."); + platform::errors::InvalidArgument( + "The number of inputs should equal to the number of places, but got " + "the number of inputs is %d and the number of places is %d.", + in_var_handles.size(), places_.size())); VarHandle *out_var_handle; { auto out_var_handles = DynamicCast(outputs_); PADDLE_ENFORCE_EQ(out_var_handles.size(), 1UL, - "The number of output should be one."); + platform::errors::InvalidArgument( + "The number of output should be one, but got %d.", + out_var_handles.size())); out_var_handle = out_var_handles.front(); } @@ -168,7 +182,10 @@ void ReduceOpHandle::RunImpl() { auto pre_in_var = var_scopes.at(in_0_handle->scope_idx())->FindVar(in_0_handle->name()); - PADDLE_ENFORCE_NOT_NULL(pre_in_var); + + PADDLE_ENFORCE_NOT_NULL(pre_in_var, platform::errors::NotFound( + "Variable %s is not found in scope.", + in_0_handle->name())); // NOTE: The Places of all input tensor must be all on CPU or all on GPU. std::vector in_places; // used to get dev_ctx @@ -176,21 +193,29 @@ void ReduceOpHandle::RunImpl() { in_places.emplace_back(in_handle->place()); auto in_var = var_scopes.at(in_handle->scope_idx())->FindVar(in_handle->name()); - PADDLE_ENFORCE_NOT_NULL(in_var); + + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound("Variable %s is not found in scope.", + in_handle->name())); + VariableVisitor::EnforceShapeAndDTypeEQ(*pre_in_var, *in_var); } auto out_var = var_scopes.at(out_var_handle->scope_idx()) ->FindVar(out_var_handle->name()); - PADDLE_ENFORCE_NOT_NULL(out_var); + + PADDLE_ENFORCE_NOT_NULL( + out_var, platform::errors::NotFound("Variable %s is not found in scope.", + out_var_handle->name())); // NOTE: The tensors' Place of input and output must be all on GPU or all on // CPU. auto in_p = VariableVisitor::GetMutableTensor(pre_in_var).place(); platform::Place t_out_p; if (platform::is_gpu_place(in_p)) { - PADDLE_ENFORCE(platform::is_gpu_place(out_var_handle->place()), - "Places of input and output must be all on GPU."); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_var_handle->place()), true, + platform::errors::PreconditionNotMet( + "Places of input and output must be all on GPU.")); t_out_p = out_var_handle->place(); } else { t_out_p = platform::CPUPlace(); @@ -229,7 +254,10 @@ void ReduceOpHandle::RunImpl() { in_selected_rows, in_places, dev_ctxes_, out_var_handle, t_out_p, out_var->GetMutable()); } else { - PADDLE_THROW("only support double or float when gather SelectedRows"); + PADDLE_THROW(platform::errors::Unimplemented( + "Only support double or float when gather SelectedRows, but got " + "%s.", + framework::DataTypeToString(in_selected_rows[0]->value().type()))); } #endif }); @@ -292,7 +320,7 @@ void ReduceOpHandle::RunImpl() { size_t numel = static_cast(lod_tensor.numel()); all_reduce_calls.emplace_back( [buffer, recvbuffer, type, numel, root_id, &nccl_ctx] { - PADDLE_ENFORCE(platform::dynload::ncclReduce( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclReduce( buffer, recvbuffer, numel, static_cast(type), ncclSum, root_id, nccl_ctx.comm_, nccl_ctx.stream())); }); @@ -306,10 +334,13 @@ void ReduceOpHandle::RunImpl() { } }); #else - PADDLE_THROW("CUDA is not enabled."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } else { - PADDLE_THROW("Place should be CPUPlace or CUDAPlace."); + PADDLE_THROW(platform::errors::InvalidArgument( + "The place of tensor should be CPUPlace or CUDAPlace, but got %s.", + lod_tensors[0]->place())); } } } diff --git a/paddle/fluid/framework/details/reduce_op_handle_test.cc b/paddle/fluid/framework/details/reduce_op_handle_test.cc index d71251b76c..ba03c3a267 100644 --- a/paddle/fluid/framework/details/reduce_op_handle_test.cc +++ b/paddle/fluid/framework/details/reduce_op_handle_test.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/details/reduce_op_handle.h" + #include + #include "gtest/gtest.h" #include "paddle/fluid/platform/device_context.h" @@ -69,7 +71,8 @@ struct TestReduceOpHandle { } nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_)); #else - PADDLE_THROW("CUDA is not support."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with NCLL.")); #endif } else { int count = 8; @@ -103,7 +106,8 @@ struct TestReduceOpHandle { op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_, gpu_list_, nccl_ctxs_.get())); #else - PADDLE_THROW("CUDA is not support."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with NCLL.")); #endif } else { #if defined(PADDLE_WITH_NCCL) @@ -164,7 +168,10 @@ struct TestReduceOpHandle { for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size(); ++input_scope_idx) { auto in_var = param_scopes_[input_scope_idx]->FindVar("input"); - PADDLE_ENFORCE_NOT_NULL(in_var); + + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound( + "Variable %s is not found in scope.", "input")); auto in_selected_rows = in_var->GetMutable(); auto value = in_selected_rows->mutable_value(); value->mutable_data(kDims, gpu_list_[input_scope_idx]); @@ -178,7 +185,9 @@ struct TestReduceOpHandle { } auto out_var = param_scopes_[output_scope_idx]->FindVar("out"); - PADDLE_ENFORCE_NOT_NULL(out_var); + PADDLE_ENFORCE_NOT_NULL(out_var, + platform::errors::NotFound( + "Variable %s is not found in scope.", "out")); auto out_selected_rows = out_var->GetMutable(); auto in_var = param_scopes_[output_scope_idx]->FindVar("input"); @@ -196,9 +205,18 @@ struct TestReduceOpHandle { auto &out_select_rows = out_var->Get(); auto rt = out_select_rows.value(); - PADDLE_ENFORCE_EQ(out_select_rows.height(), height, "height is not equal."); + PADDLE_ENFORCE_EQ(out_select_rows.height(), height, + platform::errors::InvalidArgument( + "The height of SelectedRows is not equal to " + "the expected, expect %d, but got %d.", + height, out_select_rows.height())); for (size_t k = 0; k < out_select_rows.rows().size(); ++k) { - PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k % rows.size()]); + PADDLE_ENFORCE_EQ( + out_select_rows.rows()[k], rows[k % rows.size()], + platform::errors::InvalidArgument( + "The item at position %d of rows of SelectedRows is not equal to " + "the expected, expect %d, but got %d.", + k, rows[k % rows.size()], out_select_rows.rows()[k])); } f::Tensor result_tensor; @@ -208,7 +226,7 @@ struct TestReduceOpHandle { for (int64_t j = 0; j < f::product(result_tensor.dims()); ++j) { ASSERT_NEAR(ct[j], send_vector[j % send_vector.size()], 1e-5); } - } + } // namespace details void TestReduceLodTensors(size_t output_scope_idx) { std::vector send_vector(static_cast(f::product(kDims))); @@ -220,7 +238,9 @@ struct TestReduceOpHandle { for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size(); ++input_scope_idx) { auto in_var = param_scopes_[input_scope_idx]->FindVar("input"); - PADDLE_ENFORCE_NOT_NULL(in_var); + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound( + "Variable %s is not found in scope.", "input")); auto in_lod_tensor = in_var->GetMutable(); in_lod_tensor->mutable_data(kDims, gpu_list_[input_scope_idx]); in_lod_tensor->set_lod(lod); @@ -230,7 +250,9 @@ struct TestReduceOpHandle { } auto out_var = param_scopes_[output_scope_idx]->FindVar("out"); - PADDLE_ENFORCE_NOT_NULL(out_var); + PADDLE_ENFORCE_NOT_NULL(out_var, + platform::errors::NotFound( + "Variable %s is not found in scope.", "out")); auto out_lodtensor = out_var->GetMutable(); auto in_var = param_scopes_[output_scope_idx]->FindVar("input"); @@ -254,7 +276,7 @@ struct TestReduceOpHandle { ASSERT_NEAR(ct[j], send_vector[j] * gpu_list_.size(), 1e-5); } } -}; +}; // namespace details TEST(ReduceTester, TestCPUReduceTestSelectedRows) { TestReduceOpHandle test_op; diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc index bf93d8f85b..079e9abc89 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc @@ -111,13 +111,12 @@ void ShareTensorBufferFunctor::CallOnce() { auto *out_var = exec_scope_->FindVar(out_var_names_[i]); PADDLE_ENFORCE_NOT_NULL( in_var, platform::errors::NotFound( - "The input variable(%s)to be inplaced should not be NULL.", + "The variable(%s) to be inplaced is not found in scope.", in_var_infos_[i]->Name())); PADDLE_ENFORCE_NOT_NULL( - out_var, - platform::errors::NotFound( - "The output variable(%s) to be inplaced should not be NULL.", - out_var_names_[i])); + out_var, platform::errors::NotFound( + "The variable(%s) to be inplaced is not found in scope.", + out_var_names_[i])); PADDLE_ENFORCE_NE( in_var, out_var, platform::errors::PreconditionNotMet( diff --git a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc index 3f9af1c3a1..37399e5ddc 100644 --- a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h" + #include #include + #include "dgc/dgc.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" @@ -38,18 +40,23 @@ SparseAllReduceOpHandle::SparseAllReduceOpHandle( is_encoded_(is_encoded), nranks_(nranks) { // TODO(gongwb) :polish them! - PADDLE_ENFORCE_EQ(is_encoded, true); + PADDLE_ENFORCE_EQ(is_encoded, true, platform::errors::InvalidArgument( + "The argument is_encoded is false.")); VLOG(1) << "Use dgc allreduce mode" << ", nranks:" << nranks_; - PADDLE_ENFORCE_GT(local_scopes_.size(), 0); + PADDLE_ENFORCE_GT(local_scopes_.size(), 0, + platform::errors::PreconditionNotMet( + "The number of local scope should be > 0, but got %zu.", + local_scopes_.size())); auto nranks_name = g_dgc_nranks; for (size_t i = 0; i < local_scopes_.size(); ++i) { auto *local_scope = local_scopes_[i]; auto nranks_var = local_scope->FindVar(nranks_name); - if (nranks_var == nullptr) { - PADDLE_THROW("not find nranks_var:%s", nranks_name); - } + + PADDLE_ENFORCE_NOT_NULL( + nranks_var, platform::errors::NotFound( + "Variable %s is not found in scope.", nranks_name)); float *dgc_nranks = nranks_var->GetMutable()->data(); *dgc_nranks = nranks; @@ -64,10 +71,18 @@ void SparseAllReduceOpHandle::RunImplEncoded() { auto out_var_handles = DynamicCast(this->Outputs()); PADDLE_ENFORCE_EQ( in_var_handles.size(), places_.size(), - "The NoDummyInputSize should be equal to the number of places."); + platform::errors::PreconditionNotMet( + "The number of input variables should be equal to the number of " + "places, but got the number of input variables is %zu and the the " + "number of places is %zu.", + in_var_handles.size(), places_.size())); PADDLE_ENFORCE_EQ( in_var_handles.size(), out_var_handles.size(), - "The NoDummyInputSize and NoDummyOutputSize should be equal."); + platform::errors::PreconditionNotMet( + "The number of input variables should be equal to the number of " + "output variables, but got the number of input variables is %zu and " + "the the number of output variables is %zu.", + in_var_handles.size(), out_var_handles.size())); std::vector ins; std::vector gathers; @@ -80,14 +95,17 @@ void SparseAllReduceOpHandle::RunImplEncoded() { auto encode_var_name = original_name + g_dgc_encoded; auto *in_var = local_scope->FindVar(encode_var_name); - PADDLE_ENFORCE_NOT_NULL(in_var, "%s should not be null", encode_var_name); + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound("Variable %s is not found in scope.", + encode_var_name)); auto &in = in_var->Get(); ins.emplace_back(&in); auto gather_var_name = original_name + g_dgc_gather; auto *gather_var = local_scope->FindVar(gather_var_name); - PADDLE_ENFORCE_NOT_NULL(gather_var, "%s should not be null", - gather_var_name); + PADDLE_ENFORCE_NOT_NULL( + gather_var, platform::errors::NotFound( + "Variable %s is not found in scope.", gather_var)); auto *gather = gather_var->GetMutable(); gathers.emplace_back(gather); @@ -100,14 +118,26 @@ void SparseAllReduceOpHandle::RunImplEncoded() { } } - PADDLE_ENFORCE(platform::is_gpu_place(ins[0]->place())); - PADDLE_ENFORCE(platform::is_gpu_place(outs[0]->place())); - PADDLE_ENFORCE(nccl_ctxs_, "nccl_ctxs should not be nullptr."); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ins[0]->place()), true, + platform::errors::InvalidArgument( + "The place of input variable should be CUDAPlace, but got %s.", + ins[0]->place())); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(outs[0]->place()), true, + platform::errors::InvalidArgument( + "The place of input variable should be CUDAPlace, but got %s.", + outs[0]->place())); + PADDLE_ENFORCE_NOT_NULL(nccl_ctxs_, platform::errors::PreconditionNotMet( + "The nccl contexts are NULL.")); int dtype = -1; size_t in_numel = 0; size_t out_numel = 0; - PADDLE_ENFORCE(nranks_ > 1); + PADDLE_ENFORCE_GT( + nranks_, 1, + platform::errors::PreconditionNotMet( + "The number of ranks should be > 1, but got %d.", nranks_)); std::vector> all_gather_calls; std::vector> sparse_reduce_calls; @@ -123,8 +153,16 @@ void SparseAllReduceOpHandle::RunImplEncoded() { dtype = (dtype == -1) ? platform::ToNCCLDataType(in.type()) : dtype; in_numel = (in_numel == 0) ? static_cast(in.numel()) : in_numel; - PADDLE_ENFORCE(in_numel % 2 == 0); - PADDLE_ENFORCE(in_numel / 2 == static_cast(k)); + PADDLE_ENFORCE_EQ(in_numel % 2, 0, + platform::errors::InvalidArgument( + "The number of elements of input variable should be " + "even, but got %zu.", + in_numel)); + PADDLE_ENFORCE_EQ(in_numel / 2, static_cast(k), + platform::errors::InvalidArgument( + "The number of elements of input variable should be " + "even, but got %zu.", + in_numel)); out_numel = (out_numel == 0) ? static_cast(out.numel()) : out_numel; int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; @@ -154,7 +192,8 @@ void SparseAllReduceOpHandle::RunImplEncoded() { PADDLE_ENFORCE_EQ(paddle::communication::dgc::sparseReduce( gather_buff, k, out_tensor_buf, static_cast(out_numel), nranks_, stream), - true); + true, platform::errors::Unavailable( + "Calling sparseReduce() failed.")); }); } @@ -187,11 +226,16 @@ void SparseAllReduceOpHandle::SparseAllReduceFunc( int SparseAllReduceOpHandle::GetKValue(const std::string &grad_name) { auto original_name = paddle::framework::GradOriginalVarName(grad_name); auto var_name = original_name + g_dgc_k; - PADDLE_ENFORCE(local_scopes_.size() > 0); + PADDLE_ENFORCE_GT(local_scopes_.size(), 0, + platform::errors::PreconditionNotMet( + "The number of local scope should be > 0, but got %zu.", + local_scopes_.size())); auto *scope = local_exec_scopes_[0]; auto var = scope->FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found in scope.", + var_name)); auto tensor = var->Get().data(); return *tensor; } @@ -202,15 +246,22 @@ bool SparseAllReduceOpHandle::IsEncoded() { } auto counter_name = g_dgc_counter_name; auto step_name = g_dgc_rampup_begin_step; - PADDLE_ENFORCE(local_scopes_.size() > 0); + + PADDLE_ENFORCE_GT(local_scopes_.size(), 0, + platform::errors::PreconditionNotMet( + "The number of local scope should be > 0, but got %zu.", + local_scopes_.size())); auto *local_scope = local_exec_scopes_[0]; auto count_var = local_scope->FindVar(counter_name); auto step_var = local_scope->FindVar(step_name); - if (count_var == nullptr || step_var == nullptr) { - PADDLE_THROW("not find count_var:%s or step_var:%s", counter_name, - step_var); - } + + PADDLE_ENFORCE_NOT_NULL( + count_var, platform::errors::NotFound( + "Variable %s is not found in scope.", counter_name)); + PADDLE_ENFORCE_NOT_NULL( + step_var, platform::errors::NotFound("Variable %s is not found in scope.", + step_var)); float count = *count_var->Get().data(); float step = *step_var->Get().data(); -- GitLab