From fb70682f00794498157920061075cf2eea4d4024 Mon Sep 17 00:00:00 2001
From: GaoWei8 <53294385+GaoWei8@users.noreply.github.com>
Date: Fri, 3 Jul 2020 10:40:00 +0800
Subject: [PATCH] fix PADDLE_ENFORCE (#25297)

* fix PADDLE_ENFORCE and refine the description

test=develop
---
 paddle/fluid/platform/collective_helper.cc     | 36 +++++++++++++++----
 paddle/fluid/platform/collective_helper.h      | 24 +++++++------
 paddle/fluid/platform/device_context.h         |  4 ++-
 .../fluid/platform/device_memory_aligment.cc   |  3 +-
 paddle/fluid/platform/device_tracer.cc         |  6 ++--
 paddle/fluid/platform/dynload/cudnn.cc         |  8 +++--
 .../fluid/platform/stream_callback_manager.cc  |  9 +++--
 paddle/fluid/platform/transform.h              |  8 +++--
 8 files changed, 69 insertions(+), 29 deletions(-)

diff --git a/paddle/fluid/platform/collective_helper.cc b/paddle/fluid/platform/collective_helper.cc
index 71482c10062..4cb6ee3143a 100644
--- a/paddle/fluid/platform/collective_helper.cc
+++ b/paddle/fluid/platform/collective_helper.cc
@@ -57,11 +57,25 @@ class NCCLCommImpl : public NCCLComm {
 
 NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks,
                                           int rank, int dev_id, int ring_id) {
-  PADDLE_ENFORCE_NOT_NULL(nccl_id);
-  PADDLE_ENFORCE_GT(nranks, 1);
-  PADDLE_ENFORCE_GE(rank, 0);
-  PADDLE_ENFORCE_LT(rank, nranks);
-  PADDLE_ENFORCE_GE(dev_id, 0);
+  PADDLE_ENFORCE_NOT_NULL(nccl_id,
+                          platform::errors::InvalidArgument(
+                              "The nccl unique id should not be null."));
+  PADDLE_ENFORCE_GT(
+      nranks, 1,
+      platform::errors::InvalidArgument(
+          "Expected nranks > 1. But received nranks is %d.", nranks));
+  PADDLE_ENFORCE_GE(rank, 0,
+                    platform::errors::InvalidArgument(
+                        "Expected rank >= 0. But received rank is %d.", rank));
+  PADDLE_ENFORCE_LT(
+      rank, nranks,
+      platform::errors::InvalidArgument(
+          "Expected rank < nranks. But received rank is %d, nranks is %d.",
+          rank, nranks));
+  PADDLE_ENFORCE_GE(
+      dev_id, 0,
+      platform::errors::InvalidArgument(
+          "Expected dev_id >= 0. But received dev_id is %d.", dev_id));
 
   ncclComm_t comm = nullptr;
   PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id));
@@ -82,14 +96,22 @@ NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks,
 
 void NCCLCommContext::CreateAllNCCLComms(const std::vector<int>& dev_ids,
                                          int ring_id) {
-  PADDLE_ENFORCE_GT(dev_ids.size(), 0);
+  PADDLE_ENFORCE_GT(
+      dev_ids.size(), 0,
+      platform::errors::InvalidArgument("Expected the size of dev_ids > 0. But "
+                                        "received the size of dev_ids is %d.",
+                                        dev_ids.size()));
 
   const int kDevices = dev_ids.size();
   ncclComm_t comms[kDevices];
   PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclCommInitAll(
       comms, dev_ids.size(), dev_ids.data()));
 
-  PADDLE_ENFORCE_EQ(comm_map_.count(ring_id), 0);
+  PADDLE_ENFORCE_EQ(comm_map_.count(ring_id), 0,
+                    platform::errors::InvalidArgument(
+                        "Expected comm_map_.count(ring_id) = 0. But received "
+                        "comm_map_.count(ring_id) is %d.",
+                        comm_map_.count(ring_id)));
   for (size_t i = 0; i < dev_ids.size(); ++i) {
     AssignNCCLComm(comms[i], dev_ids.size(), i, dev_ids[i], ring_id);
     VLOG(1) << "nccl communicator of rank " << i << " in ring " << ring_id
diff --git a/paddle/fluid/platform/collective_helper.h b/paddle/fluid/platform/collective_helper.h
index 61cad961f51..f632550c651 100644
--- a/paddle/fluid/platform/collective_helper.h
+++ b/paddle/fluid/platform/collective_helper.h
@@ -78,24 +78,28 @@ class NCCLCommContext {
 
   // retrieve a communicator by the ring id in multiprocessing mode
   NCCLComm* Get(int ring_id) const {
-    PADDLE_ENFORCE_GT(comm_map_.count(ring_id), 0,
-                      "comunicator in ring id %d has not been initialized",
-                      ring_id);
+    PADDLE_ENFORCE_GT(
+        comm_map_.count(ring_id), 0,
+        platform::errors::InvalidArgument(
+            "Comunicator in ring id %d has not been initialized.", ring_id));
     PADDLE_ENFORCE_EQ(comm_map_.at(ring_id).size(), 1,
-                      "you should specify a device id to retrieve from "
-                      "multiple communicators");
+                      platform::errors::InvalidArgument(
+                          "One device id should be specified to retrieve from "
+                          "multiple communicators."));
     return comm_map_.at(ring_id).begin()->second.get();
   }
 
   // retrieve a communicator by the ring id and the device id
   NCCLComm* Get(int ring_id, int dev_id) const {
-    PADDLE_ENFORCE_GT(comm_map_.count(ring_id), 0,
-                      "comunicator of ring id %d has not been initialized",
-                      ring_id);
+    PADDLE_ENFORCE_GT(
+        comm_map_.count(ring_id), 0,
+        platform::errors::InvalidArgument(
+            "Comunicator of ring id %d has not been initialized.", ring_id));
     PADDLE_ENFORCE_GT(
         comm_map_.at(ring_id).count(dev_id), 0,
-        "comunicator at device id %d has not been initialized in ring %d",
-        dev_id, ring_id);
+        platform::errors::InvalidArgument(
+            "Comunicator at device id %d has not been initialized in ring %d.",
+            dev_id, ring_id));
     return comm_map_.at(ring_id).at(dev_id).get();
   }
 
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 9393ea3e332..7511edb9ccf 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -515,7 +515,9 @@ class DeviceContextPool {
   explicit DeviceContextPool(const std::vector<platform::Place>& places);
 
   static DeviceContextPool& Instance() {
-    PADDLE_ENFORCE_NOT_NULL(pool, "Need to Create DeviceContextPool first!");
+    PADDLE_ENFORCE_NOT_NULL(pool,
+                            platform::errors::PreconditionNotMet(
+                                "Need to Create DeviceContextPool firstly!"));
     return *pool;
   }
 
diff --git a/paddle/fluid/platform/device_memory_aligment.cc b/paddle/fluid/platform/device_memory_aligment.cc
index 7b901856daa..8b57de93499 100644
--- a/paddle/fluid/platform/device_memory_aligment.cc
+++ b/paddle/fluid/platform/device_memory_aligment.cc
@@ -24,7 +24,8 @@ size_t Alignment(size_t size, const platform::Place &place) {
 #ifdef PADDLE_WITH_CUDA
     alignment = GpuMinChunkSize();
 #else
-    PADDLE_THROW("Fluid is not compiled with CUDA");
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "Fluid is not compiled with CUDA."));
 #endif
   }
   size_t remaining = size % alignment;
diff --git a/paddle/fluid/platform/device_tracer.cc b/paddle/fluid/platform/device_tracer.cc
index d362b841065..0e2d0be2c1f 100644
--- a/paddle/fluid/platform/device_tracer.cc
+++ b/paddle/fluid/platform/device_tracer.cc
@@ -177,8 +177,10 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer,
   static std::thread::id cupti_thread_id(0);
   if (cupti_thread_id == std::thread::id(0))
     cupti_thread_id = std::this_thread::get_id();
-  PADDLE_ENFORCE_EQ(std::this_thread::get_id(), cupti_thread_id,
-                    "Only one thread is allowed to call bufferCompleted()");
+  PADDLE_ENFORCE_EQ(
+      std::this_thread::get_id(), cupti_thread_id,
+      platform::errors::PermissionDenied(
+          "Only one thread is allowed to call bufferCompleted()."));
   CUptiResult status;
   CUpti_Activity *record = NULL;
   if (validSize > 0) {
diff --git a/paddle/fluid/platform/dynload/cudnn.cc b/paddle/fluid/platform/dynload/cudnn.cc
index edff8761ee1..3dd60dcc62d 100644
--- a/paddle/fluid/platform/dynload/cudnn.cc
+++ b/paddle/fluid/platform/dynload/cudnn.cc
@@ -58,9 +58,11 @@ bool HasCUDNN() {
 }
 
 void EnforceCUDNNLoaded(const char* fn_name) {
-  PADDLE_ENFORCE(cudnn_dso_handle != nullptr,
-                 "Cannot load cudnn shared library. Cannot invoke method %s",
-                 fn_name);
+  PADDLE_ENFORCE_NOT_NULL(
+      cudnn_dso_handle,
+      platform::errors::PreconditionNotMet(
+          "Cannot load cudnn shared library. Cannot invoke method %s.",
+          fn_name));
 }
 #else
 bool HasCUDNN() { return true; }
diff --git a/paddle/fluid/platform/stream_callback_manager.cc b/paddle/fluid/platform/stream_callback_manager.cc
index 5a9e24374f6..365216566b2 100644
--- a/paddle/fluid/platform/stream_callback_manager.cc
+++ b/paddle/fluid/platform/stream_callback_manager.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/fluid/platform/stream_callback_manager.h"
+#include
 #include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
@@ -43,14 +44,16 @@ void StreamCallbackManager::AddCallback(std::function<void()> callback) const {
     });
   });
 #if CUDA_VERSION >= 10000
-  PADDLE_ENFORCE(cudaLaunchHostFunc(stream_, StreamCallbackFunc, func));
+  PADDLE_ENFORCE_CUDA_SUCCESS(
+      cudaLaunchHostFunc(stream_, StreamCallbackFunc, func));
 #else
-  PADDLE_ENFORCE(cudaStreamAddCallback(stream_, StreamCallbackFunc, func, 0));
+  PADDLE_ENFORCE_CUDA_SUCCESS(
+      cudaStreamAddCallback(stream_, StreamCallbackFunc, func, 0));
 #endif
 }
 
 void StreamCallbackManager::Wait() const {
-  PADDLE_ENFORCE(cudaStreamSynchronize(stream_));
+  PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream_));
   {
     std::lock_guard<std::mutex> lock(mtx_);
     if (last_future_.valid()) {
diff --git a/paddle/fluid/platform/transform.h b/paddle/fluid/platform/transform.h
index 7877d3e41c1..a0e428f0d1a 100644
--- a/paddle/fluid/platform/transform.h
+++ b/paddle/fluid/platform/transform.h
@@ -83,7 +83,9 @@ struct Transform<platform::CUDADeviceContext> {
   void operator()(const platform::CUDADeviceContext& context, InputIter first,
                   InputIter last, OutputIter result, UnaryOperation op) {
     auto place = context.GetPlace();
-    PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place.");
+    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
+                      platform::errors::PreconditionNotMet(
+                          "The CUDA Transform must be used in GPU place."));
     thrust::transform(thrust::cuda::par.on(context.stream()),
                       details::CastToCUDATransformIterator(first),
                       details::CastToCUDATransformIterator(last),
@@ -96,7 +98,9 @@ struct Transform<platform::CUDADeviceContext> {
                   InputIter1 last1, InputIter2 first2, OutputIter result,
                   BinaryOperation op) {
     auto place = context.GetPlace();
-    PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place.");
+    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
+                      platform::errors::PreconditionNotMet(
+                          "The CUDA Transform must be used in GPU place."));
     thrust::transform(thrust::cuda::par.on(context.stream()),
                       details::CastToCUDATransformIterator(first1),
                       details::CastToCUDATransformIterator(last1),
--
GitLab
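
Note for readers of this patch: the snippet below is a minimal, self-contained sketch of the error-reporting style the patch converges on, i.e. a PADDLE_ENFORCE_* comparison macro paired with a typed platform::errors::* message instead of bare PADDLE_ENFORCE with a free-form string. It is not part of the patch; CheckRank is a hypothetical helper written only for illustration, and it assumes a Paddle source tree where "paddle/fluid/platform/enforce.h" provides these macros and error types.

// Minimal usage sketch (not part of the patch). CheckRank is a hypothetical
// helper used only to illustrate the macro/error-type pairing.
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace platform {

void CheckRank(int rank, int nranks) {
  // The comparison macro reports both operands on failure, and the typed
  // error (InvalidArgument, PreconditionNotMet, ...) replaces the free-form
  // string accepted by the old plain PADDLE_ENFORCE.
  PADDLE_ENFORCE_GE(rank, 0,
                    platform::errors::InvalidArgument(
                        "Expected rank >= 0. But received rank is %d.", rank));
  PADDLE_ENFORCE_LT(
      rank, nranks,
      platform::errors::InvalidArgument(
          "Expected rank < nranks. But received rank is %d, nranks is %d.",
          rank, nranks));
}

}  // namespace platform
}  // namespace paddle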