未验证 提交 fb70682f 编写于 作者: G GaoWei8 提交者: GitHub

fix PADDLE_ENFORCE (#25297)

* fix PADDLE_ENFORCE and refine the description
test=develop
上级 dc17ac91
...@@ -57,11 +57,25 @@ class NCCLCommImpl : public NCCLComm { ...@@ -57,11 +57,25 @@ class NCCLCommImpl : public NCCLComm {
NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks, NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks,
int rank, int dev_id, int ring_id) { int rank, int dev_id, int ring_id) {
PADDLE_ENFORCE_NOT_NULL(nccl_id); PADDLE_ENFORCE_NOT_NULL(nccl_id,
PADDLE_ENFORCE_GT(nranks, 1); platform::errors::InvalidArgument(
PADDLE_ENFORCE_GE(rank, 0); "The nccl unique id should not be null."));
PADDLE_ENFORCE_LT(rank, nranks); PADDLE_ENFORCE_GT(
PADDLE_ENFORCE_GE(dev_id, 0); nranks, 1,
platform::errors::InvalidArgument(
"Expected nranks > 1. But received nranks is %d.", nranks));
PADDLE_ENFORCE_GE(rank, 0,
platform::errors::InvalidArgument(
"Expected rank >= 0. But received rank is %d.", rank));
PADDLE_ENFORCE_LT(
rank, nranks,
platform::errors::InvalidArgument(
"Expected rank < nranks. But received rank is %d, nranks is %d.",
rank, nranks));
PADDLE_ENFORCE_GE(
dev_id, 0,
platform::errors::InvalidArgument(
"Expected dev_id >= 0. But received dev_id is %d.", dev_id));
ncclComm_t comm = nullptr; ncclComm_t comm = nullptr;
PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id)); PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id));
...@@ -82,14 +96,22 @@ NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks, ...@@ -82,14 +96,22 @@ NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks,
void NCCLCommContext::CreateAllNCCLComms(const std::vector<int>& dev_ids, void NCCLCommContext::CreateAllNCCLComms(const std::vector<int>& dev_ids,
int ring_id) { int ring_id) {
PADDLE_ENFORCE_GT(dev_ids.size(), 0); PADDLE_ENFORCE_GT(
dev_ids.size(), 0,
platform::errors::InvalidArgument("Expected the size of dev_ids > 0. But "
"received the size of dev_ids is %d.",
dev_ids.size()));
const int kDevices = dev_ids.size(); const int kDevices = dev_ids.size();
ncclComm_t comms[kDevices]; ncclComm_t comms[kDevices];
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclCommInitAll( PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclCommInitAll(
comms, dev_ids.size(), dev_ids.data())); comms, dev_ids.size(), dev_ids.data()));
PADDLE_ENFORCE_EQ(comm_map_.count(ring_id), 0); PADDLE_ENFORCE_EQ(comm_map_.count(ring_id), 0,
platform::errors::InvalidArgument(
"Expected comm_map_.count(ring_id) = 0. But received "
"comm_map_.count(ring_id) is %d.",
comm_map_.count(ring_id)));
for (size_t i = 0; i < dev_ids.size(); ++i) { for (size_t i = 0; i < dev_ids.size(); ++i) {
AssignNCCLComm(comms[i], dev_ids.size(), i, dev_ids[i], ring_id); AssignNCCLComm(comms[i], dev_ids.size(), i, dev_ids[i], ring_id);
VLOG(1) << "nccl communicator of rank " << i << " in ring " << ring_id VLOG(1) << "nccl communicator of rank " << i << " in ring " << ring_id
......
...@@ -78,24 +78,28 @@ class NCCLCommContext { ...@@ -78,24 +78,28 @@ class NCCLCommContext {
// retrieve a communicator by the ring id in multiprocessing mode // retrieve a communicator by the ring id in multiprocessing mode
NCCLComm* Get(int ring_id) const { NCCLComm* Get(int ring_id) const {
PADDLE_ENFORCE_GT(comm_map_.count(ring_id), 0, PADDLE_ENFORCE_GT(
"comunicator in ring id %d has not been initialized", comm_map_.count(ring_id), 0,
ring_id); platform::errors::InvalidArgument(
"Comunicator in ring id %d has not been initialized.", ring_id));
PADDLE_ENFORCE_EQ(comm_map_.at(ring_id).size(), 1, PADDLE_ENFORCE_EQ(comm_map_.at(ring_id).size(), 1,
"you should specify a device id to retrieve from " platform::errors::InvalidArgument(
"multiple communicators"); "One device id should be specified to retrieve from "
"multiple communicators."));
return comm_map_.at(ring_id).begin()->second.get(); return comm_map_.at(ring_id).begin()->second.get();
} }
// retrieve a communicator by the ring id and the device id // retrieve a communicator by the ring id and the device id
NCCLComm* Get(int ring_id, int dev_id) const { NCCLComm* Get(int ring_id, int dev_id) const {
PADDLE_ENFORCE_GT(comm_map_.count(ring_id), 0, PADDLE_ENFORCE_GT(
"comunicator of ring id %d has not been initialized", comm_map_.count(ring_id), 0,
ring_id); platform::errors::InvalidArgument(
"Comunicator of ring id %d has not been initialized.", ring_id));
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
comm_map_.at(ring_id).count(dev_id), 0, comm_map_.at(ring_id).count(dev_id), 0,
"comunicator at device id %d has not been initialized in ring %d", platform::errors::InvalidArgument(
dev_id, ring_id); "Comunicator at device id %d has not been initialized in ring %d.",
dev_id, ring_id));
return comm_map_.at(ring_id).at(dev_id).get(); return comm_map_.at(ring_id).at(dev_id).get();
} }
......
...@@ -515,7 +515,9 @@ class DeviceContextPool { ...@@ -515,7 +515,9 @@ class DeviceContextPool {
explicit DeviceContextPool(const std::vector<platform::Place>& places); explicit DeviceContextPool(const std::vector<platform::Place>& places);
static DeviceContextPool& Instance() { static DeviceContextPool& Instance() {
PADDLE_ENFORCE_NOT_NULL(pool, "Need to Create DeviceContextPool first!"); PADDLE_ENFORCE_NOT_NULL(pool,
platform::errors::PreconditionNotMet(
"Need to Create DeviceContextPool firstly!"));
return *pool; return *pool;
} }
......
...@@ -24,7 +24,8 @@ size_t Alignment(size_t size, const platform::Place &place) { ...@@ -24,7 +24,8 @@ size_t Alignment(size_t size, const platform::Place &place) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
alignment = GpuMinChunkSize(); alignment = GpuMinChunkSize();
#else #else
PADDLE_THROW("Fluid is not compiled with CUDA"); PADDLE_THROW(platform::errors::PreconditionNotMet(
"Fluid is not compiled with CUDA."));
#endif #endif
} }
size_t remaining = size % alignment; size_t remaining = size % alignment;
......
...@@ -177,8 +177,10 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, ...@@ -177,8 +177,10 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer,
static std::thread::id cupti_thread_id(0); static std::thread::id cupti_thread_id(0);
if (cupti_thread_id == std::thread::id(0)) if (cupti_thread_id == std::thread::id(0))
cupti_thread_id = std::this_thread::get_id(); cupti_thread_id = std::this_thread::get_id();
PADDLE_ENFORCE_EQ(std::this_thread::get_id(), cupti_thread_id, PADDLE_ENFORCE_EQ(
"Only one thread is allowed to call bufferCompleted()"); std::this_thread::get_id(), cupti_thread_id,
platform::errors::PermissionDenied(
"Only one thread is allowed to call bufferCompleted()."));
CUptiResult status; CUptiResult status;
CUpti_Activity *record = NULL; CUpti_Activity *record = NULL;
if (validSize > 0) { if (validSize > 0) {
......
...@@ -58,9 +58,11 @@ bool HasCUDNN() { ...@@ -58,9 +58,11 @@ bool HasCUDNN() {
} }
void EnforceCUDNNLoaded(const char* fn_name) { void EnforceCUDNNLoaded(const char* fn_name) {
PADDLE_ENFORCE(cudnn_dso_handle != nullptr, PADDLE_ENFORCE_NOT_NULL(
"Cannot load cudnn shared library. Cannot invoke method %s", cudnn_dso_handle,
fn_name); platform::errors::PreconditionNotMet(
"Cannot load cudnn shared library. Cannot invoke method %s.",
fn_name));
} }
#else #else
bool HasCUDNN() { return true; } bool HasCUDNN() { return true; }
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/platform/stream_callback_manager.h" #include "paddle/fluid/platform/stream_callback_manager.h"
#include <utility>
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
...@@ -43,14 +44,16 @@ void StreamCallbackManager::AddCallback(std::function<void()> callback) const { ...@@ -43,14 +44,16 @@ void StreamCallbackManager::AddCallback(std::function<void()> callback) const {
}); });
}); });
#if CUDA_VERSION >= 10000 #if CUDA_VERSION >= 10000
PADDLE_ENFORCE(cudaLaunchHostFunc(stream_, StreamCallbackFunc, func)); PADDLE_ENFORCE_CUDA_SUCCESS(
cudaLaunchHostFunc(stream_, StreamCallbackFunc, func));
#else #else
PADDLE_ENFORCE(cudaStreamAddCallback(stream_, StreamCallbackFunc, func, 0)); PADDLE_ENFORCE_CUDA_SUCCESS(
cudaStreamAddCallback(stream_, StreamCallbackFunc, func, 0));
#endif #endif
} }
void StreamCallbackManager::Wait() const { void StreamCallbackManager::Wait() const {
PADDLE_ENFORCE(cudaStreamSynchronize(stream_)); PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream_));
{ {
std::lock_guard<std::mutex> lock(mtx_); std::lock_guard<std::mutex> lock(mtx_);
if (last_future_.valid()) { if (last_future_.valid()) {
......
...@@ -83,7 +83,9 @@ struct Transform<platform::CUDADeviceContext> { ...@@ -83,7 +83,9 @@ struct Transform<platform::CUDADeviceContext> {
void operator()(const platform::CUDADeviceContext& context, InputIter first, void operator()(const platform::CUDADeviceContext& context, InputIter first,
InputIter last, OutputIter result, UnaryOperation op) { InputIter last, OutputIter result, UnaryOperation op) {
auto place = context.GetPlace(); auto place = context.GetPlace();
PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place."); PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
platform::errors::PreconditionNotMet(
"The CUDA Transform must be used in GPU place."));
thrust::transform(thrust::cuda::par.on(context.stream()), thrust::transform(thrust::cuda::par.on(context.stream()),
details::CastToCUDATransformIterator(first), details::CastToCUDATransformIterator(first),
details::CastToCUDATransformIterator(last), details::CastToCUDATransformIterator(last),
...@@ -96,7 +98,9 @@ struct Transform<platform::CUDADeviceContext> { ...@@ -96,7 +98,9 @@ struct Transform<platform::CUDADeviceContext> {
InputIter1 last1, InputIter2 first2, OutputIter result, InputIter1 last1, InputIter2 first2, OutputIter result,
BinaryOperation op) { BinaryOperation op) {
auto place = context.GetPlace(); auto place = context.GetPlace();
PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place."); PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
platform::errors::PreconditionNotMet(
"The CUDA Transform must be used in GPU place."));
thrust::transform(thrust::cuda::par.on(context.stream()), thrust::transform(thrust::cuda::par.on(context.stream()),
details::CastToCUDATransformIterator(first1), details::CastToCUDATransformIterator(first1),
details::CastToCUDATransformIterator(last1), details::CastToCUDATransformIterator(last1),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册