Unverified commit fb70682f, authored by GaoWei8, committed via GitHub

fix PADDLE_ENFORCE (#25297)

* fix PADDLE_ENFORCE and refine the error-message descriptions
test=develop
Parent commit: dc17ac91
......@@ -57,11 +57,25 @@ class NCCLCommImpl : public NCCLComm {
NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks,
int rank, int dev_id, int ring_id) {
PADDLE_ENFORCE_NOT_NULL(nccl_id);
PADDLE_ENFORCE_GT(nranks, 1);
PADDLE_ENFORCE_GE(rank, 0);
PADDLE_ENFORCE_LT(rank, nranks);
PADDLE_ENFORCE_GE(dev_id, 0);
PADDLE_ENFORCE_NOT_NULL(nccl_id,
platform::errors::InvalidArgument(
"The nccl unique id should not be null."));
PADDLE_ENFORCE_GT(
nranks, 1,
platform::errors::InvalidArgument(
"Expected nranks > 1. But received nranks is %d.", nranks));
PADDLE_ENFORCE_GE(rank, 0,
platform::errors::InvalidArgument(
"Expected rank >= 0. But received rank is %d.", rank));
PADDLE_ENFORCE_LT(
rank, nranks,
platform::errors::InvalidArgument(
"Expected rank < nranks. But received rank is %d, nranks is %d.",
rank, nranks));
PADDLE_ENFORCE_GE(
dev_id, 0,
platform::errors::InvalidArgument(
"Expected dev_id >= 0. But received dev_id is %d.", dev_id));
ncclComm_t comm = nullptr;
PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id));
......@@ -82,14 +96,22 @@ NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks,
void NCCLCommContext::CreateAllNCCLComms(const std::vector<int>& dev_ids,
int ring_id) {
PADDLE_ENFORCE_GT(dev_ids.size(), 0);
PADDLE_ENFORCE_GT(
dev_ids.size(), 0,
platform::errors::InvalidArgument("Expected the size of dev_ids > 0. But "
"received the size of dev_ids is %d.",
dev_ids.size()));
const int kDevices = dev_ids.size();
ncclComm_t comms[kDevices];
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclCommInitAll(
comms, dev_ids.size(), dev_ids.data()));
PADDLE_ENFORCE_EQ(comm_map_.count(ring_id), 0);
PADDLE_ENFORCE_EQ(comm_map_.count(ring_id), 0,
platform::errors::InvalidArgument(
"Expected comm_map_.count(ring_id) = 0. But received "
"comm_map_.count(ring_id) is %d.",
comm_map_.count(ring_id)));
for (size_t i = 0; i < dev_ids.size(); ++i) {
AssignNCCLComm(comms[i], dev_ids.size(), i, dev_ids[i], ring_id);
VLOG(1) << "nccl communicator of rank " << i << " in ring " << ring_id
......
......@@ -78,24 +78,28 @@ class NCCLCommContext {
// retrieve a communicator by the ring id in multiprocessing mode
NCCLComm* Get(int ring_id) const {
  // The ring must have been created before a communicator can be retrieved
  // from it.
  PADDLE_ENFORCE_GT(
      comm_map_.count(ring_id), 0,
      platform::errors::InvalidArgument(
          "Communicator in ring id %d has not been initialized.", ring_id));
  // Without a device id this overload is only unambiguous when the ring
  // holds exactly one communicator; otherwise callers must use
  // Get(ring_id, dev_id).
  PADDLE_ENFORCE_EQ(comm_map_.at(ring_id).size(), 1,
                    platform::errors::InvalidArgument(
                        "One device id should be specified to retrieve from "
                        "multiple communicators."));
  return comm_map_.at(ring_id).begin()->second.get();
}
// retrieve a communicator by the ring id and the device id
// Retrieve the communicator identified by both ring id and device id.
NCCLComm* Get(int ring_id, int dev_id) const {
  // Check the ring first so the error message distinguishes a missing ring
  // from a missing device within an existing ring.
  PADDLE_ENFORCE_GT(
      comm_map_.count(ring_id), 0,
      platform::errors::InvalidArgument(
          "Communicator of ring id %d has not been initialized.", ring_id));
  PADDLE_ENFORCE_GT(
      comm_map_.at(ring_id).count(dev_id), 0,
      platform::errors::InvalidArgument(
          "Communicator at device id %d has not been initialized in ring %d.",
          dev_id, ring_id));
  return comm_map_.at(ring_id).at(dev_id).get();
}
......
......@@ -515,7 +515,9 @@ class DeviceContextPool {
explicit DeviceContextPool(const std::vector<platform::Place>& places);
static DeviceContextPool& Instance() {
  // `pool` is set up elsewhere; it must be non-null before Instance() is
  // used, otherwise fail with a precondition error.
  PADDLE_ENFORCE_NOT_NULL(pool,
                          platform::errors::PreconditionNotMet(
                              "Need to Create DeviceContextPool firstly!"));
  return *pool;
}
......
......@@ -24,7 +24,8 @@ size_t Alignment(size_t size, const platform::Place &place) {
#ifdef PADDLE_WITH_CUDA
alignment = GpuMinChunkSize();
#else
PADDLE_THROW("Fluid is not compiled with CUDA");
PADDLE_THROW(platform::errors::PreconditionNotMet(
"Fluid is not compiled with CUDA."));
#endif
}
size_t remaining = size % alignment;
......
......@@ -177,8 +177,10 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer,
static std::thread::id cupti_thread_id(0);
if (cupti_thread_id == std::thread::id(0))
cupti_thread_id = std::this_thread::get_id();
PADDLE_ENFORCE_EQ(std::this_thread::get_id(), cupti_thread_id,
"Only one thread is allowed to call bufferCompleted()");
PADDLE_ENFORCE_EQ(
std::this_thread::get_id(), cupti_thread_id,
platform::errors::PermissionDenied(
"Only one thread is allowed to call bufferCompleted()."));
CUptiResult status;
CUpti_Activity *record = NULL;
if (validSize > 0) {
......
......@@ -58,9 +58,11 @@ bool HasCUDNN() {
}
// Abort with a precondition error naming the cuDNN symbol that was about to
// be invoked when the cuDNN shared library handle is null (i.e. the library
// failed to load).
void EnforceCUDNNLoaded(const char* fn_name) {
  PADDLE_ENFORCE_NOT_NULL(
      cudnn_dso_handle,
      platform::errors::PreconditionNotMet(
          "Cannot load cudnn shared library. Cannot invoke method %s.",
          fn_name));
}
#else
bool HasCUDNN() { return true; }
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/platform/stream_callback_manager.h"
#include <utility>
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
......@@ -43,14 +44,16 @@ void StreamCallbackManager::AddCallback(std::function<void()> callback) const {
});
});
#if CUDA_VERSION >= 10000
PADDLE_ENFORCE(cudaLaunchHostFunc(stream_, StreamCallbackFunc, func));
PADDLE_ENFORCE_CUDA_SUCCESS(
cudaLaunchHostFunc(stream_, StreamCallbackFunc, func));
#else
PADDLE_ENFORCE(cudaStreamAddCallback(stream_, StreamCallbackFunc, func, 0));
PADDLE_ENFORCE_CUDA_SUCCESS(
cudaStreamAddCallback(stream_, StreamCallbackFunc, func, 0));
#endif
}
void StreamCallbackManager::Wait() const {
PADDLE_ENFORCE(cudaStreamSynchronize(stream_));
PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream_));
{
std::lock_guard<std::mutex> lock(mtx_);
if (last_future_.valid()) {
......
......@@ -83,7 +83,9 @@ struct Transform<platform::CUDADeviceContext> {
void operator()(const platform::CUDADeviceContext& context, InputIter first,
InputIter last, OutputIter result, UnaryOperation op) {
auto place = context.GetPlace();
PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place.");
PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
platform::errors::PreconditionNotMet(
"The CUDA Transform must be used in GPU place."));
thrust::transform(thrust::cuda::par.on(context.stream()),
details::CastToCUDATransformIterator(first),
details::CastToCUDATransformIterator(last),
......@@ -96,7 +98,9 @@ struct Transform<platform::CUDADeviceContext> {
InputIter1 last1, InputIter2 first2, OutputIter result,
BinaryOperation op) {
auto place = context.GetPlace();
PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place.");
PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
platform::errors::PreconditionNotMet(
"The CUDA Transform must be used in GPU place."));
thrust::transform(thrust::cuda::par.on(context.stream()),
details::CastToCUDATransformIterator(first1),
details::CastToCUDATransformIterator(last1),
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Please finish editing this message first!
To comment, please register.