fix PADDLE_ENFORCE (#25297)

* fix PADDLE_ENFORCE and refine the description test=develop

fix PADDLE_ENFORCE (#25297)
* fix PADDLE_ENFORCE and refine the description test=develop
fb70682f · GaoWei8 · GitHub · dc17ac91 · fb70682f · fb70682f
8 changed file
--- a/paddle/fluid/platform/collective_helper.cc
+++ b/paddle/fluid/platform/collective_helper.cc
@@ -57,11 +57,25 @@ class NCCLCommImpl : public NCCLComm {

 NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks,
                                          int rank, int dev_id, int ring_id) {
-  PADDLE_ENFORCE_NOT_NULL(nccl_id);
-  PADDLE_ENFORCE_GT(nranks, 1);
-  PADDLE_ENFORCE_GE(rank, 0);
-  PADDLE_ENFORCE_LT(rank, nranks);
-  PADDLE_ENFORCE_GE(dev_id, 0);
+  PADDLE_ENFORCE_NOT_NULL(nccl_id,
+                          platform::errors::InvalidArgument(
+                              "The nccl unique id should not be null."));
+  PADDLE_ENFORCE_GT(
+      nranks, 1,
+      platform::errors::InvalidArgument(
+          "Expected nranks > 1. But received nranks is %d.", nranks));
+  PADDLE_ENFORCE_GE(rank, 0,
+                    platform::errors::InvalidArgument(
+                        "Expected rank >= 0. But received rank is %d.", rank));
+  PADDLE_ENFORCE_LT(
+      rank, nranks,
+      platform::errors::InvalidArgument(
+          "Expected rank < nranks. But received rank is %d, nranks is %d.",
+          rank, nranks));
+  PADDLE_ENFORCE_GE(
+      dev_id, 0,
+      platform::errors::InvalidArgument(
+          "Expected dev_id >= 0. But received dev_id is %d.", dev_id));

  ncclComm_t comm = nullptr;
  PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id));
@@ -82,14 +96,22 @@ NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks,

 void NCCLCommContext::CreateAllNCCLComms(const std::vector<int>& dev_ids,
                                         int ring_id) {
-  PADDLE_ENFORCE_GT(dev_ids.size(), 0);
+  PADDLE_ENFORCE_GT(
+      dev_ids.size(), 0,
+      platform::errors::InvalidArgument("Expected the size of dev_ids > 0. But "
+                                        "received the size of dev_ids is %d.",
+                                        dev_ids.size()));

  const int kDevices = dev_ids.size();
  ncclComm_t comms[kDevices];
  PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclCommInitAll(
      comms, dev_ids.size(), dev_ids.data()));

-  PADDLE_ENFORCE_EQ(comm_map_.count(ring_id), 0);
+  PADDLE_ENFORCE_EQ(comm_map_.count(ring_id), 0,
+                    platform::errors::InvalidArgument(
+                        "Expected comm_map_.count(ring_id) = 0. But received "
+                        "comm_map_.count(ring_id) is %d.",
+                        comm_map_.count(ring_id)));
  for (size_t i = 0; i < dev_ids.size(); ++i) {
    AssignNCCLComm(comms[i], dev_ids.size(), i, dev_ids[i], ring_id);
    VLOG(1) << "nccl communicator of rank " << i << " in ring " << ring_id

--- a/paddle/fluid/platform/collective_helper.h
+++ b/paddle/fluid/platform/collective_helper.h
@@ -78,24 +78,28 @@ class NCCLCommContext {

  // retrieve a communicator by the ring id in multiprocessing mode
  NCCLComm* Get(int ring_id) const {
-    PADDLE_ENFORCE_GT(comm_map_.count(ring_id), 0,
-                      "comunicator in ring id %d has not been initialized",
-                      ring_id);
+    PADDLE_ENFORCE_GT(
+        comm_map_.count(ring_id), 0,
+        platform::errors::InvalidArgument(
+            "Comunicator in ring id %d has not been initialized.", ring_id));
    PADDLE_ENFORCE_EQ(comm_map_.at(ring_id).size(), 1,
-                      "you should specify a device id to retrieve from "
-                      "multiple communicators");
+                      platform::errors::InvalidArgument(
+                          "One device id should be specified to retrieve from "
+                          "multiple communicators."));
    return comm_map_.at(ring_id).begin()->second.get();
  }

  // retrieve a communicator by the ring id and the device id
  NCCLComm* Get(int ring_id, int dev_id) const {
-    PADDLE_ENFORCE_GT(comm_map_.count(ring_id), 0,
-                      "comunicator of ring id %d has not been initialized",
-                      ring_id);
+    PADDLE_ENFORCE_GT(
+        comm_map_.count(ring_id), 0,
+        platform::errors::InvalidArgument(
+            "Comunicator of ring id %d has not been initialized.", ring_id));
    PADDLE_ENFORCE_GT(
        comm_map_.at(ring_id).count(dev_id), 0,
-        "comunicator at device id %d has not been initialized in ring %d",
-        dev_id, ring_id);
+        platform::errors::InvalidArgument(
+            "Comunicator at device id %d has not been initialized in ring %d.",
+            dev_id, ring_id));
    return comm_map_.at(ring_id).at(dev_id).get();
  }


--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -515,7 +515,9 @@ class DeviceContextPool {
  explicit DeviceContextPool(const std::vector<platform::Place>& places);

  static DeviceContextPool& Instance() {
-    PADDLE_ENFORCE_NOT_NULL(pool, "Need to Create DeviceContextPool first!");
+    PADDLE_ENFORCE_NOT_NULL(pool,
+                            platform::errors::PreconditionNotMet(
+                                "Need to Create DeviceContextPool firstly!"));
    return *pool;
  }


--- a/paddle/fluid/platform/device_memory_aligment.cc
+++ b/paddle/fluid/platform/device_memory_aligment.cc
@@ -24,7 +24,8 @@ size_t Alignment(size_t size, const platform::Place &place) {
 #ifdef PADDLE_WITH_CUDA
    alignment = GpuMinChunkSize();
 #else
-    PADDLE_THROW("Fluid is not compiled with CUDA");
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "Fluid is not compiled with CUDA."));
 #endif
  }
  size_t remaining = size % alignment;

--- a/paddle/fluid/platform/device_tracer.cc
+++ b/paddle/fluid/platform/device_tracer.cc
@@ -177,8 +177,10 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer,
  static std::thread::id cupti_thread_id(0);
  if (cupti_thread_id == std::thread::id(0))
    cupti_thread_id = std::this_thread::get_id();
-  PADDLE_ENFORCE_EQ(std::this_thread::get_id(), cupti_thread_id,
-                    "Only one thread is allowed to call bufferCompleted()");
+  PADDLE_ENFORCE_EQ(
+      std::this_thread::get_id(), cupti_thread_id,
+      platform::errors::PermissionDenied(
+          "Only one thread is allowed to call bufferCompleted()."));
  CUptiResult status;
  CUpti_Activity *record = NULL;
  if (validSize > 0) {

--- a/paddle/fluid/platform/dynload/cudnn.cc
+++ b/paddle/fluid/platform/dynload/cudnn.cc
@@ -58,9 +58,11 @@ bool HasCUDNN() {
 }

 void EnforceCUDNNLoaded(const char* fn_name) {
-  PADDLE_ENFORCE(cudnn_dso_handle != nullptr,
-                 "Cannot load cudnn shared library. Cannot invoke method %s",
-                 fn_name);
+  PADDLE_ENFORCE_NOT_NULL(
+      cudnn_dso_handle,
+      platform::errors::PreconditionNotMet(
+          "Cannot load cudnn shared library. Cannot invoke method %s.",
+          fn_name));
 }
 #else
 bool HasCUDNN() { return true; }

--- a/paddle/fluid/platform/stream_callback_manager.cc
+++ b/paddle/fluid/platform/stream_callback_manager.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "paddle/fluid/platform/stream_callback_manager.h"
+#include <utility>
 #include "paddle/fluid/platform/enforce.h"

 namespace paddle {
@@ -43,14 +44,16 @@ void StreamCallbackManager::AddCallback(std::function<void()> callback) const {
    });
  });
 #if CUDA_VERSION >= 10000
-  PADDLE_ENFORCE(cudaLaunchHostFunc(stream_, StreamCallbackFunc, func));
+  PADDLE_ENFORCE_CUDA_SUCCESS(
+      cudaLaunchHostFunc(stream_, StreamCallbackFunc, func));
 #else
-  PADDLE_ENFORCE(cudaStreamAddCallback(stream_, StreamCallbackFunc, func, 0));
+  PADDLE_ENFORCE_CUDA_SUCCESS(
+      cudaStreamAddCallback(stream_, StreamCallbackFunc, func, 0));
 #endif
 }

 void StreamCallbackManager::Wait() const {
-  PADDLE_ENFORCE(cudaStreamSynchronize(stream_));
+  PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream_));
  {
    std::lock_guard<std::mutex> lock(mtx_);
    if (last_future_.valid()) {

--- a/paddle/fluid/platform/transform.h
+++ b/paddle/fluid/platform/transform.h
@@ -83,7 +83,9 @@ struct Transform<platform::CUDADeviceContext> {
  void operator()(const platform::CUDADeviceContext& context, InputIter first,
                  InputIter last, OutputIter result, UnaryOperation op) {
    auto place = context.GetPlace();
-    PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place.");
+    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
+                      platform::errors::PreconditionNotMet(
+                          "The CUDA Transform must be used in GPU place."));
    thrust::transform(thrust::cuda::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first),
                      details::CastToCUDATransformIterator(last),
@@ -96,7 +98,9 @@ struct Transform<platform::CUDADeviceContext> {
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op) {
    auto place = context.GetPlace();
-    PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place.");
+    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
+                      platform::errors::PreconditionNotMet(
+                          "The CUDA Transform must be used in GPU place."));
    thrust::transform(thrust::cuda::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first1),
                      details::CastToCUDATransformIterator(last1),