From aae41c6fca67be6a090d4f83bdf6160737d15162 Mon Sep 17 00:00:00 2001
From: Pei Yang <peiyang@baidu.com>
Date: Mon, 14 Sep 2020 12:55:22 +0800
Subject: [PATCH] refine error message related to paddle-TRT (#27256)

---
 paddle/fluid/inference/tensorrt/engine.cc     | 84 ++++++++++++++-----
 paddle/fluid/inference/tensorrt/engine.h      | 16 +++-
 .../tensorrt/plugin/elementwise_op_plugin.cu  | 44 ++++++++--
 .../fluid/inference/tensorrt/test_engine.cc   | 16 +++-
 .../inference/tensorrt/trt_int8_calibrator.cc |  5 +-
 .../operators/tensorrt/tensorrt_engine_op.h   | 20 +++--
 6 files changed, 138 insertions(+), 47 deletions(-)
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 22be877493..754979f77a 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -63,11 +63,13 @@ void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
 void TensorRTEngine::FreezeNetwork() {
   freshDeviceId();
   VLOG(3) << "TRT to freeze network";
-  PADDLE_ENFORCE(infer_builder_ != nullptr,
-                 "Call InitNetwork first to initialize network.");
-  PADDLE_ENFORCE_EQ(network() != nullptr, true,
-                    platform::errors::InvalidArgument(
-                        "Call InitNetwork first to initialize network."));
+  PADDLE_ENFORCE_NOT_NULL(infer_builder_,
+                          platform::errors::InvalidArgument(
+                              "Inference builder of TRT is null. Please make "
+                              "sure you call InitNetwork first."));
+  PADDLE_ENFORCE_NOT_NULL(network(),
+                          platform::errors::InvalidArgument(
+                              "Call InitNetwork first to initialize network."));
   // build engine.
   infer_builder_->setMaxBatchSize(max_batch_);
   infer_builder_->setMaxWorkspaceSize(max_workspace_);
@@ -210,7 +212,10 @@ void TensorRTEngine::FreezeNetwork() {
   } else {
     infer_engine_.reset(infer_builder_->buildCudaEngine(*network()));
   }
-  PADDLE_ENFORCE(infer_engine_ != nullptr, "build cuda engine failed!");
+  PADDLE_ENFORCE_NOT_NULL(
+      infer_engine_, platform::errors::Fatal(
+                         "Build TensorRT cuda engine failed! Please recheck "
+                         "your configurations related to paddle-TensorRT."));
 }
 
 nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
@@ -220,8 +225,16 @@ nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
                     platform::errors::InvalidArgument(
                         "The TRT network should be initialized first."));
   auto *input = network()->addInput(name.c_str(), dtype, dims);
-  PADDLE_ENFORCE(input, "infer network add input %s failed", name);
-  PADDLE_ENFORCE(input->isNetworkInput());
+  PADDLE_ENFORCE_NOT_NULL(
+      input, platform::errors::InvalidArgument("Adding input %s failed in "
+                                               "TensorRT inference network. "
+                                               "Please recheck your input.",
+                                               name));
+  PADDLE_ENFORCE_EQ(input->isNetworkInput(), true,
+                    platform::errors::InvalidArgument(
+                        "Input %s is not the input of TRT inference network. "
+                        "Please recheck your input.",
+                        name));
   TensorRTEngine::SetITensor(name, input);
   return input;
 }
@@ -230,31 +243,53 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset,
                                    const std::string &name) {
   auto *output = layer->getOutput(offset);
   SetITensor(name, output);
-  PADDLE_ENFORCE(output != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(
+      output, platform::errors::InvalidArgument(
+                  "The output %s of TRT engine should not be null.", name));
   output->setName(name.c_str());
-  PADDLE_ENFORCE(!output->isNetworkInput());
+  PADDLE_ENFORCE_EQ(output->isNetworkInput(), false,
+                    platform::errors::InvalidArgument(
+                        "The output %s of TRT engine should not be the input "
+                        "of the network at the same time.",
+                        name));
   network()->markOutput(*output);
-  PADDLE_ENFORCE(output->isNetworkOutput());
+  PADDLE_ENFORCE_EQ(
+      output->isNetworkOutput(), true,
+      platform::errors::InvalidArgument(
+          "The output %s of TRT engine should be the output of the network.",
+          name));
 }
 
 void TensorRTEngine::DeclareOutput(const std::string &name) {
   auto *output = TensorRTEngine::GetITensor(name);
-  PADDLE_ENFORCE(output != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(
+      output, platform::errors::InvalidArgument(
+                  "The output %s of TRT engine should not be null.", name));
   output->setName(name.c_str());
-  PADDLE_ENFORCE(!output->isNetworkInput());
+  PADDLE_ENFORCE_EQ(output->isNetworkInput(), false,
+                    platform::errors::InvalidArgument(
+                        "The output %s of TRT engine should not be the input "
+                        "of the network at the same time.",
+                        name));
   network()->markOutput(*output);
 }
 
 void TensorRTEngine::SetITensor(const std::string &name,
                                 nvinfer1::ITensor *tensor) {
-  PADDLE_ENFORCE(tensor != nullptr);
-  PADDLE_ENFORCE_EQ(0, itensor_map_.count(name), "duplicate ITensor name %s",
-                    name);
+  PADDLE_ENFORCE_NOT_NULL(
+      tensor, platform::errors::InvalidArgument(
+                  "Tensor named %s of TRT engine should not be null.", name));
+  PADDLE_ENFORCE_EQ(
+      0, itensor_map_.count(name),
+      platform::errors::InvalidArgument(
+          "Tensor named %s of TRT engine should not be duplicated.", name));
   itensor_map_[name] = tensor;
 }
 
 nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
-  PADDLE_ENFORCE(itensor_map_.count(name), "no ITensor %s", name);
+  PADDLE_ENFORCE_EQ(itensor_map_.count(name), true,
+                    platform::errors::NotFound(
+                        "Tensor named %s is not found in TRT engine.", name));
   return itensor_map_[name];
 }
 
@@ -271,11 +306,11 @@ float *TensorRTEngine::GetWeightCPUData(const std::string &name,
   std::string splitter = "__";
   std::string name_with_suffix = name + splitter + name_suffix;
   platform::CPUPlace cpu_place;
-  PADDLE_ENFORCE_EQ(
-      weight_map.count(name_with_suffix), 0,
-      "During TRT Op converter: We set weight %s with the same name "
-      "twice into the weight_map",
-      name_with_suffix);
+  PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix), 0,
+                    platform::errors::AlreadyExists(
+                        "The weight named %s is set into the weight map "
+                        "twice in TRT OP converter.",
+                        name_with_suffix));
   weight_map[name_with_suffix].reset(new framework::Tensor());
   weight_map[name_with_suffix]->Resize(weight_tensor->dims());
   TensorCopySync(*weight_tensor, cpu_place, weight_map[name_with_suffix].get());
@@ -297,7 +332,10 @@ nvinfer1::IPluginLayer *TensorRTEngine::AddPlugin(
 void TensorRTEngine::freshDeviceId() {
   int count;
   cudaGetDeviceCount(&count);
-  PADDLE_ENFORCE_LT(device_id_, count);
+  PADDLE_ENFORCE_LT(device_id_, count,
+                    platform::errors::OutOfRange(
+                        "Device id %d exceeds the current device count: %d.",
+                        device_id_, count));
   cudaSetDevice(device_id_);
 }
 
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 1a3413657c..a85ed483c1 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -196,8 +196,10 @@ class TensorRTEngine {
   }
 
   nvinfer1::IHostMemory* Serialize() {
-    PADDLE_ENFORCE(infer_engine_ != nullptr,
-                   "You should build engine first and then serialize");
+    PADDLE_ENFORCE_NOT_NULL(
+        infer_engine_,
+        platform::errors::InvalidArgument(
+            "The TensorRT engine must be built first before serialization."));
     ihost_memory_.reset(infer_engine_->serialize());
     return ihost_memory_.get();
   }
@@ -222,8 +224,14 @@ class TensorRTEngine {
           engine_serialized_data.c_str(), engine_serialized_data.size(),
           &inference::Singleton<plugin::PluginFactoryTensorRT>::Global()));
     }
-    PADDLE_ENFORCE(infer_engine_ != nullptr,
-                   "build cuda engine failed when deserialize engine info.!");
+    PADDLE_ENFORCE_NOT_NULL(
+        infer_engine_,
+        platform::errors::Fatal(
+            "Building TRT cuda engine failed when deserializing engine info. "
+            "Please check:\n1. Your TRT serialization is generated and loaded "
+            "on the same GPU architecture;\n2. The Paddle Inference version of "
+            "generating serialization file and doing inference are "
+            "consistent."));
   }
 
   void SetRuntimeBatch(size_t batch_size);
diff --git a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
index 0ec803fe64..457d9dd873 100644
--- a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
@@ -56,14 +56,27 @@ __global__ void elementwise_kernel(const size_t total, const T *x_data,
 
 nvinfer1::Dims ElementWisePlugin::getOutputDimensions(
     int index, const nvinfer1::Dims *input_dims, int num_inputs) {
-  PADDLE_ENFORCE_EQ(index, 0);
-  PADDLE_ENFORCE_EQ(num_inputs, 2);
-  PADDLE_ENFORCE_NOT_NULL(input_dims);
+  PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument(
+                                  "There is only one output in TRT elementwise "
+                                  "op plugin, but got output index: %d.",
+                                  index));
+  PADDLE_ENFORCE_EQ(num_inputs, 2, platform::errors::InvalidArgument(
+                                       "There are 2 inputs in TRT elementwise "
+                                       "op plugin, but got input number: %d.",
+                                       num_inputs));
+  PADDLE_ENFORCE_NOT_NULL(
+      input_dims,
+      platform::errors::InvalidArgument(
+          "The input dims of TRT elementwise op plugin should not be null."));
   return input_dims[0];
 }
 
 int ElementWisePlugin::initialize() {
-  PADDLE_ENFORCE_GT(dims_y_.nbDims, 0);
+  PADDLE_ENFORCE_GT(dims_y_.nbDims, 0,
+                    platform::errors::InvalidArgument(
+                        "The dimension of input Y of TRT elementwise op plugin "
+                        "should be greater than 0, but got %d.",
+                        dims_y_.nbDims));
 
   axis_ = (axis_ == -1) ? dims_x_.nbDims - dims_y_.nbDims : axis_;
   int trimed_nb_dims = dims_y_.nbDims;
@@ -74,8 +87,18 @@ int ElementWisePlugin::initialize() {
   }
   dims_y_.nbDims = trimed_nb_dims;
 
-  PADDLE_ENFORCE_GE(dims_x_.nbDims, dims_y_.nbDims + axis_);
-  PADDLE_ENFORCE_LT(axis_, dims_x_.nbDims);
+  PADDLE_ENFORCE_GE(dims_x_.nbDims, dims_y_.nbDims + axis_,
+                    platform::errors::InvalidArgument(
+                        "We expect [number of x dims] >= [number of y dims + "
+                        "axis] in TRT elementwise op plugin, but got [number "
+                        "of x dims] = %d, [number of y dims + axis] = %d.",
+                        dims_x_.nbDims, dims_y_.nbDims + axis_));
+  PADDLE_ENFORCE_LT(
+      axis_, dims_x_.nbDims,
+      platform::errors::InvalidArgument("We expect [axis] < [number of x dims] "
+                                        "in TRT elementwise op plugin, but got "
+                                        "[axis] = %d, [number of x dims] = %d.",
+                                        axis_, dims_x_.nbDims));
 
   prev_size_ = 1;
   midd_size_ = 1;
@@ -86,7 +109,9 @@ int ElementWisePlugin::initialize() {
 
   for (int i = 0; i < dims_y_.nbDims; ++i) {
     PADDLE_ENFORCE_EQ(dims_x_.d[i + axis_], dims_y_.d[i],
-                      "Broadcast dimension mismatch.");
+                      platform::errors::InvalidArgument(
+                          "Broadcast dimension mismatch. The dims of input Y "
+                          "should be a subsequence of X."));
     midd_size_ *= dims_y_.d[i];
   }
 
@@ -221,7 +246,10 @@ int ElementwisePluginDynamic::enqueue(
     elementwise_kernel<<<block, thread, 0, stream>>>(
         num, x, y, out, prev_size, midd_size, post_size, details::Mul<float>());
   } else {
-    PADDLE_THROW("Not implemented.");
+    PADDLE_THROW(platform::errors::Unimplemented(
+        "Paddle-TRT only supports elementwise operation: {add, mul} currently, "
+        "but got %s.",
+        type_));
   }
 
   return cudaGetLastError() != cudaSuccess;
diff --git a/paddle/fluid/inference/tensorrt/test_engine.cc b/paddle/fluid/inference/tensorrt/test_engine.cc
index a03dd45db0..72962c733e 100644
--- a/paddle/fluid/inference/tensorrt/test_engine.cc
+++ b/paddle/fluid/inference/tensorrt/test_engine.cc
@@ -74,7 +74,9 @@ TEST_F(TensorRTEngineTest, add_layer) {
                                   nvinfer1::DimsCHW{1, 1, 1});
   auto *fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, size,
                                         weight.get(), bias.get());
-  PADDLE_ENFORCE(fc_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(fc_layer,
+                          platform::errors::InvalidArgument(
+                              "TRT fully connected layer building failed."));
 
   engine_->DeclareOutput(fc_layer, 0, "y");
   LOG(INFO) << "freeze network";
@@ -116,7 +118,9 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
                                   nvinfer1::DimsCHW{1, 2, 1});
   auto *fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, 2,
                                         weight.get(), bias.get());
-  PADDLE_ENFORCE(fc_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(fc_layer,
+                          platform::errors::InvalidArgument(
+                              "TRT fully connected layer building failed."));
 
   engine_->DeclareOutput(fc_layer, 0, "y");
   engine_->FreezeNetwork();
@@ -160,7 +164,9 @@ TEST_F(TensorRTEngineTest, test_conv2d) {
   auto *conv_layer =
       TRT_ENGINE_ADD_LAYER(engine_, Convolution, *x, 1, nvinfer1::DimsHW{3, 3},
                            weight.get(), bias.get());
-  PADDLE_ENFORCE(conv_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(conv_layer,
+                          platform::errors::InvalidArgument(
+                              "TRT convolution layer building failed."));
   conv_layer->setStride(nvinfer1::DimsHW{1, 1});
   conv_layer->setPadding(nvinfer1::DimsHW{1, 1});
 
@@ -199,7 +205,9 @@ TEST_F(TensorRTEngineTest, test_pool2d) {
   auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *x, pool_t,
                                           nvinfer1::DimsHW{2, 2});
 
-  PADDLE_ENFORCE(pool_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(
+      pool_layer,
+      platform::errors::InvalidArgument("TRT pooling layer building failed."));
   pool_layer->setStride(nvinfer1::DimsHW{1, 1});
   pool_layer->setPadding(nvinfer1::DimsHW{0, 0});
 
diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc
index 34b7072b2e..743f7740e5 100644
--- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc
+++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc
@@ -83,9 +83,8 @@ bool TRTInt8Calibrator::setBatch(
           engine_name_, it.first));
     }
     const auto& d = dataptr->second;
-    PADDLE_ENFORCE(
-        cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice),
-        "Fail to cudaMemcpy %s for %s", engine_name_, it.first);
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice));
   }
 
   data_is_set_ = true;
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
index cc6ee7b19e..9cfe47da5d 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -208,8 +208,11 @@ class TensorRTEngineOp : public framework::OperatorBase {
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx).stream();
 
-    PADDLE_ENFORCE_EQ(input_names_.empty(), false,
-                      "should pass at least one input");
+    PADDLE_ENFORCE_EQ(
+        input_names_.empty(), false,
+        platform::errors::PreconditionNotMet(
+            "TensorRT engine needs at least one input, but no input is found. "
+            "Please check if you set the input correctly."));
 
     std::vector<std::string> output_maps =
         Attr<std::vector<std::string>>("output_name_mapping");
@@ -295,12 +298,19 @@ class TensorRTEngineOp : public framework::OperatorBase {
 #endif
       }
       auto *fluid_v = scope.FindVar(y);
-      PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y);
+      PADDLE_ENFORCE_NOT_NULL(
+          fluid_v,
+          platform::errors::NotFound(
+              "Output variable %s is not found in TensorRT subgraph.", y));
       auto *fluid_t = fluid_v->GetMutable<framework::LoDTensor>();
       fluid_t->Resize(framework::make_ddim(ddim));
 
-      PADDLE_ENFORCE(bind_index < num_bindings,
-                     "The bind index should be less than num_bindings");
+      PADDLE_ENFORCE_LT(bind_index, num_bindings,
+                        platform::errors::InvalidArgument(
+                            "The binding index in TRT engine should be less "
+                            "than the number of bindings, but got binding "
+                            "index = %d, number of bindings = %d.",
+                            bind_index, num_bindings));
       buffers[bind_index] = static_cast<void *>(fluid_t->mutable_data<float>(
           BOOST_GET_CONST(platform::CUDAPlace, dev_place)));
 
-- 
GitLab