From aae41c6fca67be6a090d4f83bdf6160737d15162 Mon Sep 17 00:00:00 2001
From: Pei Yang
Date: Mon, 14 Sep 2020 12:55:22 +0800
Subject: [PATCH] refine error messages related to paddle-TRT (#27256)

---
 paddle/fluid/inference/tensorrt/engine.cc          | 84 ++++++++++++++-----
 paddle/fluid/inference/tensorrt/engine.h           | 16 +++-
 .../tensorrt/plugin/elementwise_op_plugin.cu       | 44 ++++++++--
 .../fluid/inference/tensorrt/test_engine.cc        | 16 +++-
 .../inference/tensorrt/trt_int8_calibrator.cc      |  5 +-
 .../operators/tensorrt/tensorrt_engine_op.h        | 20 +++--
 6 files changed, 138 insertions(+), 47 deletions(-)

diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 22be8774932..754979f77ac 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -63,11 +63,13 @@ void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
 void TensorRTEngine::FreezeNetwork() {
   freshDeviceId();
   VLOG(3) << "TRT to freeze network";
-  PADDLE_ENFORCE(infer_builder_ != nullptr,
-                 "Call InitNetwork first to initialize network.");
-  PADDLE_ENFORCE_EQ(network() != nullptr, true,
-                    platform::errors::InvalidArgument(
-                        "Call InitNetwork first to initialize network."));
+  PADDLE_ENFORCE_NOT_NULL(infer_builder_,
+                          platform::errors::InvalidArgument(
+                              "Inference builder of TRT is null. Please make "
+                              "sure you call InitNetwork first."));
+  PADDLE_ENFORCE_NOT_NULL(network(),
+                          platform::errors::InvalidArgument(
+                              "Call InitNetwork first to initialize network."));
   // build engine.
   infer_builder_->setMaxBatchSize(max_batch_);
   infer_builder_->setMaxWorkspaceSize(max_workspace_);
@@ -210,7 +212,10 @@ void TensorRTEngine::FreezeNetwork() {
   } else {
     infer_engine_.reset(infer_builder_->buildCudaEngine(*network()));
   }
-  PADDLE_ENFORCE(infer_engine_ != nullptr, "build cuda engine failed!");
+  PADDLE_ENFORCE_NOT_NULL(
+      infer_engine_, platform::errors::Fatal(
+                         "Building TensorRT cuda engine failed! Please recheck "
+                         "your configurations related to paddle-TensorRT."));
 }
 
 nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
@@ -220,8 +225,16 @@ nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
                     platform::errors::InvalidArgument(
                         "The TRT network should be initialized first."));
   auto *input = network()->addInput(name.c_str(), dtype, dims);
-  PADDLE_ENFORCE(input, "infer network add input %s failed", name);
-  PADDLE_ENFORCE(input->isNetworkInput());
+  PADDLE_ENFORCE_NOT_NULL(
+      input, platform::errors::InvalidArgument("Adding input %s failed in "
+                                               "TensorRT inference network. "
+                                               "Please recheck your input.",
+                                               name));
+  PADDLE_ENFORCE_EQ(input->isNetworkInput(), true,
+                    platform::errors::InvalidArgument(
+                        "Input %s is not the input of TRT inference network. "
" + "Please recheck your input.", + name)); TensorRTEngine::SetITensor(name, input); return input; } @@ -230,31 +243,53 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset, const std::string &name) { auto *output = layer->getOutput(offset); SetITensor(name, output); - PADDLE_ENFORCE(output != nullptr); + PADDLE_ENFORCE_NOT_NULL( + output, platform::errors::InvalidArgument( + "The output %s of TRT engine should not be null.", name)); output->setName(name.c_str()); - PADDLE_ENFORCE(!output->isNetworkInput()); + PADDLE_ENFORCE_EQ(output->isNetworkInput(), false, + platform::errors::InvalidArgument( + "The output %s of TRT engine should not be the input " + "of the network at the same time.", + name)); network()->markOutput(*output); - PADDLE_ENFORCE(output->isNetworkOutput()); + PADDLE_ENFORCE_EQ( + output->isNetworkOutput(), true, + platform::errors::InvalidArgument( + "The output %s of TRT engine should be the output of the network.", + name)); } void TensorRTEngine::DeclareOutput(const std::string &name) { auto *output = TensorRTEngine::GetITensor(name); - PADDLE_ENFORCE(output != nullptr); + PADDLE_ENFORCE_NOT_NULL( + output, platform::errors::InvalidArgument( + "The output %s of TRT engine should not be null.", name)); output->setName(name.c_str()); - PADDLE_ENFORCE(!output->isNetworkInput()); + PADDLE_ENFORCE_EQ(output->isNetworkInput(), false, + platform::errors::InvalidArgument( + "The output %s of TRT engine should not be the input " + "of the network at the same time.", + name)); network()->markOutput(*output); } void TensorRTEngine::SetITensor(const std::string &name, nvinfer1::ITensor *tensor) { - PADDLE_ENFORCE(tensor != nullptr); - PADDLE_ENFORCE_EQ(0, itensor_map_.count(name), "duplicate ITensor name %s", - name); + PADDLE_ENFORCE_NOT_NULL( + tensor, platform::errors::InvalidArgument( + "Tensor named %s of TRT engine should not be null.", name)); + PADDLE_ENFORCE_EQ( + 0, itensor_map_.count(name), + platform::errors::InvalidArgument( + "Tensor named %s of TRT engine should not be duplicated", name)); itensor_map_[name] = tensor; } nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) { - PADDLE_ENFORCE(itensor_map_.count(name), "no ITensor %s", name); + PADDLE_ENFORCE_EQ(itensor_map_.count(name), true, + platform::errors::NotFound( + "Tensor named %s is not found in TRT engine", name)); return itensor_map_[name]; } @@ -271,11 +306,11 @@ float *TensorRTEngine::GetWeightCPUData(const std::string &name, std::string splitter = "__"; std::string name_with_suffix = name + splitter + name_suffix; platform::CPUPlace cpu_place; - PADDLE_ENFORCE_EQ( - weight_map.count(name_with_suffix), 0, - "During TRT Op converter: We set weight %s with the same name " - "twice into the weight_map", - name_with_suffix); + PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix), 0, + platform::errors::AlreadyExists( + "The weight named %s is set into the weight map " + "twice in TRT OP converter.", + name_with_suffix)); weight_map[name_with_suffix].reset(new framework::Tensor()); weight_map[name_with_suffix]->Resize(weight_tensor->dims()); TensorCopySync(*weight_tensor, cpu_place, weight_map[name_with_suffix].get()); @@ -297,7 +332,10 @@ nvinfer1::IPluginLayer *TensorRTEngine::AddPlugin( void TensorRTEngine::freshDeviceId() { int count; cudaGetDeviceCount(&count); - PADDLE_ENFORCE_LT(device_id_, count); + PADDLE_ENFORCE_LT(device_id_, count, + platform::errors::OutOfRange( + "Device id %d exceeds the current device count: %d.", + device_id_, count)); 
   cudaSetDevice(device_id_);
 }
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 1a3413657ce..a85ed483c1d 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -196,8 +196,10 @@ class TensorRTEngine {
   }
 
   nvinfer1::IHostMemory* Serialize() {
-    PADDLE_ENFORCE(infer_engine_ != nullptr,
-                   "You should build engine first and then serialize");
+    PADDLE_ENFORCE_NOT_NULL(
+        infer_engine_,
+        platform::errors::InvalidArgument(
+            "The TensorRT engine must be built before serialization."));
     ihost_memory_.reset(infer_engine_->serialize());
     return ihost_memory_.get();
   }
@@ -222,8 +224,14 @@ class TensorRTEngine {
           engine_serialized_data.c_str(), engine_serialized_data.size(),
           &inference::Singleton<plugin::PluginFactoryTensorRT>::Global()));
     }
-    PADDLE_ENFORCE(infer_engine_ != nullptr,
-                   "build cuda engine failed when deserialize engine info.!");
+    PADDLE_ENFORCE_NOT_NULL(
+        infer_engine_,
+        platform::errors::Fatal(
+            "Building TRT cuda engine failed when deserializing engine info. "
+            "Please check:\n1. Your TRT serialization was generated and "
+            "loaded on the same GPU architecture;\n2. The Paddle Inference "
+            "version used to generate the serialization file is the same "
+            "version used for inference."));
   }
 
   void SetRuntimeBatch(size_t batch_size);
diff --git a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
index 0ec803fe64a..457d9dd8737 100644
--- a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
@@ -56,14 +56,27 @@ __global__ void elementwise_kernel(const size_t total, const T *x_data,
 
 nvinfer1::Dims ElementWisePlugin::getOutputDimensions(
     int index, const nvinfer1::Dims *input_dims, int num_inputs) {
-  PADDLE_ENFORCE_EQ(index, 0);
-  PADDLE_ENFORCE_EQ(num_inputs, 2);
-  PADDLE_ENFORCE_NOT_NULL(input_dims);
+  PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument(
+                                  "There is only one output in TRT elementwise "
+                                  "op plugin, but got output index: %d.",
+                                  index));
+  PADDLE_ENFORCE_EQ(num_inputs, 2, platform::errors::InvalidArgument(
+                                       "There should be 2 inputs in TRT "
+                                       "elementwise op plugin, but got input number: %d.",
+                                       num_inputs));
+  PADDLE_ENFORCE_NOT_NULL(
+      input_dims,
+      platform::errors::InvalidArgument(
+          "The input dims of TRT elementwise op plugin should not be null."));
   return input_dims[0];
 }
 
 int ElementWisePlugin::initialize() {
-  PADDLE_ENFORCE_GT(dims_y_.nbDims, 0);
+  PADDLE_ENFORCE_GT(dims_y_.nbDims, 0,
+                    platform::errors::InvalidArgument(
+                        "The dimension of input Y of TRT elementwise op plugin "
+                        "should be greater than 0, but got %d.",
+                        dims_y_.nbDims));
   axis_ = (axis_ == -1) ? dims_x_.nbDims - dims_y_.nbDims : axis_;
   int trimed_nb_dims = dims_y_.nbDims;
@@ -74,8 +87,18 @@ int ElementWisePlugin::initialize() {
   }
   dims_y_.nbDims = trimed_nb_dims;
 
-  PADDLE_ENFORCE_GE(dims_x_.nbDims, dims_y_.nbDims + axis_);
-  PADDLE_ENFORCE_LT(axis_, dims_x_.nbDims);
+  PADDLE_ENFORCE_GE(dims_x_.nbDims, dims_y_.nbDims + axis_,
+                    platform::errors::InvalidArgument(
+                        "We expect [number of x dims] >= [number of y dims + "
+                        "axis] in TRT elementwise op plugin, but got [number "
+                        "of x dims] = %d, [number of y dims + axis] = %d.",
+                        dims_x_.nbDims, dims_y_.nbDims + axis_));
+  PADDLE_ENFORCE_LT(
+      axis_, dims_x_.nbDims,
+      platform::errors::InvalidArgument("We expect [axis] < [number of x dims] "
+                                        "in TRT elementwise op plugin, but got "
+                                        "[axis] = %d, [number of x dims] = %d.",
+                                        axis_, dims_x_.nbDims));
 
   prev_size_ = 1;
   midd_size_ = 1;
@@ -86,7 +109,9 @@ int ElementWisePlugin::initialize() {
 
   for (int i = 0; i < dims_y_.nbDims; ++i) {
     PADDLE_ENFORCE_EQ(dims_x_.d[i + axis_], dims_y_.d[i],
-                      "Broadcast dimension mismatch.");
+                      platform::errors::InvalidArgument(
+                          "Broadcast dimension mismatch. The dims of input Y "
+                          "should be a subsequence of X."));
     midd_size_ *= dims_y_.d[i];
   }
 
@@ -221,7 +246,10 @@ int ElementwisePluginDynamic::enqueue(
     elementwise_kernel<<<block, thread, 0, stream>>>(
        num, x, y, out, prev_size, midd_size, post_size, details::Mul<float>());
   } else {
-    PADDLE_THROW("Not implemented.");
+    PADDLE_THROW(platform::errors::Unimplemented(
+        "Paddle-TRT only supports elementwise operations {add, mul} "
+        "currently, but got %s.",
+        type_));
   }
 
   return cudaGetLastError() != cudaSuccess;
diff --git a/paddle/fluid/inference/tensorrt/test_engine.cc b/paddle/fluid/inference/tensorrt/test_engine.cc
index a03dd45db0f..72962c733ec 100644
--- a/paddle/fluid/inference/tensorrt/test_engine.cc
+++ b/paddle/fluid/inference/tensorrt/test_engine.cc
@@ -74,7 +74,9 @@ TEST_F(TensorRTEngineTest, add_layer) {
                                   nvinfer1::DimsCHW{1, 1, 1});
   auto *fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, size,
                                         weight.get(), bias.get());
-  PADDLE_ENFORCE(fc_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(fc_layer,
+                          platform::errors::InvalidArgument(
+                              "TRT fully connected layer building failed."));
   engine_->DeclareOutput(fc_layer, 0, "y");
 
   LOG(INFO) << "freeze network";
@@ -116,7 +118,9 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
                                   nvinfer1::DimsCHW{1, 2, 1});
   auto *fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, 2,
                                         weight.get(), bias.get());
-  PADDLE_ENFORCE(fc_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(fc_layer,
+                          platform::errors::InvalidArgument(
+                              "TRT fully connected layer building failed."));
   engine_->DeclareOutput(fc_layer, 0, "y");
 
   engine_->FreezeNetwork();
@@ -160,7 +164,9 @@ TEST_F(TensorRTEngineTest, test_conv2d) {
   auto *conv_layer =
       TRT_ENGINE_ADD_LAYER(engine_, Convolution, *x, 1, nvinfer1::DimsHW{3, 3},
                            weight.get(), bias.get());
-  PADDLE_ENFORCE(conv_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(conv_layer,
+                          platform::errors::InvalidArgument(
+                              "TRT convolution layer building failed."));
   conv_layer->setStride(nvinfer1::DimsHW{1, 1});
   conv_layer->setPadding(nvinfer1::DimsHW{1, 1});
 
@@ -199,7 +205,9 @@ TEST_F(TensorRTEngineTest, test_pool2d) {
   auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *x, pool_t,
                                           nvinfer1::DimsHW{2, 2});
-  PADDLE_ENFORCE(pool_layer != nullptr);
+  PADDLE_ENFORCE_NOT_NULL(
+      pool_layer,
+      platform::errors::InvalidArgument("TRT pooling layer building failed."));
   pool_layer->setStride(nvinfer1::DimsHW{1, 1});
   pool_layer->setPadding(nvinfer1::DimsHW{0, 0});
 
diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc
index 34b7072b2ee..743f7740e5f 100644
--- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc
+++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc
@@ -83,9 +83,8 @@ bool TRTInt8Calibrator::setBatch(
                           engine_name_, it.first));
     }
     const auto& d = dataptr->second;
-    PADDLE_ENFORCE(
-        cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice),
-        "Fail to cudaMemcpy %s for %s", engine_name_, it.first);
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice));
   }
 
   data_is_set_ = true;
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
index cc6ee7b19ea..9cfe47da5db 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -208,8 +208,11 @@ class TensorRTEngineOp : public framework::OperatorBase {
     auto stream =
        reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx).stream();
-    PADDLE_ENFORCE_EQ(input_names_.empty(), false,
-                      "should pass at least one input");
+    PADDLE_ENFORCE_EQ(
+        input_names_.empty(), false,
+        platform::errors::PreconditionNotMet(
+            "TensorRT engine needs at least one input, but no input is found. "
+            "Please check if you set the input correctly."));
 
     std::vector<std::string> output_maps =
         Attr<std::vector<std::string>>("output_name_mapping");
@@ -295,12 +298,19 @@ class TensorRTEngineOp : public framework::OperatorBase {
 #endif
       }
       auto *fluid_v = scope.FindVar(y);
-      PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y);
+      PADDLE_ENFORCE_NOT_NULL(
+          fluid_v,
+          platform::errors::NotFound(
+              "Output variable %s is not found in TensorRT subgraph.", y));
       auto *fluid_t = fluid_v->GetMutable<framework::LoDTensor>();
       fluid_t->Resize(framework::make_ddim(ddim));
-      PADDLE_ENFORCE(bind_index < num_bindings,
-                     "The bind index should be less than num_bindings");
+      PADDLE_ENFORCE_LT(bind_index, num_bindings,
+                        platform::errors::InvalidArgument(
+                            "The binding index in TRT engine should be less "
+                            "than the number of bindings, but got binding "
+                            "index = %d, number of bindings = %d.",
+                            bind_index, num_bindings));
       buffers[bind_index] = static_cast<void *>(fluid_t->mutable_data<float>(
           BOOST_GET_CONST(platform::CUDAPlace, dev_place)));
-- 
GitLab
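
Note on the convention this patch applies throughout: each bare PADDLE_ENFORCE(cond, msg) is
migrated to a typed macro (PADDLE_ENFORCE_NOT_NULL, PADDLE_ENFORCE_EQ/LT/GE/GT,
PADDLE_ENFORCE_CUDA_SUCCESS) carrying a platform::errors category and a printf-style message
that reports the offending values. A minimal sketch of the pattern, assuming Paddle's
paddle/fluid/platform/enforce.h is on the include path; the helper CheckBindIndex is
hypothetical and exists only to illustrate the macro usage:

    #include "paddle/fluid/platform/enforce.h"

    namespace paddle {

    // Hypothetical helper: validates a TRT binding index the same way
    // tensorrt_engine_op.h does after this patch.
    void CheckBindIndex(int bind_index, int num_bindings) {
      // Typed error category plus a message carrying the offending values,
      // instead of a bare boolean PADDLE_ENFORCE with a free-form string.
      PADDLE_ENFORCE_LT(bind_index, num_bindings,
                        platform::errors::InvalidArgument(
                            "The binding index in TRT engine should be less "
                            "than the number of bindings, but got binding "
                            "index = %d, number of bindings = %d.",
                            bind_index, num_bindings));
    }

    }  // namespace paddle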