Unverified commit f4131f9f, authored by Pei Yang, committed via GitHub

Refine error message of leaky_relu, tensorrt_engine, split, prelu op converter (#23661) (#23909)

Parent commit: ca1544a3
...@@ -27,12 +27,20 @@ class LeakyReluOpConverter : public OpConverter { ...@@ -27,12 +27,20 @@ class LeakyReluOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
int input_num = op_desc.Input("X").size(); size_t input_num = op_desc.Input("X").size();
PADDLE_ENFORCE(input_num == 1); PADDLE_ENFORCE_EQ(input_num, 1UL,
platform::errors::InvalidArgument(
"Invalid number of TRT leaky_relu op converter "
"inputs. Expected 1, but received %d",
input_num));
auto* input = engine_->GetITensor(op_desc.Input("X")[0]); auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
// Get output // Get output
size_t output_num = op_desc.Output("Out").size(); size_t output_num = op_desc.Output("Out").size();
PADDLE_ENFORCE(output_num == 1); PADDLE_ENFORCE_EQ(output_num, 1UL,
platform::errors::InvalidArgument(
"Invalid number of TRT leaky_relu op converter "
"outputs. Expected 1, but received %d",
output_num));
// Get attrs // Get attrs
float alpha = boost::get<float>(op_desc.GetAttr("alpha")); float alpha = boost::get<float>(op_desc.GetAttr("alpha"));
nvinfer1::ILayer* output_layer = nullptr; nvinfer1::ILayer* output_layer = nullptr;
...@@ -66,11 +74,17 @@ class LeakyReluOpConverter : public OpConverter { ...@@ -66,11 +74,17 @@ class LeakyReluOpConverter : public OpConverter {
auto* scale_layer = TRT_ENGINE_ADD_LAYER( auto* scale_layer = TRT_ENGINE_ADD_LAYER(
engine_, Scale, *input, nvinfer1::ScaleMode::kUNIFORM, shift.get(), engine_, Scale, *input, nvinfer1::ScaleMode::kUNIFORM, shift.get(),
scale.get(), power.get()); scale.get(), power.get());
PADDLE_ENFORCE(nullptr != scale_layer); PADDLE_ENFORCE_NOT_NULL(
scale_layer, platform::errors::InvalidArgument(
"Invalid scale layer in leaky_relu TRT op converter. "
"The scale layer should not be null."));
// y_relu = (x > 0) : x : 0 // y_relu = (x > 0) : x : 0
auto* relu_layer = TRT_ENGINE_ADD_LAYER(engine_, Activation, *input, auto* relu_layer = TRT_ENGINE_ADD_LAYER(engine_, Activation, *input,
nvinfer1::ActivationType::kRELU); nvinfer1::ActivationType::kRELU);
PADDLE_ENFORCE(nullptr != relu_layer); PADDLE_ENFORCE_NOT_NULL(
relu_layer, platform::errors::InvalidArgument(
"Invalid relu layer in leaky_relu TRT op converter. "
"The relu layer should not be null."));
// //
TensorRTEngine::Weight sub_scale{nvinfer1::DataType::kFLOAT, &alpha_data[1], TensorRTEngine::Weight sub_scale{nvinfer1::DataType::kFLOAT, &alpha_data[1],
1}; 1};
...@@ -78,16 +92,29 @@ class LeakyReluOpConverter : public OpConverter { ...@@ -78,16 +92,29 @@ class LeakyReluOpConverter : public OpConverter {
TRT_ENGINE_ADD_LAYER(engine_, Scale, *(relu_layer->getOutput(0)), TRT_ENGINE_ADD_LAYER(engine_, Scale, *(relu_layer->getOutput(0)),
nvinfer1::ScaleMode::kUNIFORM, shift.get(), nvinfer1::ScaleMode::kUNIFORM, shift.get(),
sub_scale.get(), power.get()); sub_scale.get(), power.get());
PADDLE_ENFORCE(nullptr != scale_relu_layer); PADDLE_ENFORCE_NOT_NULL(
scale_relu_layer,
platform::errors::InvalidArgument(
"Invalid scale_relu layer in leaky_relu TRT op converter. The "
"scale_relu layer should not be null."));
output_layer = output_layer =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *(scale_layer->getOutput(0)), TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *(scale_layer->getOutput(0)),
*(scale_relu_layer->getOutput(0)), *(scale_relu_layer->getOutput(0)),
nvinfer1::ElementWiseOperation::kSUM); nvinfer1::ElementWiseOperation::kSUM);
PADDLE_ENFORCE(nullptr != output_layer); PADDLE_ENFORCE_NOT_NULL(
output_layer, platform::errors::InvalidArgument(
"Invalid output layer in leaky_relu TRT op "
"converter. The output layer should not be null."));
// keep alpha tensor to avoid release it's memory // keep alpha tensor to avoid release it's memory
std::string alpha_name = op_desc.Output("Out")[0] + "_alpha"; std::string alpha_name = op_desc.Output("Out")[0] + "_alpha";
PADDLE_ENFORCE(engine_->weight_map.find(alpha_name) == bool alpha_not_in_weight_map =
engine_->weight_map.end()); (engine_->weight_map.find(alpha_name) == engine_->weight_map.end());
PADDLE_ENFORCE_EQ(alpha_not_in_weight_map, true,
platform::errors::InvalidArgument(
"The name of parameter alpha in leaky_relu TRT op "
"converter is already "
"found in the weight map. The same weight cannot be "
"set twice. Please check if it is already set."));
engine_->SetWeights(alpha_name, std::move(alpha_tensor)); engine_->SetWeights(alpha_name, std::move(alpha_tensor));
#endif #endif
auto output_name = op_desc.Output("Out")[0]; auto output_name = op_desc.Output("Out")[0];
......
...@@ -30,17 +30,27 @@ class PReluOpConverter : public OpConverter { ...@@ -30,17 +30,27 @@ class PReluOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
int input_num = op_desc.Input("X").size(); size_t input_num = op_desc.Input("X").size();
PADDLE_ENFORCE(input_num == 1); PADDLE_ENFORCE_EQ(input_num, 1UL,
platform::errors::InvalidArgument(
"Invalid input X's size of prelu TRT converter. "
"Expected 1, received %d.",
input_num));
auto* input = engine_->GetITensor(op_desc.Input("X")[0]); auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
// Get output // Get output
size_t output_num = op_desc.Output("Out").size(); size_t output_num = op_desc.Output("Out").size();
PADDLE_ENFORCE(output_num == 1); PADDLE_ENFORCE_EQ(output_num, 1UL,
platform::errors::InvalidArgument(
"Invalid output Out's size of prelu TRT converter. "
"Expected 1, received %d.",
output_num));
// Get attrs // Get attrs
std::string mode = boost::get<std::string>(op_desc.GetAttr("mode")); std::string mode = boost::get<std::string>(op_desc.GetAttr("mode"));
// //
auto* alpha_var = scope.FindVar(op_desc.Input("Alpha")[0]); auto* alpha_var = scope.FindVar(op_desc.Input("Alpha")[0]);
PADDLE_ENFORCE_NOT_NULL(alpha_var); PADDLE_ENFORCE_NOT_NULL(
alpha_var, platform::errors::NotFound(
"Variable Alpha of prelu TRT converter is not found."));
auto* alpha_tensor = alpha_var->GetMutable<framework::LoDTensor>(); auto* alpha_tensor = alpha_var->GetMutable<framework::LoDTensor>();
platform::CPUPlace cpu_place; platform::CPUPlace cpu_place;
......
...@@ -29,14 +29,21 @@ class SplitOpConverter : public OpConverter { ...@@ -29,14 +29,21 @@ class SplitOpConverter : public OpConverter {
// Declare inputs // Declare inputs
auto* input = engine_->GetITensor(op_desc.Input("X")[0]); auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
auto input_dims = input->getDimensions(); auto input_dims = input->getDimensions();
int input_num = op_desc.Input("X").size(); size_t input_num = op_desc.Input("X").size();
size_t output_num = op_desc.Output("Out").size(); size_t output_num = op_desc.Output("Out").size();
// Get Attrs // Get Attrs
PADDLE_ENFORCE(input_num == 1); PADDLE_ENFORCE_EQ(input_num, 1UL,
platform::errors::InvalidArgument(
"Invalid input X's size of split TRT converter. "
"Expected 1, received %d.",
input_num));
int axis = boost::get<int>(op_desc.GetAttr("axis")); int axis = boost::get<int>(op_desc.GetAttr("axis"));
// split on batch is not supported in TensorRT // split on batch is not supported in TensorRT
PADDLE_ENFORCE(axis != 0); PADDLE_ENFORCE_NE(
axis, 0,
platform::errors::InvalidArgument(
"Invalid split axis. Split on batch is not supported in TensorRT"));
std::vector<int> output_lengths = std::vector<int> output_lengths =
boost::get<std::vector<int>>(op_desc.GetAttr("sections")); boost::get<std::vector<int>>(op_desc.GetAttr("sections"));
...@@ -58,9 +65,13 @@ class SplitOpConverter : public OpConverter { ...@@ -58,9 +65,13 @@ class SplitOpConverter : public OpConverter {
"The (%d) dim of input should not be -1", axis)); "The (%d) dim of input should not be -1", axis));
if (num > 0) { if (num > 0) {
int64_t in_axis_dim = input_dims.d[axis]; int64_t in_axis_dim = input_dims.d[axis];
PADDLE_ENFORCE_EQ(in_axis_dim % num, 0, PADDLE_ENFORCE_EQ(
"Tensor split does not result" in_axis_dim % num, 0,
" in an equal division"); platform::errors::InvalidArgument(
"Invalid number to split. Tensor split does not result"
" in an equal division of dimensions. Axis dim = %d %% num = %d "
"!= 0",
in_axis_dim, num));
size_t out_axis_dim = in_axis_dim / num; size_t out_axis_dim = in_axis_dim / num;
for (int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i) {
output_lengths.push_back(out_axis_dim); output_lengths.push_back(out_axis_dim);
......
...@@ -45,8 +45,17 @@ __device__ int upper_bound(T const* vals, int n, T const& key) { ...@@ -45,8 +45,17 @@ __device__ int upper_bound(T const* vals, int n, T const& key) {
nvinfer1::Dims SplitPlugin::getOutputDimensions( nvinfer1::Dims SplitPlugin::getOutputDimensions(
int index, const nvinfer1::Dims* input_dims, int num_inputs) { int index, const nvinfer1::Dims* input_dims, int num_inputs) {
PADDLE_ENFORCE_EQ(num_inputs, 1); PADDLE_ENFORCE_EQ(num_inputs, 1,
PADDLE_ENFORCE_LT(index, this->getNbOutputs()); platform::errors::InvalidArgument(
"Invalid number of inputs of split TRT plugin. "
"Expected 1, received %d.",
num_inputs));
PADDLE_ENFORCE_LT(
index, this->getNbOutputs(),
platform::errors::InvalidArgument(
"Index of output should be less than the total number of outputs in "
"split TensorRT plugin. Received index = %d >= total outputs = %d",
index, this->getNbOutputs()));
nvinfer1::Dims output_dims = input_dims[0]; nvinfer1::Dims output_dims = input_dims[0];
output_dims.d[axis_] = output_length_.at(index); output_dims.d[axis_] = output_length_.at(index);
...@@ -54,7 +63,11 @@ nvinfer1::Dims SplitPlugin::getOutputDimensions( ...@@ -54,7 +63,11 @@ nvinfer1::Dims SplitPlugin::getOutputDimensions(
} }
int SplitPlugin::initialize() { int SplitPlugin::initialize() {
PADDLE_ENFORCE_LE(axis_, nvinfer1::Dims::MAX_DIMS); PADDLE_ENFORCE_LE(axis_, nvinfer1::Dims::MAX_DIMS,
platform::errors::InvalidArgument(
"Axis dimension exceeds max dimension in TensorRT. "
"Received axis = %d > MAX_DIMS = %d",
axis_, nvinfer1::Dims::MAX_DIMS));
// notice input dims is [C, H, W] // notice input dims is [C, H, W]
nvinfer1::Dims dims = this->getInputDims(0); nvinfer1::Dims dims = this->getInputDims(0);
outer_rows_ = 1; outer_rows_ = 1;
...@@ -111,9 +124,12 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs, ...@@ -111,9 +124,12 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs,
float const* input_ptr = reinterpret_cast<float const*>(inputs[0]); float const* input_ptr = reinterpret_cast<float const*>(inputs[0]);
float* const* h_odatas = reinterpret_cast<float* const*>(outputs); float* const* h_odatas = reinterpret_cast<float* const*>(outputs);
float** output_ptrs = thrust::raw_pointer_cast(&d_output_ptrs_[0]); float** output_ptrs = thrust::raw_pointer_cast(&d_output_ptrs_[0]);
PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync( PADDLE_ENFORCE_CUDA_SUCCESS(
output_ptrs, h_odatas, d_output_ptrs_.size() * sizeof(float*), cudaMemcpyAsync(output_ptrs, h_odatas,
cudaMemcpyHostToDevice, stream)); d_output_ptrs_.size() * sizeof(float*),
cudaMemcpyHostToDevice, stream),
platform::errors::External(
"CUDA Memcpy failed during split plugin run."));
int outer_rows = outer_rows_ * batchSize; int outer_rows = outer_rows_ * batchSize;
...@@ -159,7 +175,7 @@ bool SplitPluginDynamic::supportsFormatCombination( ...@@ -159,7 +175,7 @@ bool SplitPluginDynamic::supportsFormatCombination(
int nb_outputs) { int nb_outputs) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
in_out, platform::errors::InvalidArgument( in_out, platform::errors::InvalidArgument(
"The input of swish plugin shoule not be nullptr.")); "The input of split plugin should not be nullptr."));
PADDLE_ENFORCE_LT( PADDLE_ENFORCE_LT(
pos, nb_inputs + nb_outputs, pos, nb_inputs + nb_outputs,
......
...@@ -232,8 +232,14 @@ class TensorRTEngineOp : public framework::OperatorBase { ...@@ -232,8 +232,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
auto t_shape = framework::vectorize<int64_t>(t.dims()); auto t_shape = framework::vectorize<int64_t>(t.dims());
runtime_batch = t_shape[0]; runtime_batch = t_shape[0];
const int bind_index = engine->engine()->getBindingIndex(x.c_str()); const int bind_index = engine->engine()->getBindingIndex(x.c_str());
PADDLE_ENFORCE(bind_index < num_bindings, PADDLE_ENFORCE_LT(
"The bind index should be less than num_bindings"); bind_index, num_bindings,
platform::errors::InvalidArgument(
"Wrong TRT engine input binding index. Expected The "
"binding index of TRT engine input to be less than "
"the number of inputs and outputs. Received binding "
"index=%d >= total inputs and outputs=%d",
bind_index, num_bindings));
if (!engine->with_dynamic_shape()) { if (!engine->with_dynamic_shape()) {
// check if the input shapes are consistent with model. // check if the input shapes are consistent with model.
if (HasAttr(x + "_shape")) { if (HasAttr(x + "_shape")) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Please finish editing this message first!
To leave a comment, please register.