diff --git a/paddle/fluid/inference/tensorrt/convert/concat_op.cc b/paddle/fluid/inference/tensorrt/convert/concat_op.cc index 28afb87a891fb301b1b5108c9762bf6c88cefb96..5d63aa2ace86cb89917126f3a6fef9d0e9839e8c 100644 --- a/paddle/fluid/inference/tensorrt/convert/concat_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/concat_op.cc @@ -34,8 +34,11 @@ class ConcatOpConverter : public OpConverter { itensors.push_back(engine_->GetITensor(input_name)); } int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis")); - PADDLE_ENFORCE(axis > 0, - "The axis attr of Concat op should be large than 0 for trt"); + PADDLE_ENFORCE_GT(axis, 0, platform::errors::InvalidArgument( + "The axis attr of Concat" + " op should be larger than 0 for trt. " + "But received %d.", + axis)); auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), itensors.size()); diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc index 10c212c0b4fa394e3c745bf524ef9d081c4bc3c1..aa03bc44bd629513d96cda541c0b7162629bfdc8 100644 --- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc @@ -100,7 +100,9 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op, TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0}; auto* layer = fadd_layer(const_cast(X), n_output, n_input, nv_ksize, weight, bias); - PADDLE_ENFORCE(layer != nullptr); + PADDLE_ENFORCE_NOT_NULL(layer, + platform::errors::Fatal("TensorRT create conv2d" + " layer error.")); layer->setStride(nv_strides); layer->setPadding(nv_paddings); layer->setNbGroups(groups); diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc index c4f0855dbb1ca87b40c396692a812a3cbe06a7b8..dfadb28a6520f983986263b38be69fa48335d485 100644 --- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc @@ -43,13 +43,30 @@ class ElementwiseWeightOpConverter : public OpConverter { framework::OpDesc op_desc(op, nullptr); VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer"; - PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight - PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + PADDLE_ENFORCE_EQ( + op_desc.Input("X").size(), 1, + platform::errors::InvalidArgument( + "The input op's Input(\"X\").size() " + "should equal to 1, but received Input(\"X\").size() = %u.", + op_desc.Input("X").size())); + PADDLE_ENFORCE_EQ( + op_desc.Input("Y").size(), 1, + platform::errors::InvalidArgument( + "The input op's Input(\"Y\").size() " + "should equal to 1, but received Input(\"Y\").size() = %u.", + op_desc.Input("Y").size())); // Y is a weight + PADDLE_ENFORCE_EQ( + op_desc.Output("Out").size(), 1, + platform::errors::InvalidArgument( + "The input op's Output(\"Out\").size() " + "should equal to 1, but reveceid Output(\"Out\").size() = %u.", + op_desc.Output("Out").size())); auto* X = engine_->GetITensor(op_desc.Input("X").front()); auto* Y_v = scope.FindVar(op_desc.Input("Y").front()); - PADDLE_ENFORCE_NOT_NULL(Y_v); + PADDLE_ENFORCE_NOT_NULL( + Y_v, platform::errors::NotFound("Variable %s not found in scope.", + op_desc.Input("Y").front().c_str())); auto* Y_t = Y_v->GetMutable(); float* weight_data = nullptr; weight_data = @@ -176,9 +193,24 @@ class ElementwiseTensorOpConverter : public OpConverter { framework::OpDesc op_desc(op, nullptr); nvinfer1::ILayer* layer = nullptr; - PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight - PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + PADDLE_ENFORCE_EQ( + op_desc.Input("X").size(), 1, + platform::errors::InvalidArgument( + "The input op's Input(\"X\").size() " + "should equal to 1, but received Input(\"X\").size() = %u.", + op_desc.Input("X").size())); + PADDLE_ENFORCE_EQ( + op_desc.Input("Y").size(), 1, + platform::errors::InvalidArgument( + "The input op's Input(\"Y\").size() " + "should equal to 1, but received Input(\"Y\").size() = %u.", + op_desc.Input("Y").size())); // Y is a weight + PADDLE_ENFORCE_EQ( + op_desc.Output("Out").size(), 1, + platform::errors::InvalidArgument( + "The input op's Output(\"Out\").size() " + "should equal to 1, but received Output(\"Out\").size() = %u.", + op_desc.Output("Out").size())); auto* X = engine_->GetITensor(op_desc.Input("X").front()); auto* Y = engine_->GetITensor(op_desc.Input("Y").front()); diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.cc b/paddle/fluid/inference/tensorrt/convert/io_converter.cc index 854f434d93e81237dc85c5df62debcf3b3824b78..d9cf9e2e860018df594ac4d84a4d9fa9b9ba669f 100644 --- a/paddle/fluid/inference/tensorrt/convert/io_converter.cc +++ b/paddle/fluid/inference/tensorrt/convert/io_converter.cc @@ -29,38 +29,67 @@ class DefaultIOConverter : public EngineIOConverter { // NOTE out is GPU memory. virtual void operator()(const LoDTensor& in, void* out, size_t max_size) override { - PADDLE_ENFORCE(out != nullptr); - PADDLE_ENFORCE(stream_ != nullptr); + PADDLE_ENFORCE_NOT_NULL(out, + platform::errors::InvalidArgument( + "The input param 'out' must not be nullptr.")); + PADDLE_ENFORCE_NOT_NULL(stream_, + platform::errors::PreconditionNotMet( + "You should set up stream_ by SetStream() " + "before you call the operator().")); const auto& place = in.place(); size_t size = in.memory_size(); - PADDLE_ENFORCE_LE(size, max_size); + PADDLE_ENFORCE_LE( + size, max_size, + platform::errors::InvalidArgument( + "The input Tensor in's memory_size shoule be less than or equal to " + "the input max_size. But in's memory_size = %u, max_size = %u.", + size, max_size)); if (is_cpu_place(place)) { - PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data(), size, - cudaMemcpyHostToDevice, *stream_)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync( + out, in.data(), size, cudaMemcpyHostToDevice, *stream_)); } else if (is_gpu_place(place)) { - PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data(), size, - cudaMemcpyDeviceToDevice, *stream_)); + PADDLE_ENFORCE_EQ( + 0, cudaMemcpyAsync(out, in.data(), size, + cudaMemcpyDeviceToDevice, *stream_), + platform::errors::External( + "cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error.")); } else { - PADDLE_THROW("Unknown device for converter"); + PADDLE_THROW(platform::errors::NotFound("Unknown device for converter")); } cudaStreamSynchronize(*stream_); } // NOTE in is GPU memory. virtual void operator()(const void* in, LoDTensor* out, size_t max_size) override { - PADDLE_ENFORCE(in != nullptr); - PADDLE_ENFORCE(stream_ != nullptr); + PADDLE_ENFORCE_NOT_NULL(in, + platform::errors::InvalidArgument( + "The input param 'in' must not be nullptr.")); + PADDLE_ENFORCE_NOT_NULL(stream_, + platform::errors::PreconditionNotMet( + "You should set up stream_ by SetStream() " + "before you call the operator().")); const auto& place = out->place(); size_t size = out->memory_size(); - PADDLE_ENFORCE_LE(size, max_size); + PADDLE_ENFORCE_LE( + size, max_size, + platform::errors::InvalidArgument( + "The input Tensor out's memory_size shoule be less than or equal " + "to the input max_size. " + "But out's memory_size = %u, max_size = %u.", + size, max_size)); if (is_cpu_place(place)) { PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data(), in, size, - cudaMemcpyDeviceToHost, *stream_)); + cudaMemcpyDeviceToHost, *stream_), + platform::errors::External( + "cudaMemcpyAsync(cudaMemcpyDeviceToHost) error.")); } else if (is_gpu_place(place)) { - PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data(), in, size, - cudaMemcpyDeviceToDevice, *stream_)); + PADDLE_ENFORCE_EQ( + 0, cudaMemcpyAsync(out->data(), in, size, + cudaMemcpyDeviceToDevice, *stream_), + platform::errors::External( + "cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error.")); } else { - PADDLE_THROW("Unknown device for converter"); + PADDLE_THROW(platform::errors::NotFound("Unknown device for converter")); } cudaStreamSynchronize(*stream_); } diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.h b/paddle/fluid/inference/tensorrt/convert/io_converter.h index 5daa242f6ab802a50fa6105f0102b817b700f461..58c178028b8b275b57f5c298534bd1d31aede234 100644 --- a/paddle/fluid/inference/tensorrt/convert/io_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/io_converter.h @@ -44,10 +44,14 @@ class EngineIOConverter { static void ConvertInput(const std::string& op_type, const LoDTensor& in, void* out, size_t max_size, cudaStream_t* stream) { - PADDLE_ENFORCE(stream != nullptr); + PADDLE_ENFORCE_NOT_NULL(stream, + platform::errors::InvalidArgument( + "The input stream must not be nullptr.")); auto* converter = Registry::Global().Lookup( op_type, "default" /* default_type */); - PADDLE_ENFORCE_NOT_NULL(converter); + PADDLE_ENFORCE_NOT_NULL( + converter, platform::errors::Unimplemented( + "The %s in is not supported yet.", op_type.c_str())); converter->SetStream(stream); (*converter)(in, out, max_size); } @@ -55,10 +59,14 @@ class EngineIOConverter { static void ConvertOutput(const std::string& op_type, const void* in, LoDTensor* out, size_t max_size, cudaStream_t* stream) { - PADDLE_ENFORCE(stream != nullptr); + PADDLE_ENFORCE_NOT_NULL(stream, + platform::errors::InvalidArgument( + "The input stream must not be nullptr.")); auto* converter = Registry::Global().Lookup( op_type, "default" /* default_type */); - PADDLE_ENFORCE_NOT_NULL(converter); + PADDLE_ENFORCE_NOT_NULL( + converter, platform::errors::Unimplemented( + "The %s in not supported yet.", op_type.c_str())); converter->SetStream(stream); (*converter)(in, out, max_size); } diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index f4b0f5f23d8fda064c29534b56868beae79f65c0..ac0a04b9a116d907fd69c0ca58d3ae7e82921dab 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -53,7 +53,12 @@ class OpConverter { OpConverter* it{nullptr}; if (op_desc.Type() == "mul") { - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL, + platform::errors::InvalidArgument( + "The input op mul's Input(\"Y\")." + "size() should equal to 1, but reveceid " + "Input(\"Y\").size() = %u.", + op_desc.Input("Y").size())); std::string Y = op_desc.Input("Y")[0]; if (parameters.count(Y)) { it = Registry::Global().Lookup("fc"); @@ -66,38 +71,51 @@ class OpConverter { // static std::unordered_set add_weight_op_set {"add", "mul", // "sub", "div"}; static std::unordered_set add_weight_op_set{"add", "mul"}; - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL, + platform::errors::InvalidArgument( + "The input op's Input(\"Y\")." + "size() should equal to 1, but reveceid " + "Input(\"Y\").size() = %u.", + op_desc.Input("Y").size())); int op_type_len = op_desc.Type().size(); std::string op_type = op_desc.Type().substr(op_type_len - 3, op_type_len); std::string Y = op_desc.Input("Y")[0]; if (parameters.count(Y)) { - PADDLE_ENFORCE(add_weight_op_set.count(op_type) > 0, - "Unsupported elementwise type" + op_type); + PADDLE_ENFORCE_GT( + add_weight_op_set.count(op_type), 0, + platform::errors::Unimplemented("Unsupported elementwise type %s", + op_type.c_str())); it = Registry::Global().Lookup("elementwise_" + op_type + "_weight"); - PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", - op_desc.Type()); + PADDLE_ENFORCE_NOT_NULL( + it, platform::errors::Unimplemented( + "no OpConverter for optype [%s]", op_desc.Type())); } else { - PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0, - "Unsupported elementwise type" + op_type); + PADDLE_ENFORCE_GT( + add_tensor_op_set.count(op_type), 0, + platform::errors::Unimplemented("Unsupported elementwise type %s", + op_type.c_str())); it = Registry::Global().Lookup("elementwise_" + op_type + "_tensor"); } - PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", - op_desc.Type()); + PADDLE_ENFORCE_NOT_NULL( + it, platform::errors::Unimplemented("no OpConverter for optype [%s]", + op_desc.Type())); } if (op_desc.Type() == "depthwise_conv2d") { it = Registry::Global().Lookup("conv2d"); - PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", - op_desc.Type()); + PADDLE_ENFORCE_NOT_NULL( + it, platform::errors::Unimplemented("no OpConverter for optype [%s]", + op_desc.Type())); } if (!it) { it = Registry::Global().Lookup(op_desc.Type()); } - PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", - op_desc.Type()); + PADDLE_ENFORCE_NOT_NULL( + it, platform::errors::Unimplemented("no OpConverter for optype [%s]", + op_desc.Type())); it->SetEngine(engine); (*it)(op, scope, test_mode); @@ -149,9 +167,13 @@ class OpConverter { for (auto& input : inputs) { if (parameters.count(input)) continue; auto* var = block_desc->FindVar(input); - PADDLE_ENFORCE(var, "no variable called %s", input); - PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR, - "TensorRT engine only takes LoDTensor as input"); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("no variable called %s in block.", + input.c_str())); + PADDLE_ENFORCE_EQ( + var->GetType(), FluidDT::VarType_Type_LOD_TENSOR, + platform::errors::InvalidArgument("TensorRT engine only takes " + "LoDTensor as input")); auto var_shape = var->GetShape(); if (engine->with_dynamic_shape()) { #if IS_TRT_VERSION_GE(6000) diff --git a/paddle/fluid/inference/tensorrt/convert/pad_op.cc b/paddle/fluid/inference/tensorrt/convert/pad_op.cc index a1b0f3b4310a020d4bbf8d7c04c9447d3e0e72f7..dd594404d3316ada6e20624c074368f241ca5cdd 100644 --- a/paddle/fluid/inference/tensorrt/convert/pad_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/pad_op.cc @@ -39,9 +39,22 @@ class PadOpConverter : public OpConverter { nvinfer1::Dims input_shape = input->getDimensions(); int nbDims = input_shape.nbDims; int pad_size = static_cast(paddings.size()); - PADDLE_ENFORCE_GE(nbDims, 2); - PADDLE_ENFORCE_EQ((nbDims + 1) * 2, pad_size); - PADDLE_ENFORCE(pad_value == 0.0, "The pad layer of TRT only support zero."); + PADDLE_ENFORCE_GE( + nbDims, 2, + platform::errors::InvalidArgument( + "Input X[0]'s dimension should greater than or equal to 2. " + "But received %d.", + nbDims)); + PADDLE_ENFORCE_EQ( + (nbDims + 1) * 2, pad_size, + platform::errors::InvalidArgument("Input X[0]'s dimension(nbDims for " + "short) should meet the condition:" + "(nbDims + 1) * 2 == pad_size. But " + "received nbDims:%d, pad_size:%d.", + nbDims, pad_size)); + PADDLE_ENFORCE_EQ(pad_value, 0.0, + platform::errors::InvalidArgument( + "The pad layer of TRT only support zero.")); nvinfer1::DimsHW pre_pad(paddings[pad_size - 4], paddings[pad_size - 2]); nvinfer1::DimsHW post_pad(paddings[pad_size - 3], paddings[pad_size - 1]); @@ -50,7 +63,9 @@ class PadOpConverter : public OpConverter { *const_cast(input), pre_pad, post_pad); - PADDLE_ENFORCE(layer != nullptr); + PADDLE_ENFORCE_NOT_NULL(layer, + platform::errors::External( + "add padding layer to tensorrt engine error")); auto output_name = op_desc.Output("Out")[0]; RreplenishLayerAndOutput(layer, "pad", {output_name}, test_mode); } diff --git a/paddle/fluid/inference/tensorrt/convert/swish_op.cc b/paddle/fluid/inference/tensorrt/convert/swish_op.cc index 4b3e1c9e70a4a94808c94c81fcc773482f0574e4..e220d80f0d79da5eab98aa7a18a5093f9f4a55c4 100644 --- a/paddle/fluid/inference/tensorrt/convert/swish_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/swish_op.cc @@ -28,11 +28,20 @@ class SwishOpConverter : public OpConverter { framework::OpDesc op_desc(op, nullptr); // Declare inputs int input_num = op_desc.Input("X").size(); - PADDLE_ENFORCE(input_num == 1); + PADDLE_ENFORCE_EQ(input_num, 1, + platform::errors::InvalidArgument( + "The input X's size must equal to 1 in TRT swish op." + " But received X's size %d.", + input_num)); auto* input = engine_->GetITensor(op_desc.Input("X")[0]); // Get output size_t output_num = op_desc.Output("Out").size(); - PADDLE_ENFORCE(output_num == 1); + PADDLE_ENFORCE_EQ( + output_num, 1UL, + platform::errors::InvalidArgument( + "The ouput Out's size must equal to 1 in TRT swish op. " + "But received Out's size %u.", + output_num)); // Get attrs float beta = BOOST_GET_CONST(float, op_desc.GetAttr("beta")); diff --git a/paddle/fluid/inference/tensorrt/convert/ut_helper.h b/paddle/fluid/inference/tensorrt/convert/ut_helper.h index 3c48c8192f6b06e5a0ba005738383b46bc550ecb..cfb25eb2ba82763950babda5385649d31d2e9185 100644 --- a/paddle/fluid/inference/tensorrt/convert/ut_helper.h +++ b/paddle/fluid/inference/tensorrt/convert/ut_helper.h @@ -49,7 +49,10 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place, const platform::DeviceContext& ctx) { auto dims = tensor->dims(); size_t num_elements = analysis::AccuDims(dims, dims.size()); - PADDLE_ENFORCE_GT(num_elements, 0); + PADDLE_ENFORCE_GT( + num_elements, 0UL, + platform::errors::PermissionDenied("RandomizeTensor only can be used for " + "tensor which dims is not zero.")); platform::CPUPlace cpu_place; framework::LoDTensor temp_tensor; @@ -79,7 +82,8 @@ class TRTConvertValidation { scope_(scope), if_add_batch_(if_add_batch), max_batch_size_(max_batch_size) { - PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0); + PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0, + platform::errors::External("cudaStreamCreate error.")); engine_.reset(new TensorRTEngine(max_batch_size, workspace_size)); engine_->InitNetwork(); } @@ -154,7 +158,12 @@ class TRTConvertValidation { void Execute(int batch_size, std::unordered_set neglected_output = {}) { // Execute Fluid Op - PADDLE_ENFORCE_LE(batch_size, max_batch_size_); + PADDLE_ENFORCE_LE(batch_size, max_batch_size_, + platform::errors::InvalidArgument( + "Runtime batch_size should be less than or equal to " + "max_batch_size_. " + "But received batch_size:%d, max_batch_size_:%d", + batch_size, max_batch_size_)); platform::CUDADeviceContext ctx(place_); op_->Run(scope_, place_); cudaStreamSynchronize(stream_); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py new file mode 100644 index 0000000000000000000000000000000000000000..060f6c6c5f0446661e886390637714ad7dfc300d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py @@ -0,0 +1,53 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig + + +class PadOpTRTTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[1, 3, 128, 128], dtype="float32") + pad_out = fluid.layers.pad(x=data, + paddings=[0, 0, 0, 0, 0, 1, 1, 2], + pad_value=0.0) + out = fluid.layers.batch_norm(pad_out, is_test=True) + + self.feeds = { + "data": np.random.random((1, 3, 128, 128)).astype("float32") + } + self.enable_trt = True + self.trt_parameters = PadOpTRTTest.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +if __name__ == "__main__": + unittest.main()