diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index d075656d15747cf9f9740282652366abd509a70e..24644645eee49bff43d61192545a1b572421bfff 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -135,12 +135,6 @@ void TensorRTEngine::FreezeNetwork() {
       }
       for (int j = 0; j < layer->getNbOutputs(); j++) {
         auto *temp_out = layer->getOutput(j);
-        if (temp_out->isNetworkOutput()) {
-          VLOG(1) << "Layer(Name: " << layer->getName()
-                  << ") is set to float32 because its output("
-                  << temp_out->getName() << ") is the output of the network.";
-          return false;
-        }
         if (!temp_out->dynamicRangeIsSet()) {
           VLOG(1) << "Layer(Name: " << layer->getName()
                   << ") is set to float32 because its output("
@@ -357,6 +351,13 @@ nvinfer1::IPluginV2Layer *TensorRTEngine::AddPluginV2Ext(
   return network()->addPluginV2(inputs, num_inputs, *plugin);
 }
 
+nvinfer1::IPluginV2Layer *TensorRTEngine::AddPluginV2IOExt(
+    nvinfer1::ITensor *const *inputs, int num_inputs,
+    nvinfer1::IPluginV2IOExt *plugin) {
+  owned_plugin_v2ioext_.emplace_back(plugin);
+  return network()->addPluginV2(inputs, num_inputs, *plugin);
+}
+
 void TensorRTEngine::freshDeviceId() {
   int count;
   cudaGetDeviceCount(&count);
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index e22c2488d3b8b63746ad9fd19eaa724ce2efa8f7..edf69dc7aa2b5fcca66baf0a3880760981f838db 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -323,6 +323,10 @@ class TensorRTEngine {
                                            int num_inputs,
                                            plugin::PluginTensorRTV2Ext* plugin);
 
+  nvinfer1::IPluginV2Layer* AddPluginV2IOExt(nvinfer1::ITensor* const* inputs,
+                                             int num_inputs,
+                                             nvinfer1::IPluginV2IOExt* plugin);
+
   void SetTensorDynamicRange(nvinfer1::ITensor* tensor, float range) {
     quant_dynamic_range_[tensor] = range;
   }
@@ -429,6 +433,7 @@
   bool with_ernie() { return with_ernie_; }
   bool disable_trt_plugin_fp16() { return disable_trt_plugin_fp16_; }
   bool with_dynamic_shape() { return with_dynamic_shape_; }
+  AnalysisConfig::Precision precision() { return precision_; }
 
 #if IS_TRT_VERSION_GE(6000)
   nvinfer1::IPluginV2Layer* AddDynamicPlugin(
@@ -550,6 +555,7 @@
 
   std::vector<std::unique_ptr<plugin::PluginTensorRT>> owned_plugin_;
   std::vector<std::unique_ptr<plugin::PluginTensorRTV2Ext>> owned_plugin_v2ext_;
+  std::vector<std::unique_ptr<nvinfer1::IPluginV2IOExt>> owned_plugin_v2ioext_;
 
   // TensorRT related internal members
   template <typename T>
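
For reference, a minimal usage sketch of the new API from a converter's point of view. This snippet is not part of the PR: MyPlugin is an assumed user-defined class deriving from nvinfer1::IPluginV2IOExt, and AddMyPluginLayer is a hypothetical helper. Only AddPluginV2IOExt, precision(), and SetTensorDynamicRange come from the diff above.

// Hypothetical converter snippet; MyPlugin is an assumed plugin class
// implementing nvinfer1::IPluginV2IOExt and is not added by this PR.
#include "paddle/fluid/inference/tensorrt/engine.h"

namespace paddle {
namespace inference {
namespace tensorrt {

void AddMyPluginLayer(TensorRTEngine *engine,
                      nvinfer1::ITensor *const *inputs, int num_inputs) {
  // The engine stores the raw pointer in owned_plugin_v2ioext_ as a
  // std::unique_ptr, so the caller must not delete it.
  auto *plugin = new MyPlugin();
  nvinfer1::IPluginV2Layer *layer =
      engine->AddPluginV2IOExt(inputs, num_inputs, plugin);

  // The new precision() accessor lets a converter branch on the configured
  // precision, e.g. setting a dynamic range only in INT8 mode (the 1.0f
  // range here is purely illustrative).
  if (engine->precision() == AnalysisConfig::Precision::kInt8) {
    engine->SetTensorDynamicRange(layer->getOutput(0), 1.0f);
  }
}

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle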