From 9dd442ab2cd76b4b5796fd5b59551e3bbcba0173 Mon Sep 17 00:00:00 2001
From: wenbin
Date: Mon, 1 Nov 2021 20:37:38 +0800
Subject: [PATCH] disable int8 if there is no quant info (#36900)

* disable int8

* size_t to int
---
 paddle/fluid/inference/tensorrt/engine.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 575c0185863..64116b7973e 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -148,12 +148,21 @@ void TensorRTEngine::FreezeNetwork() {
       // and outputs have scales,
       // this layer's precision and output type are set to float32.
       // This step has no effect if this layer is fused during TRT optimization.
+      int layers_no_int8 = 0;
       for (int i = 0; i < network()->getNbLayers(); i++) {
         auto layer = network()->getLayer(i);
         if (!is_layer_int8(layer)) {
           layer->setPrecision(nvinfer1::DataType::kFLOAT);
+          ++layers_no_int8;
         }
       }
+      // Disable int8, otherwise building the engine fails if no layer is int8.
+      if (layers_no_int8 == network()->getNbLayers()) {
+        nvinfer1::BuilderFlags flags = infer_builder_config_->getFlags();
+        flags = flags & ~(1U << static_cast<int>(nvinfer1::BuilderFlag::kINT8));
+        // reset flags
+        infer_builder_config_->setFlags(flags);
+      }
 #else
       LOG(WARNING) << "If your TensorRT version is lower than 5.1.2.2, you "
                       "must provide quantization scales for all tensors using "
-- 
GitLab
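
The patch counts the layers that cannot run in int8 and, when that turns out to be
every layer in the network (i.e. no quantization info exists), clears the kINT8 bit
from the builder-config flag mask so engine building falls back to float instead of
failing. Below is a minimal, self-contained C++ sketch of that bit-clearing pattern.
The BuilderFlag enum, BuilderFlags alias, and clear_flag helper are mocks written
here for illustration; they are not the real nvinfer1 headers.

#include <bitset>
#include <cstdint>
#include <iostream>

// Mock of nvinfer1::BuilderFlag / BuilderFlags (illustration only):
// TensorRT encodes each builder option as one bit in a 32-bit mask.
enum class BuilderFlag : int { kFP16 = 0, kINT8 = 1, kDEBUG = 2 };
using BuilderFlags = uint32_t;

// Clear one flag bit -- the same expression the patch applies to kINT8.
BuilderFlags clear_flag(BuilderFlags flags, BuilderFlag f) {
  return flags & ~(1U << static_cast<int>(f));
}

int main() {
  // Start with both FP16 and INT8 requested.
  BuilderFlags flags = (1U << static_cast<int>(BuilderFlag::kFP16)) |
                       (1U << static_cast<int>(BuilderFlag::kINT8));
  std::cout << "before: " << std::bitset<4>(flags) << "\n";  // 0011

  // No layer has quant info -> drop kINT8, leave every other bit intact.
  flags = clear_flag(flags, BuilderFlag::kINT8);
  std::cout << "after:  " << std::bitset<4>(flags) << "\n";  // 0001
  return 0;
}

If the target TensorRT version's IBuilderConfig provides clearFlag, the same intent
can likely be expressed directly as
infer_builder_config_->clearFlag(nvinfer1::BuilderFlag::kINT8), but the manual mask
above matches what the patch actually does.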