未验证 提交 5a69ddb9 编写于 作者: Z Zhang Jun 提交者: GitHub

[cherrypick][inference][trt]remove trt sparse weights flags (#53562) (#53850)

* remove kSPARSE_WEIGHTS

* remove kFASTER_DYNAMIC_SHAPES_0805 and add 'TrtMajorVersion' function
上级 2992f787
......@@ -556,17 +556,14 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
opt_input_shape = {};
}
auto to_major_version = [&](int full_version) -> float {
return (full_version / 100) / 10.0;
};
const float compile_time_trt_version = to_major_version(TRT_VERSION);
const float run_time_trt_version =
to_major_version(tensorrt::GetInferLibVersion());
if (compile_time_trt_version != run_time_trt_version) {
const float trt_compile_version = tensorrt::TrtMajorVersion(TRT_VERSION);
const float trt_runtime_version =
tensorrt::TrtMajorVersion(tensorrt::GetInferLibVersion());
if (trt_compile_version != trt_runtime_version) {
LOG_FIRST_N(WARNING, 1)
<< "The Paddle Inference library is compiled with "
<< compile_time_trt_version << " version TensorRT, "
<< "but the runtime TensorRT you are using is " << run_time_trt_version
<< trt_compile_version << " version TensorRT, "
<< "but the runtime TensorRT you are using is " << trt_runtime_version
<< " version. "
"This might cause serious compatibility issues. We strongly "
"recommend using the same TRT version at runtime.";
......
......@@ -158,12 +158,6 @@ void TensorRTEngine::FreezeNetwork() {
infer_builder_config_->setMaxWorkspaceSize(max_workspace_);
#endif
#if IS_TRT_VERSION_GE(8500)
infer_builder_config_->setPreviewFeature(
nvinfer1::PreviewFeature::kFASTER_DYNAMIC_SHAPES_0805, true);
#else
#endif
bool enable_fp16 = (precision_ == AnalysisConfig::Precision::kHalf);
if (enable_fp16) {
bool support_fp16 = infer_builder_->platformHasFastFp16();
......@@ -325,7 +319,6 @@ void TensorRTEngine::FreezeNetwork() {
infer_engine_.reset(infer_builder_->buildEngineWithConfig(
*network(), *infer_builder_config_));
#else
infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS);
ihost_memory_.reset(infer_builder_->buildSerializedNetwork(
*network(), *infer_builder_config_));
infer_ptr<nvinfer1::IRuntime> runtime(createInferRuntime(&logger_));
......
......@@ -96,6 +96,10 @@ static std::tuple<int, int, int> GetTrtCompileVersion() {
NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR, NV_TENSORRT_PATCH};
}
// Extracts the "major.minor" part of a packed TensorRT version integer as a
// float, e.g. 8516 -> 8.5. Dropping the last two digits removes the patch
// (and build) component; dividing by 10.0 re-inserts the decimal point.
// NOTE(review): assumes the major*1000 + minor*100 + patch encoding used by
// NV_TENSORRT_VERSION / getInferLibVersion() — confirm against TRT headers.
static float TrtMajorVersion(int full_version) {
  const int major_minor = full_version / 100;  // e.g. 8516 -> 85 (truncates)
  return major_minor / 10.0;                   // e.g. 85 -> 8.5
}
template <typename T>
struct Destroyer {
void operator()(T* x) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册