cherry-pick pr (#31043)

656124da · Wilber · GitHub · 25ee1a73 · 656124da · 656124da
4 changed file
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -631,6 +631,13 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
        process_level_allocator_enabled = true;
      }

+// TODO(wilber): jetson tx2 may fail to run the model due to insufficient memory
+// under the native_best_fit strategy. Modify the default allocation strategy to
+// auto_growth. todo, find a more appropriate way to solve the problem.
+#ifdef WITH_NV_JETSON
+      gflags.push_back("--allocator_strategy=auto_growth");
+#endif
+
      if (framework::InitGflags(gflags)) {
        VLOG(3) << "The following gpu analysis configurations only take effect "
                   "for the first predictor: ";

--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -95,9 +95,13 @@ const std::vector<std::string> kTRTSubgraphPasses({
      "conv_bn_fuse_pass",                      //
 #if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
                           // guaranteed at least v7
+// cudnn8.0 has memory leak problem in conv + eltwise + act, so we
+// disable the pass.
+#if !(CUDNN_VERSION >= 8000 && CUDNN_VERSION < 8100)
      "conv_elementwise_add_act_fuse_pass",   //
      "conv_elementwise_add2_act_fuse_pass",  //
-#endif                                        //
+#endif
+#endif
      "transpose_flatten_concat_fuse_pass",
 });


--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -81,10 +81,35 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
                        "TensorRT's tensor input requires at most 4 "
                        "dimensions, but input %s has %d dims.",
                        input, shape.size()));
+  auto ShapeStr = [](const std::vector<T>& shape) {
+    std::ostringstream os;
+    os << "[";
+    for (size_t i = 0; i < shape.size(); ++i) {
+      if (i == shape.size() - 1) {
+        os << shape[i];
+      } else {
+        os << shape[i] << ",";
+      }
+    }
+    os << "]";
+    return os.str();
+  };
  if (!with_dynamic_shape) {
    if (shape.size() == 4UL) {
+      if (shape[2] == -1 || shape[3] == -1) {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "The input [%s] shape of trt subgraph is %s, please enable "
+            "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
+            input, ShapeStr(shape)));
+      }
      return nvinfer1::DimsCHW(shape[1], shape[2], shape[3]);
    } else if (shape.size() == 3UL) {
+      if (shape[1] == -1 || shape[2] == -1) {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "The input [%s] shape of trt subgraph is %s, please enable "
+            "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
+            input, ShapeStr(shape)));
+      }
      return nvinfer1::Dims2(shape[1], shape[2]);
    }
    return nvinfer1::DimsCHW(shape[1], 1, 1);

--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -534,7 +534,10 @@ void BindAnalysisConfig(py::module *m) {
           [](AnalysisConfig &self, const std::string &pass) {
             self.pass_builder()->DeletePass(pass);
           })
-      .def("pass_builder", &AnalysisConfig::pass_builder,
+      .def("pass_builder",
+           [](AnalysisConfig &self) {
+             return dynamic_cast<PaddlePassBuilder *>(self.pass_builder());
+           },
           py::return_value_policy::reference);
 }