From 656124dae50413095c43e63799d0dc0ba487974e Mon Sep 17 00:00:00 2001
From: Wilber
Date: Fri, 19 Feb 2021 02:23:33 -0600
Subject: [PATCH] cherry-pick pr (#31043)

---
 .../fluid/inference/api/analysis_predictor.cc |  7 ++++++
 .../inference/api/paddle_pass_builder.cc      |  6 ++++-
 paddle/fluid/inference/tensorrt/engine.h      | 25 +++++++++++++++++++
 paddle/fluid/pybind/inference_api.cc          |  5 +++-
 4 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 274ae8afa1f..2ba7ee26940 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -631,6 +631,13 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     process_level_allocator_enabled = true;
   }
 
+// TODO(wilber): jetson tx2 may fail to run the model due to insufficient memory
+// under the native_best_fit strategy. Modify the default allocation strategy to
+// auto_growth. todo, find a more appropriate way to solve the problem.
+#ifdef WITH_NV_JETSON
+  gflags.push_back("--allocator_strategy=auto_growth");
+#endif
+
   if (framework::InitGflags(gflags)) {
     VLOG(3) << "The following gpu analysis configurations only take effect "
                "for the first predictor: ";
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index ed59f1dc7fc..873bf43839c 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -95,9 +95,13 @@ const std::vector<std::string> kTRTSubgraphPasses({
   "conv_bn_fuse_pass",  //
 #if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
                            // guaranteed at least v7
+// cudnn8.0 has memory leak problem in conv + eltwise + act, so we
+// disable the pass.
+#if !(CUDNN_VERSION >= 8000 && CUDNN_VERSION < 8100)
   "conv_elementwise_add_act_fuse_pass",   //
   "conv_elementwise_add2_act_fuse_pass",  //
-#endif  //
+#endif
+#endif
   "transpose_flatten_concat_fuse_pass",
 });
 
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 0a4cffbe7eb..0e399578fa4 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -81,10 +81,35 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t>& shape, std::string input,
                         "TensorRT's tensor input requires at most 4 "
                         "dimensions, but input %s has %d dims.",
                         input, shape.size()));
+  auto ShapeStr = [](const std::vector<int64_t>& shape) {
+    std::ostringstream os;
+    os << "[";
+    for (size_t i = 0; i < shape.size(); ++i) {
+      if (i == shape.size() - 1) {
+        os << shape[i];
+      } else {
+        os << shape[i] << ",";
+      }
+    }
+    os << "]";
+    return os.str();
+  };
   if (!with_dynamic_shape) {
     if (shape.size() == 4UL) {
+      if (shape[2] == -1 || shape[3] == -1) {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "The input [%s] shape of trt subgraph is %s, please enable "
+            "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
+            input, ShapeStr(shape)));
+      }
       return nvinfer1::DimsCHW(shape[1], shape[2], shape[3]);
     } else if (shape.size() == 3UL) {
+      if (shape[1] == -1 || shape[2] == -1) {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "The input [%s] shape of trt subgraph is %s, please enable "
+            "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
+            input, ShapeStr(shape)));
+      }
       return nvinfer1::Dims2(shape[1], shape[2]);
     }
     return nvinfer1::DimsCHW(shape[1], 1, 1);
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index ff021260a9b..1349a15f8a7 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -534,7 +534,10 @@ void BindAnalysisConfig(py::module *m) {
            [](AnalysisConfig &self, const std::string &pass) {
              self.pass_builder()->DeletePass(pass);
            })
-      .def("pass_builder", &AnalysisConfig::pass_builder,
+      .def("pass_builder",
+           [](AnalysisConfig &self) {
+             return dynamic_cast<PaddlePassBuilder *>(self.pass_builder());
+           },
            py::return_value_policy::reference);
 }
 
--
GitLab
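
For reference, the error message added in engine.h directs users to enable TRT
dynamic-shape mode via SetTRTDynamicShapeInfo, whose three maps give the
minimum, maximum, and preferred shapes TensorRT uses to build its optimization
profile for each named input. Below is a minimal C++ usage sketch, assuming the
public AnalysisConfig API of this Paddle release; the model directory, the
input tensor name "x", and the shape bounds are illustrative placeholders, not
values taken from the patch.

#include <map>
#include <string>
#include <vector>

#include "paddle_inference_api.h"  // assumed public header name; in-tree it
                                   // lives under paddle/fluid/inference/api/

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // hypothetical model directory
  config.EnableUseGpu(100 /* initial pool, MB */, 0 /* GPU id */);
  config.EnableTensorRtEngine();

  // Per-input min/max/optimal shape ranges. Any -1 dimension in the model
  // must be covered by these ranges; otherwise Vec2TRT_Dims throws the new
  // InvalidArgument error added by this patch.
  std::map<std::string, std::vector<int>> min_shape{{"x", {1, 3, 112, 112}}};
  std::map<std::string, std::vector<int>> max_shape{{"x", {1, 3, 448, 448}}};
  std::map<std::string, std::vector<int>> opt_shape{{"x", {1, 3, 224, 224}}};
  config.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);

  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}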