diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 274ae8afa1fb6f9cb452d45f786f55193b4511fc..2ba7ee26940c72257b1c9e35199f68b496dc8a87 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -631,6 +631,13 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     process_level_allocator_enabled = true;
   }
 
+// TODO(wilber): jetson tx2 may fail to run the model due to insufficient memory
+// under the native_best_fit strategy. Modify the default allocation strategy to
+// auto_growth. todo, find a more appropriate way to solve the problem.
+#ifdef WITH_NV_JETSON
+  gflags.push_back("--allocator_strategy=auto_growth");
+#endif
+
   if (framework::InitGflags(gflags)) {
     VLOG(3) << "The following gpu analysis configurations only take effect "
                "for the first predictor: ";
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index ed59f1dc7fc5f625383821281ec47406db3b72b0..873bf43839c432583a1049684758db6d3d92cd00 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -95,9 +95,13 @@ const std::vector<std::string> kTRTSubgraphPasses({
   "conv_bn_fuse_pass",  //
 #if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
                            // guaranteed at least v7
+// cudnn8.0 has memory leak problem in conv + eltwise + act, so we
+// disable the pass.
+#if !(CUDNN_VERSION >= 8000 && CUDNN_VERSION < 8100)
   "conv_elementwise_add_act_fuse_pass",   //
   "conv_elementwise_add2_act_fuse_pass",  //
-#endif  //
+#endif
+#endif
   "transpose_flatten_concat_fuse_pass",
 });
 
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 0a4cffbe7ebb78b7dcb7f461cb9484c3ace4c2ca..0e399578fa446793756a23e76013c3ed9a8bb9c4 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -81,10 +81,35 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t>& shape, std::string input,
                         "TensorRT's tensor input requires at most 4 "
                         "dimensions, but input %s has %d dims.",
                         input, shape.size()));
+  auto ShapeStr = [](const std::vector<int64_t>& shape) {
+    std::ostringstream os;
+    os << "[";
+    for (size_t i = 0; i < shape.size(); ++i) {
+      if (i == shape.size() - 1) {
+        os << shape[i];
+      } else {
+        os << shape[i] << ",";
+      }
+    }
+    os << "]";
+    return os.str();
+  };
   if (!with_dynamic_shape) {
     if (shape.size() == 4UL) {
+      if (shape[2] == -1 || shape[3] == -1) {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "The input [%s] shape of trt subgraph is %s, please enable "
+            "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
+            input, ShapeStr(shape)));
+      }
       return nvinfer1::DimsCHW(shape[1], shape[2], shape[3]);
     } else if (shape.size() == 3UL) {
+      if (shape[1] == -1 || shape[2] == -1) {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "The input [%s] shape of trt subgraph is %s, please enable "
+            "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
+            input, ShapeStr(shape)));
+      }
       return nvinfer1::Dims2(shape[1], shape[2]);
     }
     return nvinfer1::DimsCHW(shape[1], 1, 1);
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index ff021260a9b7517f3bfa38d0d232eba899868428..1349a15f8a72ef21bde1820c41b660ff5996706a 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -534,7 +534,10 @@ void BindAnalysisConfig(py::module *m) {
            [](AnalysisConfig &self, const std::string &pass) {
              self.pass_builder()->DeletePass(pass);
            })
-      .def("pass_builder", &AnalysisConfig::pass_builder,
+      .def("pass_builder",
+           [](AnalysisConfig &self) {
+             return dynamic_cast<PaddlePassBuilder *>(self.pass_builder());
+           },
            py::return_value_policy::reference);
 }
 