From df7fabeedc87c663b3d8e285836b3770ceb10957 Mon Sep 17 00:00:00 2001 From: Wilber Date: Thu, 24 Sep 2020 17:19:20 +0800 Subject: [PATCH] Fix memory leak for mkldnn. (#27493) --- .../fluid/inference/api/analysis_predictor.cc | 34 ++++++++++++++++--- .../fluid/inference/api/analysis_predictor.h | 11 ++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 42e62011f84..6c68b385bcb 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -245,7 +245,18 @@ bool AnalysisPredictor::PrepareExecutor() { void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) { #ifdef PADDLE_WITH_MKLDNN - VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id=" + std::vector<std::vector<int>> inputs_shape; + for (size_t i = 0; i < inputs.size(); ++i) { + inputs_shape.emplace_back(inputs[i].shape); + } + MkldnnPreSet(inputs_shape); +#endif +} + +void AnalysisPredictor::MkldnnPreSet( + const std::vector<std::vector<int>> &inputs_shape) { +#ifdef PADDLE_WITH_MKLDNN + VLOG(2) << "AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id=" << platform::MKLDNNDeviceContext::tls().get_cur_mkldnn_session_id(); // In cache clearing mode. if (config_.mkldnn_cache_capacity_ > 0) { @@ -257,9 +268,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) { config_.mkldnn_cache_capacity_); // Set current_input_shape for caching dynamic shape. 
std::stringstream ss; - for (size_t i = 0; i < inputs.size(); ++i) { - for (size_t j = 0; j < inputs[i].shape.size(); ++j) { - ss << inputs[i].shape[j] << "-"; + for (size_t i = 0; i < inputs_shape.size(); ++i) { + for (size_t j = 0; j < inputs_shape[i].size(); ++j) { + ss << inputs_shape[i][j] << "-"; } } VLOG(2) << "Set input shape=" << ss.str(); @@ -742,6 +753,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor( bool AnalysisPredictor::ZeroCopyRun() { paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); +#ifdef PADDLE_WITH_MKLDNN + if (config_.use_mkldnn_) { + std::vector<std::vector<int>> shape_vector; + auto names = GetInputNames(); + for (size_t i = 0; i < names.size(); ++i) { + auto in_tensor = GetInputTensor(names[i]); + shape_vector.emplace_back(in_tensor->shape()); + } + MkldnnPreSet(shape_vector); + } +#endif + executor_->Run(); // Fix TensorArray reuse not cleaned bug. tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_); @@ -750,6 +773,9 @@ bool AnalysisPredictor::ZeroCopyRun() { // recover the cpu_math_library_num_threads to 1, in order to avoid thread // conflict when integrating it into deployment service. paddle::platform::SetNumThreads(1); +#ifdef PADDLE_WITH_MKLDNN + if (config_.use_mkldnn_) MkldnnPostReset(); +#endif #if defined(PADDLE_WITH_MKLML) // Frees unused memory allocated by the Intel® MKL Memory Allocator to // avoid memory leak. See: diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 365f86c2110..c4a7173b010 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -317,6 +317,17 @@ class AnalysisPredictor : public PaddlePredictor { /// \param[in] inputs tensors /// void MkldnnPreSet(const std::vector<PaddleTensor> &inputs); + + /// + /// \brief PreSet for Mkldnn multi-thread and dynamic shape input. + /// + /// Shape-based overload; called by the tensor-based MkldnnPreSet and by + /// AnalysisPredictor::ZeroCopyRun(). 
+ /// + /// \param[in] inputs_shape shapes of the input tensors + /// + void MkldnnPreSet(const std::vector<std::vector<int>> &inputs_shape); + /// /// \brief PostReset for Mkldnn multi-thread and dynamic shape input. /// -- GitLab