diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 42e62011f84c18b875a3fa48b95a05f152fb5791..6c68b385bcbc04844309c581bf3afd848fe4d1aa 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -245,7 +245,18 @@ bool AnalysisPredictor::PrepareExecutor() {
 
 void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
 #ifdef PADDLE_WITH_MKLDNN
-  VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id="
+  std::vector<std::vector<int>> inputs_shape;
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    inputs_shape.emplace_back(inputs[i].shape);
+  }
+  MkldnnPreSet(inputs_shape);
+#endif
+}
+
+void AnalysisPredictor::MkldnnPreSet(
+    const std::vector<std::vector<int>> &inputs_shape) {
+#ifdef PADDLE_WITH_MKLDNN
+  VLOG(2) << "AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id="
           << platform::MKLDNNDeviceContext::tls().get_cur_mkldnn_session_id();
   // In cache clearing mode.
   if (config_.mkldnn_cache_capacity_ > 0) {
@@ -257,9 +268,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
         config_.mkldnn_cache_capacity_);
     // Set current_input_shape for caching dynamic shape.
     std::stringstream ss;
-    for (size_t i = 0; i < inputs.size(); ++i) {
-      for (size_t j = 0; j < inputs[i].shape.size(); ++j) {
-        ss << inputs[i].shape[j] << "-";
+    for (size_t i = 0; i < inputs_shape.size(); ++i) {
+      for (size_t j = 0; j < inputs_shape[i].size(); ++j) {
+        ss << inputs_shape[i][j] << "-";
       }
     }
     VLOG(2) << "Set input shape=" << ss.str();
@@ -742,6 +753,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
 
 bool AnalysisPredictor::ZeroCopyRun() {
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+#ifdef PADDLE_WITH_MKLDNN
+  if (config_.use_mkldnn_) {
+    std::vector<std::vector<int>> shape_vector;
+    auto names = GetInputNames();
+    for (size_t i = 0; i < names.size(); ++i) {
+      auto in_tensor = GetInputTensor(names[i]);
+      shape_vector.emplace_back(in_tensor->shape());
+    }
+    MkldnnPreSet(shape_vector);
+  }
+#endif
+
   executor_->Run();
   // Fix TensorArray reuse not cleaned bug.
   tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
@@ -750,6 +773,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
   // recover the cpu_math_library_num_threads to 1, in order to avoid thread
   // conflict when integrating it into deployment service.
   paddle::platform::SetNumThreads(1);
+#ifdef PADDLE_WITH_MKLDNN
+  if (config_.use_mkldnn_) MkldnnPostReset();
+#endif
 #if defined(PADDLE_WITH_MKLML)
   // Frees unused memory allocated by the Intel® MKL Memory Allocator to
   // avoid memory leak. See:
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 365f86c21105a7f1ffb7c300e0ab38c6aaa230fc..c4a7173b0104b767193e7d7eee3b10f272d396a2 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -317,6 +317,17 @@ class AnalysisPredictor : public PaddlePredictor {
   /// \param[in] inputs tensors
   ///
   void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
+
+  ///
+  /// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
+  ///
+  /// Used in both AnalysisPredictor::Run() and
+  /// AnalysisPredictor::ZeroCopyRun().
+  ///
+  /// \param[in] inputs_shape shapes of the input tensors
+  ///
+  void MkldnnPreSet(const std::vector<std::vector<int>> &inputs_shape);
+
   ///
   /// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
   ///
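
For reviewers who want to exercise the new path: below is a minimal, self-contained sketch of a C++ client that would hit the MkldnnPreSet()/MkldnnPostReset() calls now wrapped around executor_->Run() in ZeroCopyRun(). It only uses the existing public inference API (AnalysisConfig::EnableMKLDNN(), AnalysisConfig::SetMkldnnCacheCapacity(), the ZeroCopy tensor methods); the model directory, input name index, and input dimensions are placeholders, not part of this change.

```cpp
#include <functional>
#include <numeric>
#include <vector>

#include "paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // placeholder model directory
  config.EnableMKLDNN();
  // The cache-clearing branch in MkldnnPreSet() only runs when the
  // capacity is > 0; here at most 10 shape-keyed caches are retained.
  config.SetMkldnnCacheCapacity(10);
  config.SwitchUseFeedFetchOps(false);  // required by the ZeroCopy API

  auto predictor = paddle::CreatePaddlePredictor(config);

  // Vary the spatial size so MkldnnPreSet() sees several dynamic shapes;
  // each shape is serialized into a cache key such as "1-3-224-224-".
  for (int hw : {224, 256, 224}) {
    std::vector<int> shape = {1, 3, hw, hw};
    int numel = std::accumulate(shape.begin(), shape.end(), 1,
                                std::multiplies<int>());
    std::vector<float> data(numel, 1.0f);

    auto input = predictor->GetInputTensor(predictor->GetInputNames()[0]);
    input->Reshape(shape);
    input->copy_from_cpu(data.data());

    // With use_mkldnn_ set, this now calls MkldnnPreSet(shape_vector)
    // before executor_->Run() and MkldnnPostReset() afterwards.
    predictor->ZeroCopyRun();
  }
  return 0;
}
```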