diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 42e62011f84c18b875a3fa48b95a05f152fb5791..6c68b385bcbc04844309c581bf3afd848fe4d1aa 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -245,7 +245,18 @@ bool AnalysisPredictor::PrepareExecutor() {
 
 void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
 #ifdef PADDLE_WITH_MKLDNN
-  VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id="
+  std::vector<std::vector<int>> inputs_shape;
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    inputs_shape.emplace_back(inputs[i].shape);
+  }
+  MkldnnPreSet(inputs_shape);
+#endif
+}
+
+void AnalysisPredictor::MkldnnPreSet(
+    const std::vector<std::vector<int>> &inputs_shape) {
+#ifdef PADDLE_WITH_MKLDNN
+  VLOG(2) << "AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id="
           << platform::MKLDNNDeviceContext::tls().get_cur_mkldnn_session_id();
   // In cache clearing mode.
   if (config_.mkldnn_cache_capacity_ > 0) {
@@ -257,9 +268,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
         config_.mkldnn_cache_capacity_);
     // Set current_input_shape for caching dynamic shape.
     std::stringstream ss;
-    for (size_t i = 0; i < inputs.size(); ++i) {
-      for (size_t j = 0; j < inputs[i].shape.size(); ++j) {
-        ss << inputs[i].shape[j] << "-";
+    for (size_t i = 0; i < inputs_shape.size(); ++i) {
+      for (size_t j = 0; j < inputs_shape[i].size(); ++j) {
+        ss << inputs_shape[i][j] << "-";
       }
     }
     VLOG(2) << "Set input shape=" << ss.str();
@@ -742,6 +753,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
 
 bool AnalysisPredictor::ZeroCopyRun() {
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+#ifdef PADDLE_WITH_MKLDNN
+  if (config_.use_mkldnn_) {
+    std::vector<std::vector<int>> shape_vector;
+    auto names = GetInputNames();
+    for (size_t i = 0; i < names.size(); ++i) {
+      auto in_tensor = GetInputTensor(names[i]);
+      shape_vector.emplace_back(in_tensor->shape());
+    }
+    MkldnnPreSet(shape_vector);
+  }
+#endif
+
   executor_->Run();
   // Fix TensorArray reuse not cleaned bug.
   tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
@@ -750,6 +773,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
   // recover the cpu_math_library_num_threads to 1, in order to avoid thread
   // conflict when integrating it into deployment service.
   paddle::platform::SetNumThreads(1);
+#ifdef PADDLE_WITH_MKLDNN
+  if (config_.use_mkldnn_) MkldnnPostReset();
+#endif
 #if defined(PADDLE_WITH_MKLML)
   // Frees unused memory allocated by the Intel® MKL Memory Allocator to
   // avoid memory leak. See:
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 365f86c21105a7f1ffb7c300e0ab38c6aaa230fc..c4a7173b0104b767193e7d7eee3b10f272d396a2 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -317,6 +317,17 @@ class AnalysisPredictor : public PaddlePredictor {
   /// \param[in] inputs tensors
   ///
   void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
+
+  ///
+  /// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
+  ///
+  /// Used in both AnalysisPredictor::Run() and
+  /// AnalysisPredictor::ZeroCopyRun().
+  ///
+  /// \param[in] inputs_shape shapes of the input tensors
+  ///
+  void MkldnnPreSet(const std::vector<std::vector<int>> &inputs_shape);
+
   ///
   /// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
   ///
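
For reviewers who want to exercise the new path: below is a minimal, self-contained sketch of a C++ client that would hit the MkldnnPreSet()/MkldnnPostReset() calls now wrapped around executor_->Run() in ZeroCopyRun(). It only uses the existing public inference API (AnalysisConfig::EnableMKLDNN(), AnalysisConfig::SetMkldnnCacheCapacity(), the ZeroCopy tensor methods); the model directory, input name index, and input dimensions are placeholders, not part of this change.

```cpp
#include <functional>
#include <numeric>
#include <vector>

#include "paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // placeholder model directory
  config.EnableMKLDNN();
  // The cache-clearing branch in MkldnnPreSet() only runs when the
  // capacity is > 0; here at most 10 shape-keyed caches are retained.
  config.SetMkldnnCacheCapacity(10);
  config.SwitchUseFeedFetchOps(false);  // required by the ZeroCopy API

  auto predictor = paddle::CreatePaddlePredictor(config);

  // Vary the spatial size so MkldnnPreSet() sees several dynamic shapes;
  // each shape is serialized into a cache key such as "1-3-224-224-".
  for (int hw : {224, 256, 224}) {
    std::vector<int> shape = {1, 3, hw, hw};
    int numel = std::accumulate(shape.begin(), shape.end(), 1,
                                std::multiplies<int>());
    std::vector<float> data(numel, 1.0f);

    auto input = predictor->GetInputTensor(predictor->GetInputNames()[0]);
    input->Reshape(shape);
    input->copy_from_cpu(data.data());

    // With use_mkldnn_ set, this now calls MkldnnPreSet(shape_vector)
    // before executor_->Run() and MkldnnPostReset() afterwards.
    predictor->ZeroCopyRun();
  }
  return 0;
}
```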