diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 9feca9620f761d9542fa0629dc2ff1d1be2f5782..17f1396ce8d6a84658b704e4f21f4c6ae50db0d6 100755
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -292,6 +292,7 @@ bool AnalysisPredictor::Init(
     }
   }
 #endif
+  inference::DisplayMemoryInfo(place_, "Init predictor");
   return true;
 }
 
@@ -1050,14 +1051,7 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetUseFcPadding(config_.use_fc_padding());
   argument_.SetGPUDeviceId(config_.gpu_device_id());
   argument_.SetEnableAnalysisOptim(config_.enable_ir_optim_);
-  if (model_precision_ == phi::DataType::FLOAT32) {
-    argument_.SetEnableMemoryOptim(config_.enable_memory_optim());
-  } else {
-    // TODO(inference): mixed precision temporarily not support memory_optim
-    LOG_FIRST_N(WARNING, 1) << "mixed precision model temporarily not support "
-                               "memory optim, so we just turn off that.";
-    argument_.SetEnableMemoryOptim(false);
-  }
+  argument_.SetEnableMemoryOptim(config_.enable_memory_optim());
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
   argument_.SetPredictorID(predictor_id_);
@@ -1622,6 +1616,7 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
 }
 
 bool AnalysisPredictor::ZeroCopyRun() {
+  inference::DisplayMemoryInfo(place_, "before run");
 #if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
   if (config_.dist_config().use_dist_model()) {
     VLOG(3) << "ZeroCopyRun will use the fleet executor.";
@@ -1659,6 +1654,7 @@ bool AnalysisPredictor::ZeroCopyRun() {
 #endif
 
   executor_->Run();
+  inference::DisplayMemoryInfo(place_, "after run");
 
   if (config_.shape_range_info_collected()) {
     CollectShapeRangeInfo();
diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index 471b95d525f9b1d6cabccc00568094d9551d83ac..e3b145381280cd017498f9169173239118a3d134 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -31,7 +31,9 @@
 
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/memory/stats.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/printf.h"
 #include "paddle/phi/backends/dynload/port.h"
 
@@ -421,5 +423,44 @@ static bool IsFileExists(const std::string &path) {
 
 void RegisterAllCustomOperator();
 
+static inline double ToMegaBytes(size_t bytes) {
+  return static_cast<double>(bytes) / (1 << 20);
+}
+
+static inline void DisplayMemoryInfo(platform::Place place,
+                                     const std::string &hint) {
+#ifdef PADDLE_WITH_CUDA
+  // size_t free, total;
+  // cudaSetDevice(place.GetDeviceId());
+  // cudaMemGetInfo(&free, &total);
+  // VLOG(1) << "[" << ToMegaBytes(total - free) << "MB/" << ToMegaBytes(total)
+  // << "MB]";
+
+  VLOG(1) << hint << " : [gpu current allocated memory: "
+          << ToMegaBytes(paddle::memory::DeviceMemoryStatCurrentValue(
+                 "Allocated", place.GetDeviceId()))
+          << "MB], [gpu current reserved memory: "
+          << ToMegaBytes(paddle::memory::DeviceMemoryStatCurrentValue(
+                 "Reserved", place.GetDeviceId()))
+          << "MB], [gpu peak allocated memory: "
+          << ToMegaBytes(paddle::memory::DeviceMemoryStatPeakValue(
+                 "Allocated", place.GetDeviceId()))
+          << "MB], [gpu peak reserved memory: "
+          << ToMegaBytes(paddle::memory::DeviceMemoryStatPeakValue(
+                 "Reserved", place.GetDeviceId()))
+          << "MB]";
+#endif
+  VLOG(1)
+      << hint << " : [cpu current allocated memory: "
+      << ToMegaBytes(paddle::memory::HostMemoryStatCurrentValue("Allocated", 0))
+      << "MB], [cpu current reserved memory: "
+      << ToMegaBytes(paddle::memory::HostMemoryStatCurrentValue("Reserved", 0))
+      << "MB], [cpu peak allocated memory: "
+      << ToMegaBytes(paddle::memory::HostMemoryStatPeakValue("Allocated", 0))
+      << "MB], [cpu peak reserved memory: "
+      << ToMegaBytes(paddle::memory::HostMemoryStatPeakValue("Reserved", 0))
+      << "MB]";
+}
+
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/phi/backends/gpu/gpu_context.cc b/paddle/phi/backends/gpu/gpu_context.cc
index 87d779f9194dba6840aab833ba637a4d222604dd..cf08f5b4affa9f2e4187d4c7d87f61467800b04a 100644
--- a/paddle/phi/backends/gpu/gpu_context.cc
+++ b/paddle/phi/backends/gpu/gpu_context.cc
@@ -575,7 +575,7 @@ struct GPUContext::Impl {
     if (!blas_tensor_core_handle_creator_) {
       phi::InitBlasHandle(&blas_tensor_core_handle_, stream());
     } else {
-      phi::InitBlasHandle(&blas_tensor_core_handle_, stream());
+      blas_tensor_core_handle_ = blas_tensor_core_handle_creator_();
     }
     PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cublasSetMathMode(
         blas_tensor_core_handle_, CUBLAS_TENSOR_OP_MATH));