diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index bcc20d6efe15ef52b0d3d3be94aa7b27e8f3d7e6..252c32ba63bf93671837afe34e84cfc1624473a5 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -129,10 +129,6 @@ cc_test(version_test SRCS version_test.cc DEPS version)
 cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version)
-if(WITH_NGRAPH)
-  cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph)
-endif(WITH_NGRAPH)
-
 cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
 nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
diff --git a/paddle/fluid/framework/details/execution_strategy.h b/paddle/fluid/framework/details/execution_strategy.h
index 44554489fdbf38f64fbd6b636bfb7c470068f605..6a8d99f900cf29d5e579a3c9dd5739d2122b7deb 100644
--- a/paddle/fluid/framework/details/execution_strategy.h
+++ b/paddle/fluid/framework/details/execution_strategy.h
@@ -28,7 +28,7 @@ struct ExecutionStrategy {
   // If we set this to 1, we will delete all variables when finish a batch. and
   // this will loss 15%+ performance.
   // Please be aware about this parameters.
-  size_t num_iteration_per_drop_scope_{100};
+  size_t num_iteration_per_drop_scope_{1};
   ExecutorType type_{kDefault};
   bool dry_run_{false};
   size_t num_iteration_per_run_{1};  // only use with async_ssa_graph_executor
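The default scope-drop interval above changes from 100 iterations to 1, so local scopes are now cleared after every batch. A minimal sketch (not part of this patch; the surrounding function is illustrative) of dialing the interval back up for throughput-sensitive jobs, using the fields declared in the struct above:

```cpp
// Illustrative only: restore the previous drop-scope interval when the new
// default (drop every batch) costs too much throughput for a workload.
#include "paddle/fluid/framework/details/execution_strategy.h"

paddle::framework::details::ExecutionStrategy MakeStrategy() {
  paddle::framework::details::ExecutionStrategy strategy;
  // Keep variables alive across more iterations, trading memory for speed;
  // the header comment warns that dropping every batch loses ~15% performance.
  strategy.num_iteration_per_drop_scope_ = 100;
  return strategy;
}
```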
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 88ce61f9b928aba1945bddc1f9f6b785834780ca..71c4a54dea08d9d5e53f182949854981fe36a41a 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -133,7 +133,9 @@ struct Argument {
   // Memory optimized related.
   DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
-  DECL_ARGUMENT_FIELD(memory_optim_force_update, MemoryOptimForceUpdate, bool);
+  DECL_ARGUMENT_FIELD(static_memory_optim, StaticMemoryOptim, bool);
+  DECL_ARGUMENT_FIELD(static_memory_optim_force_update,
+                      StaticMemoryOptimForceUpdate, bool);
   // Indicate which kind of sort algorithm is used for operators, the memory
   // optimization relays on the sort algorithm.
   DECL_ARGUMENT_FIELD(memory_optim_sort_kind, MemoryOptimSortKind, int);
diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
index 57683c0b727ef1c922e3a308db28d0af4f193602..3d1be9196fdeacd8ff852dbb595473a687352ccf 100644
--- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
@@ -444,6 +444,26 @@ std::vector<std::map<std::string, std::vector<int>>> DeseralizeBatchVarShapes(
   return batch_shapes;
 }
 
+// Replace the -1 in shape to a real number to fake the shape.
+std::vector<std::map<std::string, std::vector<int>>> FakeBatchVarShapes(
+    const framework::ProgramDesc& program) {
+  std::vector<std::map<std::string, std::vector<int>>> res;
+  res.emplace_back();
+  auto& record = res.front();
+  const int fake_batch_size = 3;
+  for (auto* var : program.Block(0).AllVars()) {
+    if (var->GetType() ==
+        framework::proto::VarType::Type::VarType_Type_LOD_TENSOR) {
+      auto shape = var->GetShape();
+      for (auto& v : shape) {
+        if (v < 0) v = fake_batch_size;
+      }
+      record[var->Name()].assign(shape.begin(), shape.end());
+    }
+  }
+  return res;
+}
+
 // Calculate the average dim of each tensor from the batch shape cache.
 std::unordered_map<std::string, size_t> GetBatchAverageSize(
     const std::vector<std::map<std::string, std::vector<int>>>& batches) {
@@ -478,6 +498,7 @@ std::vector<std::unordered_set<std::string>> AnalysisBatchShapesByBatchSize(
   std::unordered_map<std::string, std::stringstream> var_batchsize_hashes;
   for (auto& batch : batches) {
     for (auto& ele : batch) {
+      PADDLE_ENFORCE(!ele.second.empty());
       int batch_size = ele.second.front();
       // TODO(Superjomn) might consume large memory here, use combine hash.
       var_batchsize_hashes[ele.first] << batch_size;
@@ -538,9 +559,21 @@ std::vector<std::unordered_set<std::string>> AnalysisBatchShapesBySimilarSize(
 
 std::string MemoryOptimizePass::repr() const { return "memory optimize pass"; }
 
+std::pair<size_t, size_t> GetRange(
+    const std::unordered_map<std::string, size_t>& ave_size) {
+  auto res = std::make_pair(std::numeric_limits<size_t>::max(),
+                            std::numeric_limits<size_t>::min());
+  for (auto& item : ave_size) {
+    res.first = std::min(item.second, res.first);
+    res.second = std::max(item.second, res.second);
+  }
+  return res;
+}
+
 void MemoryOptimizePass::RunImpl(Argument* argument) {
   // When force update, should not optimize memory.
-  if (!argument->enable_memory_optim() || argument->memory_optim_force_update())
+  if (!argument->enable_memory_optim() ||
+      argument->static_memory_optim_force_update())
     return;
 
   graph_ = argument->main_graph_ptr();
@@ -549,21 +582,38 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
       argument->model_program_path_valid() ? argument->model_program_path() : "");
   VLOG(3) << "Load memory cache from " << path;
-  if (inference::IsFileExists(path)) {
-    VLOG(4) << "Performing memory optimize";
-    auto batches = DeseralizeBatchVarShapes(path);
-    auto var_batch_ave_size = GetBatchAverageSize(batches);
+  std::vector<std::map<std::string, std::vector<int>>> batches;
+
+  if (argument->static_memory_optim() && inference::IsFileExists(path)) {
+    string::PrettyLogInfo("--- Performing static memory optimize");
+    batches = DeseralizeBatchVarShapes(path);
+  } else {
+    string::PrettyLogInfo("--- Performing dynamic memory optimize");
+    batches = FakeBatchVarShapes(argument->main_program());
+  }
+  auto var_batch_ave_size = GetBatchAverageSize(batches);
+
+  // Get min and max memory size.
+  const auto range = GetRange(var_batch_ave_size);
+  const int cluster_size = std::max(
+      static_cast<int>((range.second - range.first) / 100 /*cluster num*/),
+      1024);
+  const int cluster_size1 = std::max(
+      static_cast<int>((range.second - range.first) / 1000 /*cluster num*/),
+      1024);
 
-    std::unordered_map<std::string, framework::ir::Node*> tensor_nodes;
-    space_table_t space_table;
-    CollectVarMemorySize(var_batch_ave_size, &tensor_nodes, &space_table);
+  std::unordered_map<std::string, framework::ir::Node*> tensor_nodes;
+  space_table_t space_table;
+  CollectVarMemorySize(var_batch_ave_size, &tensor_nodes, &space_table);
 
-    std::unordered_map<std::string, std::string> reuse_table;
-    double max_saving_ratio = 0.;
+  std::unordered_map<std::string, std::string> reuse_table;
+  double max_saving_ratio = 0.;
 
-    std::vector<std::function<MemoryAllocation()>> strategies;
+  std::vector<std::function<MemoryAllocation()>> strategies;
 
-    for (int sort_kind = 0; sort_kind < 2; sort_kind++) {
+  for (int sort_kind = 0; sort_kind < 2; sort_kind++) {
+    if (argument->static_memory_optim()) {
+      // This strategy only make scene in static memory optimize.
       strategies.emplace_back([&, sort_kind] {
         auto clustered_vars_by_batch_size =
             AnalysisBatchShapesByBatchSize(batches);
         MemoryAllocation allocation;
         MakeReusePlan(clustered_vars_by_batch_size, var_batch_ave_size,
                       space_table, &reuse_table, sort_kind, &allocation);
         return allocation;
       });
+    }
 
-      strategies.emplace_back([&, sort_kind] {
-        auto clustered_vars_by_ave_size = AnalysisBatchShapesBySimilarSize(
-            space_table, batches, 1024);  // interval 1kb
-        MemoryAllocation allocation;
-        MakeReusePlan(clustered_vars_by_ave_size, var_batch_ave_size,
-                      space_table, &reuse_table, sort_kind, &allocation);
-        return allocation;
-      });
+    strategies.emplace_back([&, sort_kind] {
+      auto clustered_vars_by_ave_size =
+          AnalysisBatchShapesBySimilarSize(space_table, batches, cluster_size);
+      MemoryAllocation allocation;
+      MakeReusePlan(clustered_vars_by_ave_size, var_batch_ave_size, space_table,
+                    &reuse_table, sort_kind, &allocation);
+      return allocation;
+    });
+
+    strategies.emplace_back([&, sort_kind] {
+      auto clustered_vars_by_ave_size =
+          AnalysisBatchShapesBySimilarSize(space_table, batches, cluster_size1);
+      MemoryAllocation allocation;
+      MakeReusePlan(clustered_vars_by_ave_size, var_batch_ave_size, space_table,
+                    &reuse_table, sort_kind, &allocation);
+      return allocation;
+    });
+
+    strategies.emplace_back([&, sort_kind] {
+      auto clustered_vars_by_ave_size = AnalysisBatchShapesBySimilarSize(
+          space_table, batches,
+          std::numeric_limits<int>::max());  // no intervals
+      MemoryAllocation allocation;
+      MakeReusePlan(clustered_vars_by_ave_size, var_batch_ave_size, space_table,
+                    &reuse_table, sort_kind, &allocation);
+      return allocation;
+    });
+  }
 
-      strategies.emplace_back([&, sort_kind] {
-        auto clustered_vars_by_ave_size = AnalysisBatchShapesBySimilarSize(
-            space_table, batches, 1024 * 1024);  // interval 1MB
-        MemoryAllocation allocation;
-        MakeReusePlan(clustered_vars_by_ave_size, var_batch_ave_size,
-                      space_table, &reuse_table, sort_kind, &allocation);
-        return allocation;
-      });
+  std::function<MemoryAllocation()>* best_strategy{nullptr};
 
-      strategies.emplace_back([&, sort_kind] {
-        auto clustered_vars_by_ave_size = AnalysisBatchShapesBySimilarSize(
-            space_table, batches,
-            std::numeric_limits<int>::max());  // no intervals
-        MemoryAllocation allocation;
-        MakeReusePlan(clustered_vars_by_ave_size, var_batch_ave_size,
-                      space_table, &reuse_table, sort_kind, &allocation);
-        return allocation;
-      });
+  // Try all strategies to get the best result.
+  for (auto& strategy : strategies) {
+    auto allocation = strategy();
+    string::PrettyLogDetail("--- get strategy saving %f memory for workspace",
+                            allocation.GetSavingRatio());
+    if (allocation.GetSavingRatio() > max_saving_ratio) {
+      max_saving_ratio = allocation.GetSavingRatio();
+      best_strategy = &strategy;
     }
+  }
+  if (!best_strategy) {
+    LOG(ERROR) << "This model makes poor memory optimize, skip memory optimize";
+    return;
+  }
+  auto memory_allocation = (*best_strategy)();
 
-      std::function<MemoryAllocation()>* best_strategy{nullptr};
+  string::PrettyLogInfo(
+      "--- Saved %.2f%s memory for workspace(temporary variables)",
+      memory_allocation.GetSavingRatio() * 100, "%");
 
-    // Try all strategies to get the best result.
-    for (auto& strategy : strategies) {
-      auto allocation = strategy();
-      string::PrettyLogDetail("--- get strategy saving %f memory for workspace",
-                              allocation.GetSavingRatio());
-      if (allocation.GetSavingRatio() > max_saving_ratio) {
-        max_saving_ratio = allocation.GetSavingRatio();
-        best_strategy = &strategy;
-      }
-    }
-    if (!best_strategy) {
-      LOG(ERROR)
-          << "This model makes poor memory optimize, skip memory optimize";
-      return;
-    }
-    auto memory_allocation = (*best_strategy)();
-
-    string::PrettyLogH2(
-        "--- Saved %.2f%s memory for workspace(temporary variables)",
-        memory_allocation.GetSavingRatio() * 100, "%");
-    string::PrettyLogDetail("--- Allocated %d MB",
-                            memory_allocation.allocated / 1024. / 1024.);
-    string::PrettyLogDetail("--- Saved %d MB",
-                            memory_allocation.saved / 1024. / 1024.);
-    argument->main_graph().Set(framework::ir::kGraphToProgramVarsToRemove,
-                               new std::unordered_set<std::string>);
-    auto& vars2remove =
-        argument->main_graph().Get<std::unordered_set<std::string>>(
-            framework::ir::kGraphToProgramVarsToRemove);
-
-    PerformReusePlan(reuse_table, memory_allocation.sort_kind, &vars2remove);
-    argument->SetMemoryOptimSortKind(memory_allocation.sort_kind);
-  }
+  argument->main_graph().Set(framework::ir::kGraphToProgramVarsToRemove,
+                             new std::unordered_set<std::string>);
+  auto& vars2remove =
+      argument->main_graph().Get<std::unordered_set<std::string>>(
+          framework::ir::kGraphToProgramVarsToRemove);
+
+  PerformReusePlan(reuse_table, memory_allocation.sort_kind, &vars2remove);
+  argument->SetMemoryOptimSortKind(memory_allocation.sort_kind);
 }
 
 float MemoryOptimizePass::MemoryAllocation::GetSavingRatio() const {
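For reference, the clustering intervals introduced in RunImpl() above can be exercised in isolation. This standalone sketch reuses the GetRange() helper from the patch with made-up tensor sizes to show how cluster_size and cluster_size1 are derived (roughly 100 and 1000 buckets over the size spread, floored at 1 KB):

```cpp
// Standalone illustration of the cluster-interval arithmetic in RunImpl().
// The tensor sizes are invented; only the arithmetic mirrors the patch.
#include <algorithm>
#include <iostream>
#include <limits>
#include <unordered_map>
#include <utility>

std::pair<size_t, size_t> GetRange(
    const std::unordered_map<std::string, size_t>& ave_size) {
  auto res = std::make_pair(std::numeric_limits<size_t>::max(),
                            std::numeric_limits<size_t>::min());
  for (auto& item : ave_size) {
    res.first = std::min(item.second, res.first);
    res.second = std::max(item.second, res.second);
  }
  return res;
}

int main() {
  std::unordered_map<std::string, size_t> ave_size = {
      {"conv_out", 1 << 20}, {"fc_out", 4096}, {"softmax_out", 1000}};
  const auto range = GetRange(ave_size);
  const int cluster_size = std::max(
      static_cast<int>((range.second - range.first) / 100 /*cluster num*/),
      1024);
  const int cluster_size1 = std::max(
      static_cast<int>((range.second - range.first) / 1000 /*cluster num*/),
      1024);
  std::cout << "coarse interval: " << cluster_size
            << " bytes, fine interval: " << cluster_size1 << " bytes\n";
}
```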
diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h
index fa1ad9c8c6aeff60ec4468f41140c57be790af7f..216f416de0d1003b944337ee98fb4e6a22c66fc5 100644
--- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h
+++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h
@@ -15,7 +15,7 @@
 #pragma once
 
 #include "paddle/fluid/inference/analysis/analysis_pass.h"
-#include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
+#include "paddle/fluid/platform/port.h"
 
 namespace paddle {
 namespace inference {
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index f9da3004ed8306ef08144d096afa4f86133e492d..e6008ba335ed89222247fc00033d1afbd6b28f16 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -95,7 +95,8 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
   CP_MEMBER(memory_pool_init_size_mb_);
 
   CP_MEMBER(enable_memory_optim_);
-  CP_MEMBER(memory_optim_force_update_);
+  CP_MEMBER(static_memory_optim_);
+  CP_MEMBER(static_memory_optim_force_update_);
 
   // TensorRT releated.
   CP_MEMBER(use_tensorrt_);
   CP_MEMBER(tensorrt_workspace_size_);
@@ -238,7 +239,8 @@ std::string contrib::AnalysisConfig::SerializeInfoCache() {
   ss << tensorrt_min_subgraph_size_;
 
   ss << enable_memory_optim_;
-  ss << memory_optim_force_update_;
+  ss << static_memory_optim_;
+  ss << static_memory_optim_force_update_;
 
   ss << use_mkldnn_;
   for (auto &item : mkldnn_enabled_op_types_) ss << item;
@@ -278,9 +280,11 @@ float contrib::AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
 #endif
 }
 
-void contrib::AnalysisConfig::EnableMemoryOptim(bool force_update_cache) {
+void contrib::AnalysisConfig::EnableMemoryOptim(
+    bool static_optim, bool force_update_static_cache) {
   enable_memory_optim_ = true;
-  memory_optim_force_update_ = force_update_cache;
+  static_memory_optim_ = static_optim;
+  static_memory_optim_force_update_ = force_update_static_cache;
   Update();
 }
 
@@ -300,4 +304,16 @@ void contrib::AnalysisConfig::SetModelBuffer(const char *prog_buffer,
   Update();
 }
 
+NativeConfig contrib::AnalysisConfig::ToNativeConfig() const {
+  NativeConfig config;
+  config.model_dir = model_dir_;
+  config.prog_file = prog_file_;
+  config.param_file = params_file_;
+  config.use_gpu = use_gpu_;
+  config.device = device_id_;
+  config.fraction_of_gpu_memory = fraction_of_gpu_memory_for_pool();
+  config.specify_input_name = specify_input_name_;
+  return config;
+}
+
 }  // namespace paddle
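A hedged usage sketch of the reworked EnableMemoryOptim() signature shown above; the surrounding configuration is illustrative and not taken from this patch:

```cpp
// Illustrative only: the two-flag form of EnableMemoryOptim() added here.
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

void ConfigureMemoryOptim(paddle::contrib::AnalysisConfig* cfg) {
  // Default arguments (false, false): dynamic memory optimization, which
  // fakes the batch dimension instead of reading a recorded shape cache.
  cfg->EnableMemoryOptim();

  // Static mode: reuse the variable-shape cache collected from earlier runs,
  // and force that cache to be refreshed on the next run.
  // cfg->EnableMemoryOptim(true /*static_optim*/,
  //                        true /*force_update_static_cache*/);
}
```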
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 2b0cad5faa0e31cb7546d405e05e36754915f653..9f8a78f7abc37d17b9806ea766da132f9bf4b28d 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -298,15 +298,15 @@ void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch,
 bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                  framework::Scope *scope) {
   VLOG(3) << "Predictor::get_fetch";
-  outputs->resize(fetchs_.size());
-  for (size_t i = 0; i < fetchs_.size(); ++i) {
-    int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
+  outputs->resize(fetches_.size());
+  for (size_t i = 0; i < fetches_.size(); ++i) {
+    int idx = boost::get<int>(fetches_[i]->GetAttr("col"));
     PADDLE_ENFORCE((size_t)idx == i);
     framework::LoDTensor &fetch =
         framework::GetFetchVariable(*scope, "fetch", idx);
     auto type = fetch.type();
     auto output = &(outputs->at(i));
-    output->name = fetchs_[idx]->Input("X")[0];
+    output->name = fetches_[idx]->Input("X")[0];
     if (type == framework::proto::VarType::FP32) {
       GetFetchOne<float>(fetch, output);
       output->dtype = PaddleDType::FLOAT32;
@@ -327,7 +327,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   argument_.SetUseGPU(config_.use_gpu());
   argument_.SetGPUDeviceId(config_.gpu_device_id());
   argument_.SetEnableMemoryOptim(config_.enable_memory_optim());
-  argument_.SetMemoryOptimForceUpdate(config_.memory_optim_force_update_);
+  argument_.SetStaticMemoryOptim(config_.static_memory_optim_);
+  argument_.SetStaticMemoryOptimForceUpdate(
+      config_.static_memory_optim_force_update_);
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
   if (!config_.model_dir().empty()) {
@@ -422,10 +424,10 @@ void AnalysisPredictor::PrepareFeedFetch() {
       feed_names_[op->Output("Out")[0]] = idx;
     } else if (op->Type() == "fetch") {
       int idx = boost::get<int>(op->GetAttr("col"));
-      if (fetchs_.size() <= static_cast<size_t>(idx)) {
-        fetchs_.resize(idx + 1);
+      if (fetches_.size() <= static_cast<size_t>(idx)) {
+        fetches_.resize(idx + 1);
       }
-      fetchs_[idx] = op;
+      fetches_[idx] = op;
     }
   }
 }
@@ -638,12 +640,12 @@ bool AnalysisPredictor::need_collect_var_shapes_for_memory_optim() {
   // check if the cache exists
   if (!config_.enable_memory_optim()) {
     need = false;
-  } else if (config_.enable_memory_optim() &&
+  } else if (config_.static_memory_optim_ &&
             !inference::IsFileExists(inference::analysis::GetMemoryCachePath(
                 config_.model_dir(), config_.prog_file()))) {
     need = true;
-  } else if (config_.enable_memory_optim() &&
-             config_.memory_optim_force_update_) {
+  } else if (config_.static_memory_optim_ &&
+             config_.static_memory_optim_force_update_) {
     need = true;
   }
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 9095b6ec1af6794c19e94fc9326a48239b3ba145..a8ea67d4bd332b5614f4f6593e8397829d28c5a6 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -115,7 +115,7 @@ class AnalysisPredictor : public PaddlePredictor {
   std::shared_ptr<framework::ProgramDesc> inference_program_;
   std::vector<framework::OpDesc *> feeds_;
   std::map<std::string, size_t> feed_names_;
-  std::vector<framework::OpDesc *> fetchs_;
+  std::vector<framework::OpDesc *> fetches_;
   // Memory buffer for feed inputs. The temporary LoDTensor will cause serious
   // concurrency problems, wrong results and memory leak, so cache them.
   std::vector<framework::LoDTensor> feed_tensors_;
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 1cee8904500636d7b49e6b4e54595dbce6a79954..f89eaeaadcc50fd7979d6807e8f2c7556e048e6c 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -162,17 +162,7 @@ struct AnalysisConfig {
   /** Transform the AnalysisConfig to NativeConfig.
    */
-  NativeConfig ToNativeConfig() const {
-    NativeConfig config;
-    config.model_dir = model_dir_;
-    config.prog_file = prog_file_;
-    config.param_file = params_file_;
-    config.use_gpu = use_gpu_;
-    config.device = device_id_;
-    config.fraction_of_gpu_memory = fraction_of_gpu_memory_for_pool();
-    config.specify_input_name = specify_input_name_;
-    return config;
-  }
+  NativeConfig ToNativeConfig() const;
   /** Specify the operator type list to use MKLDNN acceleration.
    * @param op_list the operator type list.
    */
@@ -195,7 +185,8 @@ struct AnalysisConfig {
   /** Turn on memory optimize
    * NOTE still in development, will release latter.
    */
-  void EnableMemoryOptim(bool force_update_cache = false);
+  void EnableMemoryOptim(bool static_optim = false,
+                         bool force_update_static_cache = false);
   /** Tell whether the memory optimization is activated. */
   bool enable_memory_optim() const;
 
@@ -241,7 +232,8 @@ struct AnalysisConfig {
 
   // memory reuse related.
   bool enable_memory_optim_{false};
-  bool memory_optim_force_update_{false};
+  bool static_memory_optim_{false};
+  bool static_memory_optim_force_update_{false};
 
   bool use_mkldnn_{false};
   std::unordered_set<std::string> mkldnn_enabled_op_types_;
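ToNativeConfig() keeps its signature, so existing call sites compile unchanged; only its body moves from this header into analysis_config.cc. A hedged sketch of typical usage, assuming the CreatePaddlePredictor entry points of this API family (the model path is a placeholder):

```cpp
// Sketch only: build matching analysis and native predictors from one config.
#include <string>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void BuildBothPredictors(const std::string& model_dir) {
  paddle::contrib::AnalysisConfig analysis_config(model_dir);

  // ToNativeConfig() is now defined out-of-line (see analysis_config.cc above),
  // but callers use it exactly as before.
  paddle::NativeConfig native_config = analysis_config.ToNativeConfig();

  auto analysis_predictor = paddle::CreatePaddlePredictor(analysis_config);
  auto native_predictor = paddle::CreatePaddlePredictor(native_config);
  (void)analysis_predictor;
  (void)native_predictor;
}
```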
diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
index 4ec9404ab42bcd9cc0608f033cb2777106a29583..e78ab942d113323fecf5510dca85fb5db734efc8 100644
--- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
@@ -253,7 +253,7 @@ void compare(bool use_mkldnn = false) {
 }
 
 // Compare result of NativeConfig and AnalysisConfig with memory optimization.
-TEST(Analyzer_dam, compare_with_memory_optim) {
+TEST(Analyzer_dam, compare_with_static_memory_optim) {
   // The small dam will core in CI, but works in local.
   if (FLAGS_max_turn_num == 9) {
     contrib::AnalysisConfig cfg, cfg1;
@@ -263,7 +263,7 @@ TEST(Analyzer_dam, compare_with_memory_optim) {
     SetInput(&input_slots_all);
     // Run the first time to force to update memory cache
     SetConfig(&cfg);
-    cfg.EnableMemoryOptim(true);
+    cfg.EnableMemoryOptim(true, true /*force update*/);
 
     CompareNativeAndAnalysis(
         reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
@@ -271,7 +271,7 @@ TEST(Analyzer_dam, compare_with_memory_optim) {
 
     // Run second time to use the memory cache and perform memory optimization.
     SetConfig(&cfg1);
-    cfg1.EnableMemoryOptim();
+    cfg1.EnableMemoryOptim(true, false /*do not force update*/);
 
     CompareNativeAndAnalysis(
         reinterpret_cast<const PaddlePredictor::Config *>(&cfg1),
@@ -279,6 +279,24 @@ TEST(Analyzer_dam, compare_with_memory_optim) {
   }
 }
 
+TEST(Analyzer_dam, compare_with_dynamic_memory_optim) {
+  // The small dam will core in CI, but works in local.
+  if (FLAGS_max_turn_num == 9) {
+    contrib::AnalysisConfig cfg, cfg1;
+    DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
+
+    std::vector<std::vector<PaddleTensor>> input_slots_all;
+    SetInput(&input_slots_all);
+    // Run the first time to force to update memory cache
+    SetConfig(&cfg);
+    cfg.EnableMemoryOptim();
+
+    CompareNativeAndAnalysis(
+        reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+        input_slots_all);
+  }
+}
+
 TEST(Analyzer_dam, compare) { compare(); }
 
 #ifdef PADDLE_WITH_MKLDNN
diff --git a/paddle/fluid/operators/ngraph/CMakeLists.txt b/paddle/fluid/operators/ngraph/CMakeLists.txt
index 83f78d505d7444cd12105aee40b0d03349b07be3..6b256ef02666c21ec1db3f6922b56bb23363b4a0 100644
--- a/paddle/fluid/operators/ngraph/CMakeLists.txt
+++ b/paddle/fluid/operators/ngraph/CMakeLists.txt
@@ -1,4 +1,5 @@
 if(WITH_NGRAPH)
+  cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph)
   cc_library(ngraph_engine SRCS ngraph_engine.cc DEPS ngraph_bridge framework_proto)
   op_library(ngraph_engine_op DEPS ngraph_engine op_registry op_info device_context)
 endif()
diff --git a/paddle/fluid/framework/ngraph_bridge.cc b/paddle/fluid/operators/ngraph/ngraph_bridge.cc
similarity index 55%
rename from paddle/fluid/framework/ngraph_bridge.cc
rename to paddle/fluid/operators/ngraph/ngraph_bridge.cc
index 365870c54eb3861ad6c273d3866dcd32d1c4166a..d6e897ed4666261cdd0bd6565f61abb218d971e5 100644
--- a/paddle/fluid/framework/ngraph_bridge.cc
+++ b/paddle/fluid/operators/ngraph/ngraph_bridge.cc
@@ -17,39 +17,39 @@ limitations under the License. */
 #include
 
 #include "ngraph/ngraph.hpp"
-#include "paddle/fluid/framework/ngraph_bridge.h"
-#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/operators/ngraph/ngraph_bridge.h"
 #include "paddle/fluid/operators/ngraph/ngraph_ops.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/ngraph_helper.h"
 
 namespace paddle {
-namespace framework {
+namespace operators {
 
 namespace NG_OPS = paddle::operators::ngraphs;
 std::map<std::string,
-         std::function<void(const std::shared_ptr<OperatorBase>&,
+         std::function<void(const std::shared_ptr<framework::OperatorBase>&,
                             std::shared_ptr<std::unordered_map<
                                 std::string, std::shared_ptr<ngraph::Node>>>)>>
     NgraphBridge::NG_NODE_MAP = {
        {"elementwise_add", NG_OPS::BuildElementwiseAddNode},
        {"elementwise_add_grad", NG_OPS::BuildElementwiseAddGradNode},
-       {"fill_constant", paddle::operators::ngraphs::BuildFillConstantNode},
-       {"mean", paddle::operators::ngraphs::BuildMeanNode},
-       {"mean_grad", paddle::operators::ngraphs::BuildMeanGradNode},
-       {"mul", paddle::operators::ngraphs::BuildMulNode},
-       {"mul_grad", paddle::operators::ngraphs::BuildMulGradNode},
-       {"softmax", paddle::operators::ngraphs::BuildSoftmaxNode},
-       {"softmax_grad", paddle::operators::ngraphs::BuildSoftmaxGradNode},
-       {"scale", paddle::operators::ngraphs::BuildScaleNode},
-       {"relu", paddle::operators::ngraphs::BuildUnaryNode<ngraph::op::Relu>},
-       {"tanh", paddle::operators::ngraphs::BuildUnaryNode<ngraph::op::Tanh>},
-       {"top_k", paddle::operators::ngraphs::BuildTopKNode}};
-
-void NgraphBridge::BuildNgNode(const std::shared_ptr<OperatorBase>& op) {
+       {"fill_constant", NG_OPS::BuildFillConstantNode},
+       {"mean", NG_OPS::BuildMeanNode},
+       {"mean_grad", NG_OPS::BuildMeanGradNode},
+       {"mul", NG_OPS::BuildMulNode},
+       {"mul_grad", NG_OPS::BuildMulGradNode},
+       {"softmax", NG_OPS::BuildSoftmaxNode},
+       {"softmax_grad", NG_OPS::BuildSoftmaxGradNode},
+       {"scale", NG_OPS::BuildScaleNode},
+       {"relu", NG_OPS::BuildUnaryNode<ngraph::op::Relu>},
+       {"tanh", NG_OPS::BuildUnaryNode<ngraph::op::Tanh>},
+       {"top_k", NG_OPS::BuildTopKNode}};
+
+void NgraphBridge::BuildNgNode(
+    const std::shared_ptr<framework::OperatorBase>& op) {
   auto& op_type = op->Type();
   NG_NODE_MAP[op_type](op, ngb_node_map_);
 }
 
-}  // namespace framework
+}  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/framework/ngraph_bridge.h b/paddle/fluid/operators/ngraph/ngraph_bridge.h
similarity index 84%
rename from paddle/fluid/framework/ngraph_bridge.h
rename to paddle/fluid/operators/ngraph/ngraph_bridge.h
index 5ad7b8daeb6a782515e50fc87ca7188b46308390..c57988f8f6322e76678c572aa21ff5b17b9e3c22 100644
--- a/paddle/fluid/framework/ngraph_bridge.h
+++ b/paddle/fluid/operators/ngraph/ngraph_bridge.h
@@ -21,16 +21,16 @@ limitations under the License. */
 
 #include "ngraph/node.hpp"
 
-namespace paddle {
-namespace framework {
+#include "paddle/fluid/framework/operator.h"
 
-class OperatorBase;
+namespace paddle {
+namespace operators {
 
 class NgraphBridge {
  public:
  static std::map<
      std::string,
-      std::function<void(const std::shared_ptr<OperatorBase>&,
+      std::function<void(const std::shared_ptr<framework::OperatorBase>&,
                         std::shared_ptr<std::unordered_map<
                             std::string, std::shared_ptr<ngraph::Node>>>)>>
      NG_NODE_MAP;
@@ -41,7 +41,7 @@ class NgraphBridge {
           var_node_map)
       : ngb_node_map_(var_node_map) {}
 
-  void BuildNgNode(const std::shared_ptr<OperatorBase>& op);
+  void BuildNgNode(const std::shared_ptr<framework::OperatorBase>& op);
 
  private:
  std::shared_ptr<
      std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
      ngb_node_map_;
 };
 
-}  // namespace framework
+}  // namespace operators
 }  // namespace paddle
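Independent of the namespace move, NgraphBridge is a table-driven dispatcher: NG_NODE_MAP maps an op type string to a builder callback, and BuildNgNode() looks the type up and invokes it. A self-contained toy version of that pattern (the types here are stand-ins, not the real framework or nGraph classes):

```cpp
// Toy illustration of the NG_NODE_MAP dispatch used by NgraphBridge.
#include <functional>
#include <iostream>
#include <map>
#include <string>

struct FakeOp {  // stand-in for framework::OperatorBase
  std::string type;
};

using Builder = std::function<void(const FakeOp&)>;

static const std::map<std::string, Builder> kNodeBuilders = {
    {"relu", [](const FakeOp&) { std::cout << "build relu node\n"; }},
    {"softmax", [](const FakeOp&) { std::cout << "build softmax node\n"; }},
};

void BuildNode(const FakeOp& op) {
  // Mirrors NgraphBridge::BuildNgNode: look up the op type and invoke the
  // registered builder; unsupported types are simply skipped here.
  auto it = kNodeBuilders.find(op.type);
  if (it != kNodeBuilders.end()) it->second(op);
}

int main() {
  BuildNode({"relu"});
  BuildNode({"unknown"});  // no builder registered; nothing happens
}
```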
*/ #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/ngraph_bridge.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/framework/var_type.h" +#include "paddle/fluid/operators/ngraph/ngraph_bridge.h" #include "paddle/fluid/operators/ngraph/ngraph_engine.h" namespace paddle { @@ -88,15 +88,14 @@ static std::vector> NgraphOpIntervals( int pivot = left; while (pivot < right) { auto op_type = ops.at(pivot)->Type(); - if (paddle::framework::NgraphBridge::NG_NODE_MAP.find(op_type) == - paddle::framework::NgraphBridge::NG_NODE_MAP.end()) { + if (NgraphBridge::NG_NODE_MAP.find(op_type) == + NgraphBridge::NG_NODE_MAP.end()) { ++pivot; } else { int start = pivot, end = start; while (pivot < right && - (paddle::framework::NgraphBridge::NG_NODE_MAP.find( - ops.at(pivot)->Type()) != - paddle::framework::NgraphBridge::NG_NODE_MAP.end())) { + (NgraphBridge::NG_NODE_MAP.find(ops.at(pivot)->Type()) != + NgraphBridge::NG_NODE_MAP.end())) { ++pivot; ++end; } @@ -283,7 +282,7 @@ void NgraphEngine::BuildNgNodes() { } } } - framework::NgraphBridge ngb(var_node_map_); + NgraphBridge ngb(var_node_map_); for (auto& op : fused_ops_) { ngb.BuildNgNode(op); } diff --git a/python/paddle/fluid/contrib/int8_inference/utility.py b/python/paddle/fluid/contrib/int8_inference/utility.py index 197fc5f2d261798dc17daa37c1b6d258936a8a39..40de038f28a83738e6e6cd8c77c0a9916ce68b4f 100644 --- a/python/paddle/fluid/contrib/int8_inference/utility.py +++ b/python/paddle/fluid/contrib/int8_inference/utility.py @@ -32,10 +32,13 @@ class Calibrator(object): def __init__(self, *args, **kwargs): self.program = kwargs['program'] - self.iterations = kwargs['iterations'] self.pretrained_model = kwargs['pretrained_model'] - self.debug = kwargs['debug'] + self.debug = kwargs['debug'] if 'debug' in kwargs else False self.algo = kwargs['algo'] + self.output = kwargs['output'] + self.feed_var_names = kwargs['feed_var_names'] + self.fetch_list = kwargs['fetch_list'] + self.exe = kwargs['exe'] self._conv_input_var_name = [] self._conv_output_var_name = [] @@ -54,17 +57,38 @@ class Calibrator(object): self._u8_output_var = [] self._s8_output_var = [] self._persistable_vars = [] + self._sampling_data = {} - def generate_sampling_program(self): self.__init_analysis() self.__generate_output_program() - def generate_quantized_data(self, sampling_data): - self.__sampling(sampling_data) + def save_int8_model(self): + self.__sampling(self._sampling_data) self.__save_scale() self.__update_program() self.__update_output_program_attr() self.__display_debug() + self.__save_offline_model() + + def sample_data(self): + ''' + Sampling the tensor data of variable. + ''' + for i in self.sampling_program.list_vars(): + if i.name in self.sampling_vars: + np_data = np.array(fluid.global_scope().find_var(i.name) + .get_tensor()) + if i.name not in self._sampling_data: + self._sampling_data[i.name] = [] + self._sampling_data[i.name].append(np_data) + + def __save_offline_model(self): + ''' + Save the quantized model to the disk. 
diff --git a/python/paddle/fluid/contrib/int8_inference/utility.py b/python/paddle/fluid/contrib/int8_inference/utility.py
index 197fc5f2d261798dc17daa37c1b6d258936a8a39..40de038f28a83738e6e6cd8c77c0a9916ce68b4f 100644
--- a/python/paddle/fluid/contrib/int8_inference/utility.py
+++ b/python/paddle/fluid/contrib/int8_inference/utility.py
@@ -32,10 +32,13 @@ class Calibrator(object):
 
     def __init__(self, *args, **kwargs):
         self.program = kwargs['program']
-        self.iterations = kwargs['iterations']
         self.pretrained_model = kwargs['pretrained_model']
-        self.debug = kwargs['debug']
+        self.debug = kwargs['debug'] if 'debug' in kwargs else False
         self.algo = kwargs['algo']
+        self.output = kwargs['output']
+        self.feed_var_names = kwargs['feed_var_names']
+        self.fetch_list = kwargs['fetch_list']
+        self.exe = kwargs['exe']
 
         self._conv_input_var_name = []
         self._conv_output_var_name = []
@@ -54,17 +57,38 @@ class Calibrator(object):
         self._u8_output_var = []
         self._s8_output_var = []
         self._persistable_vars = []
+        self._sampling_data = {}
 
-    def generate_sampling_program(self):
         self.__init_analysis()
         self.__generate_output_program()
 
-    def generate_quantized_data(self, sampling_data):
-        self.__sampling(sampling_data)
+    def save_int8_model(self):
+        self.__sampling(self._sampling_data)
         self.__save_scale()
         self.__update_program()
         self.__update_output_program_attr()
         self.__display_debug()
+        self.__save_offline_model()
+
+    def sample_data(self):
+        '''
+        Sampling the tensor data of variable.
+        '''
+        for i in self.sampling_program.list_vars():
+            if i.name in self.sampling_vars:
+                np_data = np.array(fluid.global_scope().find_var(i.name)
+                                   .get_tensor())
+                if i.name not in self._sampling_data:
+                    self._sampling_data[i.name] = []
+                self._sampling_data[i.name].append(np_data)
+
+    def __save_offline_model(self):
+        '''
+        Save the quantized model to the disk.
+        '''
+        fluid.io.save_inference_model(self.output, self.feed_var_names,
+                                      self.fetch_list, self.exe,
+                                      self.sampling_program)
 
     def __display_debug(self):
         if self.debug:
diff --git a/python/paddle/fluid/contrib/tests/test_calibration.py b/python/paddle/fluid/contrib/tests/test_calibration.py
index 17e4eb8b831268bed00736db2e9706aece9fdd74..ed5ea70260878ee27606dfcd0880e442b48525c1 100644
--- a/python/paddle/fluid/contrib/tests/test_calibration.py
+++ b/python/paddle/fluid/contrib/tests/test_calibration.py
@@ -26,7 +26,7 @@ import paddle.fluid.profiler as profiler
 from PIL import Image, ImageEnhance
 import math
 sys.path.append('..')
-import int8_inference.utility as ut
+import int8_inference.utility as int8_utility
 
 random.seed(0)
 np.random.seed(0)
@@ -120,13 +120,13 @@ class TestCalibration(unittest.TestCase):
     def setUp(self):
         # TODO(guomingz): Put the download process in the cmake.
         # Download and unzip test data set
-        imagenet_dl_url = 'http://paddle-inference-dist.bj.bcebos.com/int8/calibration_test_data.tar.gz'
+        imagenet_dl_url = 'http://paddle-inference-dist.cdn.bcebos.com/int8/calibration_test_data.tar.gz'
         zip_file_name = imagenet_dl_url.split('/')[-1]
         cmd = 'rm -rf data {} && mkdir data && wget {} && tar xvf {} -C data'.format(
             zip_file_name, imagenet_dl_url, zip_file_name)
         os.system(cmd)
         # resnet50 fp32 data
-        resnet50_fp32_model_url = 'http://paddle-inference-dist.bj.bcebos.com/int8/resnet50_int8_model.tar.gz'
+        resnet50_fp32_model_url = 'http://paddle-inference-dist.cdn.bcebos.com/int8/resnet50_int8_model.tar.gz'
         resnet50_zip_name = resnet50_fp32_model_url.split('/')[-1]
         resnet50_unzip_folder_name = 'resnet50_fp32'
         cmd = 'rm -rf {} {} && mkdir {} && wget {} && tar xvf {} -C {}'.format(
@@ -135,8 +135,7 @@ class TestCalibration(unittest.TestCase):
             resnet50_zip_name, resnet50_unzip_folder_name)
         os.system(cmd)
 
-        self.iterations = 100
-        self.skip_batch_num = 5
+        self.iterations = 50
 
     def run_program(self, model_path, generate_int8=False, algo='direct'):
         image_shape = [3, 224, 224]
@@ -163,16 +162,15 @@ class TestCalibration(unittest.TestCase):
 
         print("Start calibration ...")
 
-        calibrator = ut.Calibrator(
+        calibrator = int8_utility.Calibrator(
             program=infer_program,
             pretrained_model=model_path,
-            iterations=100,
-            debug=False,
-            algo=algo)
-
-        sampling_data = {}
+            algo=algo,
+            exe=exe,
+            output=int8_model,
+            feed_var_names=feed_dict,
+            fetch_list=fetch_targets)
 
-        calibrator.generate_sampling_program()
         test_info = []
         cnt = 0
         for batch_id, data in enumerate(val_reader()):
@@ -192,13 +190,7 @@ class TestCalibration(unittest.TestCase):
                           feed_dict[1]: label},
                     fetch_list=fetch_targets)
             if generate_int8:
-                for i in calibrator.sampling_program.list_vars():
-                    if i.name in calibrator.sampling_vars:
-                        np_data = np.array(fluid.global_scope().find_var(i.name)
-                                           .get_tensor())
-                        if i.name not in sampling_data:
-                            sampling_data[i.name] = []
-                        sampling_data[i.name].append(np_data)
+                calibrator.sample_data()
             test_info.append(np.mean(acc1) * len(data))
             cnt += len(data)
@@ -209,9 +201,8 @@ class TestCalibration(unittest.TestCase):
                 break
 
         if generate_int8:
-            calibrator.generate_quantized_data(sampling_data)
-            fluid.io.save_inference_model(int8_model, feed_dict, fetch_targets,
-                                          exe, calibrator.sampling_program)
+            calibrator.save_int8_model()
+
            print(
                "Calibration is done and the corresponding files were generated at {}".
                format(os.path.abspath("calibration_out")))