Unverified  Commit 875a07c3  authored by Yan Chunwei  committed by GitHub

refactor inference analysis api (#14634)

Parent 99e6e8b0
@@ -134,6 +134,7 @@ if(WITH_GPU)
       message(WARNING "Anakin needs CUDNN >= 7.0 to compile. Force WITH_ANAKIN=OFF")
       set(WITH_ANAKIN OFF CACHE STRING "Anakin is valid only when CUDNN >= 7.0." FORCE)
     endif()
+    add_definitions(-DWITH_ANAKIN)
   endif()
 endif()
 if(WITH_ANAKIN)
   # NOTICE(minqiyang): the end slash is important because $CUDNN_INCLUDE_DIR
......
@@ -40,14 +40,14 @@ void NaiveExecutor::Prepare(Scope *scope, const ProgramDesc &program_desc,

 void NaiveExecutor::Run() {
 #ifndef PADDLE_ON_INFERENCE
-  LOG_FIRST_N(WARNING, 15) << "The NaiveExecutor can not work properly if the "
-                              "cmake flag ON_INFER is not set.";
-  LOG_FIRST_N(WARNING, 15) << "Unlike the training phase, all the scopes and "
-                              "variables will be reused to save the allocation "
-                              "overhead.";
-  LOG_FIRST_N(WARNING, 15) << "Please re-compile the inference library by "
-                              "setting the cmake flag ON_INFER=ON if you are "
-                              "running Paddle Inference";
+  LOG_FIRST_N(WARNING, 5) << "The NaiveExecutor can not work properly if the "
+                             "cmake flag ON_INFER is not set.";
+  LOG_FIRST_N(WARNING, 5) << "Unlike the training phase, all the scopes and "
+                             "variables will be reused to save the allocation "
+                             "overhead.";
+  LOG_FIRST_N(WARNING, 5) << "Please re-compile the inference library by "
+                             "setting the cmake flag ON_INFER=ON if you are "
+                             "running Paddle Inference";
 #endif  // PADDLE_ON_INFERENCE
   for (auto &op : ops_) {
     VLOG(3) << std::this_thread::get_id() << " run " << op->Type()
......
@@ -14,86 +14,101 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/inference/api/paddle_pass_builder.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle_pass_builder.h"  // NOLINT
+#include "paddle/fluid/platform/gpu_info.h"

 namespace paddle {

 PassStrategy *contrib::AnalysisConfig::pass_builder() const {
-  PADDLE_ENFORCE(
-      pass_builder_.get(),
-      "Should call constructor first, that will init the pass_builder_.");
+  if (!pass_builder_.get()) {
+    if (use_gpu_) {
+      LOG(INFO) << "Create GPU IR passes";
+      pass_builder_.reset(new GpuPassStrategy);
+    } else {
+      LOG(INFO) << "Create CPU IR passes";
+      pass_builder_.reset(new CpuPassStrategy);
+    }
+  } else if (pass_builder_->use_gpu() ^ use_gpu()) {
+    LOG(WARNING) << "The use_gpu flag is not compatible between Config and "
+                    "PassBuilder, the flags are "
+                 << use_gpu() << " " << pass_builder_->use_gpu();
+    LOG(WARNING) << "Please make them compatible, still use the existing "
+                    "PassBuilder.";
+  }
+
   return pass_builder_.get();
 }

-contrib::AnalysisConfig::AnalysisConfig(bool use_gpu) {
-  this->use_gpu = use_gpu;
-  if (use_gpu) {
-    pass_builder_.reset(new GpuPassStrategy);
-  } else {
-    pass_builder_.reset(new CpuPassStrategy);
-  }
+contrib::AnalysisConfig::AnalysisConfig(const std::string &model_dir) {
+  model_dir_ = model_dir;
+}
+
+contrib::AnalysisConfig::AnalysisConfig(const std::string &prog_file,
+                                        const std::string &params_file) {
+  prog_file_ = prog_file;
+  params_file_ = params_file;
+}
+
+void contrib::AnalysisConfig::SetModel(const std::string &prog_file_path,
+                                       const std::string &params_file_path) {
+  prog_file_ = prog_file_path;
+  params_file_ = params_file_path;
+}
+
+void contrib::AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
+                                           int device_id) {
+#ifdef PADDLE_WITH_CUDA
+  use_gpu_ = true;
+  memory_pool_init_size_mb_ = memory_pool_init_size_mb;
+  device_id_ = device_id;
+#else
+  LOG(ERROR) << "Please compile with gpu to EnableGpu";
+  use_gpu_ = false;
+#endif
 }

+void contrib::AnalysisConfig::DisableGpu() { use_gpu_ = false; }
+
 contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
-  // fields from Config
-  model_dir = other.model_dir;
-  // fields from NativeConfig
-  use_gpu = other.use_gpu;
-  device = other.device;
-  fraction_of_gpu_memory = other.fraction_of_gpu_memory;
-  prog_file = other.prog_file;
-  param_file = other.param_file;
-  specify_input_name = other.specify_input_name;
-  cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_;
-  // fields from this.
-  enable_ir_optim = other.enable_ir_optim;
-  // For mkldnn
-  use_mkldnn_ = other.use_mkldnn_;
-  mkldnn_enabled_op_types_ = other.mkldnn_enabled_op_types_;
-
-  use_feed_fetch_ops = other.use_feed_fetch_ops;
-  use_tensorrt_ = other.use_tensorrt_;
-  tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_;
-  tensorrt_workspace_size_ = other.tensorrt_workspace_size_;
-  tensorrt_min_subgraph_size_ = other.tensorrt_min_subgraph_size_;
-  model_from_memory_ = other.model_from_memory_;
-
-  if (use_gpu) {
+#define CP_MEMBER(member__) member__ = other.member__;
+
+  // Model related.
+  CP_MEMBER(model_dir_);
+  CP_MEMBER(prog_file_);
+  CP_MEMBER(params_file_);
+  CP_MEMBER(model_from_memory_);  // the memory model reuses prog_file_ and
+                                  // params_file_ fields.
+  // Gpu releated.
+  CP_MEMBER(use_gpu_);
+  CP_MEMBER(device_id_);
+  CP_MEMBER(memory_pool_init_size_mb_);
+  // TensorRT releated.
+  CP_MEMBER(use_tensorrt_);
+  CP_MEMBER(tensorrt_workspace_size_);
+  CP_MEMBER(tensorrt_max_batchsize_);
+  CP_MEMBER(tensorrt_min_subgraph_size_);
+  // MKLDNN releated.
+  CP_MEMBER(use_mkldnn_);
+  CP_MEMBER(mkldnn_enabled_op_types_);
+
+  // Ir related.
+  CP_MEMBER(enable_ir_optim_);
+  CP_MEMBER(use_feed_fetch_ops_);
+  CP_MEMBER(ir_debug_);
+  CP_MEMBER(specify_input_name_);
+
+  CP_MEMBER(cpu_math_library_num_threads_);
+
+  CP_MEMBER(serialized_info_cache_);
+
+  if (use_gpu_) {
     pass_builder_.reset(new GpuPassStrategy(
         *static_cast<GpuPassStrategy *>(other.pass_builder())));
   } else {
     pass_builder_.reset(new CpuPassStrategy(
         *static_cast<CpuPassStrategy *>(other.pass_builder())));
   }
-}
-
-contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) {
-  // fields from Config
-  model_dir = other.model_dir;
-  // fields from NativeConfig
-  use_gpu = other.use_gpu;
-  device = other.device;
-  fraction_of_gpu_memory = other.fraction_of_gpu_memory;
-  prog_file = other.prog_file;
-  param_file = other.param_file;
-  specify_input_name = other.specify_input_name;
-  cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_;
-  // fields from this.
-  enable_ir_optim = other.enable_ir_optim;
-  // For mkldnn
-  use_mkldnn_ = other.use_mkldnn_;
-  mkldnn_enabled_op_types_ = other.mkldnn_enabled_op_types_;
-
-  use_feed_fetch_ops = other.use_feed_fetch_ops;
-  use_tensorrt_ = other.use_tensorrt_;
-  tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_;
-  tensorrt_workspace_size_ = other.tensorrt_workspace_size_;
-  tensorrt_min_subgraph_size_ = other.tensorrt_min_subgraph_size_;
-  model_from_memory_ = other.model_from_memory_;
-
-  pass_builder_ = std::move(other.pass_builder_);
+
+#undef CP_MEMBER
 }

 void contrib::AnalysisConfig::EnableMKLDNN() {
@@ -112,17 +127,90 @@ void contrib::AnalysisConfig::EnableTensorRtEngine(int workspace_size,
   use_tensorrt_ = true;
   tensorrt_workspace_size_ = workspace_size;
   tensorrt_max_batchsize_ = max_batch_size;
-  tensorrt_min_subgraph_size_ = min_subgraph_size;
-  // Append after the conv+affine_channel fuse pass.
-  pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
+}
+
+void contrib::AnalysisConfig::Update() {
+  auto info = SerializeInfoCache();
+  if (info == serialized_info_cache_) return;
+
+  if (use_gpu_) {
+    pass_builder_.reset(new GpuPassStrategy);
+  } else {
+    pass_builder_.reset(new CpuPassStrategy);
+  }
+
+  if (use_tensorrt_) {
+    if (!use_gpu_) {
+      LOG(ERROR)
+          << "TensorRT engine is not available when EnableGpu() not actived.";
+    } else {
+      // Append after the infer_clean pass.
+      pass_builder()->InsertPass(1, "tensorrt_subgraph_pass");
+    }
+  }
+
+  if (use_mkldnn_) {
+    if (!enable_ir_optim_) {
+      LOG(ERROR)
+          << "EnableMKLDNN() only works when IR optimization is enabled.";
+    }
+#ifdef PADDLE_WITH_MKLDNN
+    pass_builder()->EnableMKLDNN();
+    use_mkldnn_ = true;
+#else
+    LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN";
+    use_mkldnn_ = false;
+#endif
+  }
+
+  if (ir_debug_) {
+    pass_builder()->TurnOnDebug();
+  }
+}
+
+std::string contrib::AnalysisConfig::SerializeInfoCache() {
+  std::stringstream ss;
+  ss << use_gpu_;
+  ss << memory_pool_init_size_mb_;
+
+  ss << use_tensorrt_;
+  ss << tensorrt_workspace_size_;
+  ss << tensorrt_max_batchsize_;
+
+  ss << use_mkldnn_;
+
+  ss << enable_ir_optim_;
+  ss << use_feed_fetch_ops_;
+  ss << ir_debug_;
+
+  return ss.str();
+}
+
+void contrib::AnalysisConfig::SetCpuMathLibraryNumThreads(
+    int cpu_math_library_num_threads) {
+  cpu_math_library_num_threads_ = cpu_math_library_num_threads;
+}
+
+float contrib::AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
+#ifdef PADDLE_WITH_CUDA
+  // Get the GPU memory details and calculate the fraction of memory for the
+  // GPU memory pool.
+  size_t gpu_used, gpu_available;
+  platform::GpuMemoryUsage(&gpu_used, &gpu_available);
+  double total_gpu_memory = (gpu_used + gpu_available) / 1024. / 1024.;
+  float fraction_of_gpu_memory =
+      static_cast<double>(memory_pool_init_size_mb()) / total_gpu_memory;
+  return fraction_of_gpu_memory;
+#else
+  return 0.;
+#endif
 }

 void contrib::AnalysisConfig::SetModelBuffer(const char *prog_buffer,
                                              size_t prog_buffer_size,
                                              const char *param_buffer,
                                              size_t param_buffer_size) {
-  prog_file = std::string(prog_buffer, prog_buffer + prog_buffer_size);
-  param_file = std::string(param_buffer, param_buffer + param_buffer_size);
+  prog_file_ = std::string(prog_buffer, prog_buffer + prog_buffer_size);
+  params_file_ = std::string(param_buffer, param_buffer + param_buffer_size);
   model_from_memory_ = true;
 }
......
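For orientation, here is a minimal usage sketch of the refactored config API introduced above (SetModel, EnableUseGpu, SwitchIrOptim, CreatePaddlePredictor). This is illustrative only; the model path and memory-pool size are placeholders, not values taken from this commit.

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void BuildPredictorSketch() {
  paddle::contrib::AnalysisConfig config;
  config.SetModel("/path/to/model_dir");         // hypothetical model directory
  config.EnableUseGpu(100 /* MB for the memory pool */, 0 /* device id */);
  config.SwitchIrOptim(true);                    // run the IR optimization passes
  auto predictor = paddle::CreatePaddlePredictor(config);
  // predictor->Run(...) as before; the old public NativeConfig fields are gone.
}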
@@ -33,6 +33,7 @@
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/cpu_helper.h"
+#include "paddle/fluid/platform/gpu_info.h"
 #include "paddle/fluid/platform/profiler.h"

 DECLARE_bool(profile);
@@ -59,8 +60,8 @@ bool AnalysisPredictor::Init(
   if (FLAGS_profile) {
     LOG(WARNING) << "Profiler is actived, might affect the performance";
     LOG(INFO) << "You can turn off by set gflags '-profile false'";
-    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
-                                           : platform::ProfilerState::kCPU;
+    auto tracking_device = config_.use_gpu() ? platform::ProfilerState::kAll
+                                             : platform::ProfilerState::kCPU;
     platform::EnableProfiler(tracking_device);
   }
@@ -112,7 +113,7 @@ bool AnalysisPredictor::PrepareProgram(
     // Optimize the program, and load parameters and modify them in the
     // scope_.
     // This will change the scope_ address.
-    if (config_.enable_ir_optim) {
+    if (config_.ir_optim()) {
       status_ir_optim_enabled_ = true;
       OptimizeInferenceProgram();
     } else {
@@ -140,9 +141,9 @@ bool AnalysisPredictor::PrepareProgram(
   return true;
 }
 bool AnalysisPredictor::CreateExecutor() {
-  if (config_.use_gpu) {
+  if (config_.use_gpu_) {
     status_use_gpu_ = true;
-    place_ = paddle::platform::CUDAPlace(config_.device);
+    place_ = paddle::platform::CUDAPlace(config_.device_id_);
   } else {
     place_ = paddle::platform::CPUPlace();
   }
@@ -151,7 +152,7 @@ bool AnalysisPredictor::CreateExecutor() {
 }
 bool AnalysisPredictor::PrepareExecutor() {
   executor_->Prepare(sub_scope_, *inference_program_, 0,
-                     config_.use_feed_fetch_ops);
+                     config_.use_feed_fetch_ops_);

   PADDLE_ENFORCE_NOT_NULL(sub_scope_);
@@ -250,7 +251,7 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
     }
     input.set_lod(lod);
     int idx = -1;
-    if (config_.specify_input_name) {
+    if (config_.specify_input_name_) {
       auto name = inputs[i].name;
       if (feed_names_.find(name) == feed_names_.end()) {
         LOG(ERROR) << "feed names from program do not have name: [" << name
@@ -314,22 +315,22 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
 void AnalysisPredictor::OptimizeInferenceProgram() {
   status_program_optimized_ = true;

-  argument_.SetUseGPU(config_.use_gpu);
-  argument_.SetGPUDeviceId(config_.device);
+  argument_.SetUseGPU(config_.use_gpu());
+  argument_.SetGPUDeviceId(config_.gpu_device_id());
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
-  if (!config_.model_dir.empty()) {
-    argument_.SetModelDir(config_.model_dir);
+  if (!config_.model_dir().empty()) {
+    argument_.SetModelDir(config_.model_dir());
   } else {
     PADDLE_ENFORCE(
-        !config_.param_file.empty(),
+        !config_.params_file().empty(),
         "Either model_dir or (param_file, prog_file) should be set.");
-    PADDLE_ENFORCE(!config_.prog_file.empty());
-    argument_.SetModelProgramPath(config_.prog_file);
-    argument_.SetModelParamsPath(config_.param_file);
+    PADDLE_ENFORCE(!config_.prog_file().empty());
+    argument_.SetModelProgramPath(config_.prog_file());
+    argument_.SetModelParamsPath(config_.params_file());
   }

-  if (config_.use_gpu && config_.use_tensorrt_) {
+  if (config_.use_gpu() && config_.tensorrt_engine_enabled()) {
     argument_.SetUseTensorRT(true);
     argument_.SetTensorRtWorkspaceSize(config_.tensorrt_workspace_size_);
     argument_.SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_);
@@ -341,7 +342,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   }

   auto passes = config_.pass_builder()->AllPasses();
-  if (!config_.enable_ir_optim) passes.clear();
+  if (!config_.ir_optim()) passes.clear();
   argument_.SetIrAnalysisPasses(passes);
   argument_.SetScopeNotOwned(const_cast<framework::Scope *>(scope_.get()));
   Analyzer().Run(&argument_);
@@ -358,18 +359,26 @@ template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
   VLOG(3) << "create AnalysisConfig";
-  if (config.use_gpu) {
+  if (config.use_gpu()) {
     // 1. GPU memeroy
-    PADDLE_ENFORCE_GT(
-        config.fraction_of_gpu_memory, 0.f,
-        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
-    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
+    PADDLE_ENFORCE_GT(config.memory_pool_init_size_mb(), 0.f);
+    PADDLE_ENFORCE_GE(config.gpu_device_id(), 0, "Invalid device id %d",
+                      config.gpu_device_id());
     std::vector<std::string> flags;
-    if (config.fraction_of_gpu_memory >= 0.0f ||
-        config.fraction_of_gpu_memory <= 0.95f) {
+
+    float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool();
+    if (fraction_of_gpu_memory > 0.95f) {
+      LOG(ERROR)
+          << "Allocate too much memory for the GPU memory pool, assigned "
+          << config.memory_pool_init_size_mb() << " MB";
+      LOG(ERROR)
+          << "Try to shink the value by setting AnalysisConfig::EnableGpu(...)";
+    }
+
+    if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) {
       flags.push_back("dummpy");
       std::string flag = "--fraction_of_gpu_memory_to_use=" +
-                         std::to_string(config.fraction_of_gpu_memory);
+                         std::to_string(fraction_of_gpu_memory);
       flags.push_back(flag);
       VLOG(3) << "set flag: " << flag;
       framework::InitGflags(flags);
@@ -443,22 +452,22 @@ bool AnalysisPredictor::ZeroCopyRun() {
 bool AnalysisPredictor::LoadProgramDesc() {
   // Initialize the inference program
   std::string filename;
-  if (!config_.model_dir.empty()) {
-    filename = config_.model_dir + "/__model__";
-  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
+  if (!config_.model_dir().empty()) {
+    filename = config_.model_dir() + "/__model__";
+  } else if (!config_.prog_file().empty() && !config_.params_file().empty()) {
     // All parameters are saved in a single file.
     // The file names should be consistent with that used
     // in Python API `fluid.io.save_inference_model`.
-    filename = config_.prog_file;
+    filename = config_.prog_file();
   } else {
-    if (config_.model_dir.empty() && config_.prog_file.empty()) {
+    if (config_.model_dir().empty() && config_.prog_file().empty()) {
       LOG(ERROR)
          << "Either model_dir or (prog_file, param_file) should be set.";
      return false;
    }
    LOG(ERROR) << string::Sprintf(
-        "not valid model path '%s' or program path '%s'.", config_.model_dir,
-        config_.param_file);
+        "not valid model path '%s' or program path '%s'.", config_.model_dir(),
+        config_.params_file());
     return false;
   }
@@ -478,7 +487,7 @@ bool AnalysisPredictor::LoadProgramDesc() {
     proto.ParseFromString(pb_content);
   } else {
-    proto.ParseFromString(config_.prog_file);
+    proto.ParseFromString(config_.prog_file());
   }
   inference_program_.reset(new framework::ProgramDesc(proto));
   return true;
@@ -508,27 +517,27 @@ bool AnalysisPredictor::LoadParameters() {
       new_var->SetLoDLevel(var->GetLoDLevel());
       new_var->SetPersistable(true);

-      if (!config_.param_file.empty()) {
+      if (!config_.params_file().empty()) {
         params.push_back(new_var->Name());
       } else {
         // append_op
         framework::OpDesc *op = load_block->AppendOp();
         op->SetType("load");
         op->SetOutput("Out", {new_var->Name()});
-        op->SetAttr("file_path", {config_.model_dir + "/" + new_var->Name()});
+        op->SetAttr("file_path", {config_.model_dir() + "/" + new_var->Name()});
         op->CheckAttrs();
       }
     }
   }

-  if (!config_.param_file.empty()) {
+  if (!config_.params_file().empty()) {
     // sort paramlist to have consistent ordering
     std::sort(params.begin(), params.end());
     // append just the load_combine op
     framework::OpDesc *op = load_block->AppendOp();
     op->SetType("load_combine");
     op->SetOutput("Out", params);
-    op->SetAttr("file_path", {config_.param_file});
+    op->SetAttr("file_path", {config_.params_file()});
     op->CheckAttrs();
   }
......
@@ -25,9 +25,9 @@ namespace paddle {
 using contrib::AnalysisConfig;

 TEST(AnalysisPredictor, analysis_off) {
-  AnalysisConfig config(false);
-  config.model_dir = FLAGS_dirname;
-  config.enable_ir_optim = false;
+  AnalysisConfig config;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchIrOptim(false);
   auto _predictor = CreatePaddlePredictor<AnalysisConfig>(config);
   auto* predictor = static_cast<AnalysisPredictor*>(_predictor.get());
@@ -55,14 +55,14 @@ TEST(AnalysisPredictor, analysis_off) {
 }

 TEST(AnalysisPredictor, analysis_on) {
+  AnalysisConfig config;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchIrOptim(true);
 #ifdef PADDLE_WITH_CUDA
-  AnalysisConfig config(true);
-  config.fraction_of_gpu_memory = 0.15;
+  config.EnableUseGpu(100, 0);
 #else
-  AnalysisConfig config;
+  config.DisableGpu();
 #endif
-  config.model_dir = FLAGS_dirname;
-  config.enable_ir_optim = true;
   auto _predictor = CreatePaddlePredictor<AnalysisConfig>(config);
   auto* predictor = static_cast<AnalysisPredictor*>(_predictor.get());
@@ -89,7 +89,8 @@ TEST(AnalysisPredictor, analysis_on) {
   }

   // compare with NativePredictor
-  auto naive_predictor = CreatePaddlePredictor<NativeConfig>(config);
+  auto naive_predictor =
+      CreatePaddlePredictor<NativeConfig>(config.ToNativeConfig());
   std::vector<PaddleTensor> naive_outputs;
   ASSERT_TRUE(naive_predictor->Run(inputs, &naive_outputs));
   ASSERT_EQ(naive_outputs.size(), 1UL);
@@ -98,9 +99,8 @@
 TEST(AnalysisPredictor, ZeroCopy) {
   AnalysisConfig config;
-  config.model_dir = FLAGS_dirname;
-  config.use_feed_fetch_ops = false;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchUseFeedFetchOps(false);
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

   auto w0 = predictor->GetInputTensor("firstw");
@@ -137,9 +137,9 @@ TEST(AnalysisPredictor, ZeroCopy) {
 TEST(AnalysisPredictor, Clone) {
   AnalysisConfig config;
-  config.model_dir = FLAGS_dirname;
-  config.use_feed_fetch_ops = true;
-  config.enable_ir_optim = true;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchUseFeedFetchOps(true);
+  config.SwitchIrOptim(true);
   std::vector<std::unique_ptr<PaddlePredictor>> predictors;
   predictors.emplace_back(CreatePaddlePredictor(config));
......
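A condensed sketch of the zero-copy path the ZeroCopy test above exercises. The input name "firstw" comes from the word2vec test model used by the tester, the model path is a placeholder, and the tensor-filling step is elided; only calls that appear in this diff (SwitchUseFeedFetchOps, GetInputTensor, ZeroCopyRun) are used.

paddle::contrib::AnalysisConfig config;
config.SetModel("/path/to/word2vec/model");     // placeholder path
config.SwitchUseFeedFetchOps(false);            // required for the zero-copy tensors
auto predictor =
    paddle::CreatePaddlePredictor<paddle::contrib::AnalysisConfig>(config);
auto w0 = predictor->GetInputTensor("firstw");  // a ZeroCopyTensor handle
// ... fill w0 (and the other inputs) in place, then:
predictor->ZeroCopyRun();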
@@ -19,8 +19,6 @@ limitations under the License. */

 #pragma once

-#define WITH_ANAKIN
-
 #include <vector>

 #include "framework/core/net/net.h"
......
@@ -288,7 +288,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   VLOG(3) << "create NativePaddlePredictor";
   if (config.use_gpu) {
     // 1. GPU memeroy
-    PADDLE_ENFORCE_GT(
+    PADDLE_ENFORCE_GE(
         config.fraction_of_gpu_memory, 0.f,
         "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
     PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
......
@@ -295,7 +295,8 @@ TEST(inference_api_native, image_classification_gpu) {
 #endif

 TEST(PassBuilder, Delete) {
-  contrib::AnalysisConfig config(false);
+  contrib::AnalysisConfig config;
+  config.DisableGpu();
   config.pass_builder()->DeletePass("attention_lstm_fuse_pass");
   const auto& passes = config.pass_builder()->AllPasses();
   auto it = std::find(passes.begin(), passes.end(), "attention_lstm_fuse_pass");
......
@@ -36,12 +36,11 @@ namespace demo {
 */
 void Main() {
   std::unique_ptr<PaddlePredictor> predictor;
-  paddle::contrib::AnalysisConfig config(true);
-  config.param_file = FLAGS_modeldir + "/__params__";
-  config.prog_file = FLAGS_modeldir + "/__model__";
-  config.device = 0;
+  paddle::contrib::AnalysisConfig config;
+  config.EnableUseGpu(100, 0);
+  config.SetModel(FLAGS_modeldir + "/__params__",
+                  FLAGS_modeldir + "/__model__");
   config.EnableTensorRtEngine();
-  config.fraction_of_gpu_memory = 0.1;  // set by yourself
   predictor = CreatePaddlePredictor(config);

   VLOG(3) << "begin to process data";
......
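For reference, the same TensorRT setup in condensed form, with the default EnableTensorRtEngine arguments from paddle_analysis_config.h spelled out. This is a sketch only; the paths are placeholders, not the demo's FLAGS_modeldir values.

paddle::contrib::AnalysisConfig config;
config.EnableUseGpu(100, 0);
config.SetModel("/path/to/__model__", "/path/to/__params__");  // placeholder paths
config.EnableTensorRtEngine(1 << 20 /* workspace_size */,
                            1 /* max_batch_size */,
                            3 /* min_subgraph_size */);
auto predictor = paddle::CreatePaddlePredictor(config);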
@@ -40,15 +40,14 @@ using contrib::AnalysisConfig;
 */
 void Main(bool use_gpu) {
   std::unique_ptr<PaddlePredictor> predictor, analysis_predictor;
-  AnalysisConfig config(use_gpu);
-  config.param_file = FLAGS_modeldir + "/__params__";
-  config.prog_file = FLAGS_modeldir + "/__model__";
-  config.device = 0;
-  if (FLAGS_use_gpu) {
-    config.fraction_of_gpu_memory = 0.1;  // set by yourself
+  AnalysisConfig config;
+  if (use_gpu) {
+    config.EnableUseGpu(100, 0);
   }
+  config.SetModel(FLAGS_modeldir + "/__model__",
+                  FLAGS_modeldir + "/__params__");

-  predictor = CreatePaddlePredictor<NativeConfig>(config);
+  predictor = CreatePaddlePredictor<NativeConfig>(config.ToNativeConfig());
   analysis_predictor = CreatePaddlePredictor(config);

   // Just a single batch of data.
......
@@ -34,26 +34,67 @@ class AnalysisPredictor;
 namespace contrib {

 // NOTE WIP, not stable yet.
-struct AnalysisConfig : public NativeConfig {
-  explicit AnalysisConfig(bool use_gpu = false);
+struct AnalysisConfig {
+  AnalysisConfig() = default;
   explicit AnalysisConfig(const AnalysisConfig& other);
-  explicit AnalysisConfig(AnalysisConfig&& other);
+  explicit AnalysisConfig(const std::string& model_dir);
+  explicit AnalysisConfig(const std::string& prog_file,
+                          const std::string& params_file);
+
+  // Model path related.
+  void SetModel(const std::string& model_dir) { model_dir_ = model_dir; }
+  void SetModel(const std::string& prog_file_path,
+                const std::string& params_file_path);
+  void SetProgFile(const std::string& x) { prog_file_ = x; }
+  void SetParamsFile(const std::string& x) { params_file_ = x; }
+  const std::string& model_dir() const { return model_dir_; }
+  const std::string& prog_file() const { return prog_file_; }
+  const std::string& params_file() const { return params_file_; }
+
+  // GPU related.
+  void EnableUseGpu(uint64_t memory_pool_init_size_mb, int device_id = 0);
+  void DisableGpu();
+  bool use_gpu() const { return use_gpu_; }
+  int gpu_device_id() const { return device_id_; }
+  int memory_pool_init_size_mb() const { return memory_pool_init_size_mb_; }
+  float fraction_of_gpu_memory_for_pool() const;

   // Determine whether to perform graph optimization.
-  bool enable_ir_optim = true;
+  void SwitchIrOptim(int x = true) { enable_ir_optim_ = x; }
+  bool ir_optim() const { return enable_ir_optim_; }

-  // Get a pass builder for customize the passes in IR analysis phase.
-  PassStrategy* pass_builder() const;
+  void SwitchUseFeedFetchOps(int x = true) { use_feed_fetch_ops_ = x; }
+  bool use_feed_fetch_ops_enabled() const { return use_feed_fetch_ops_; }

-  // NOT stable yet.
-  bool use_feed_fetch_ops{true};
+  void SwitchSpecifyInputNames(bool x = true) { specify_input_name_ = x; }
+  bool specify_input_name() const { return specify_input_name_; }

   void EnableTensorRtEngine(int workspace_size = 1 << 20,
                             int max_batch_size = 1, int min_subgraph_size = 3);
-  bool use_tensorrt() const { return use_tensorrt_; }
+  bool tensorrt_engine_enabled() const { return use_tensorrt_; }
+
+  void SwitchIrDebug(int x = true) { ir_debug_ = x; }

   void EnableMKLDNN();
-  bool use_mkldnn() const { return use_mkldnn_; }
+  bool mkldnn_enabled() const { return use_mkldnn_; }
+
+  // Set and get the number of cpu math library threads.
+  void SetCpuMathLibraryNumThreads(int cpu_math_library_num_threads);
+  int cpu_math_library_num_threads() const {
+    return cpu_math_library_num_threads_;
+  }
+
+  NativeConfig ToNativeConfig() const {
+    NativeConfig config;
+    config.model_dir = model_dir_;
+    config.prog_file = prog_file_;
+    config.param_file = params_file_;
+    config.use_gpu = use_gpu_;
+    config.device = device_id_;
+    config.fraction_of_gpu_memory = fraction_of_gpu_memory_for_pool();
+    config.specify_input_name = specify_input_name_;
+    return config;
+  }

   void SetMKLDNNOp(std::unordered_set<std::string> op_list) {
     mkldnn_enabled_op_types_ = op_list;
   }
@@ -65,10 +106,29 @@ struct AnalysisConfig : public NativeConfig {
   friend class ::paddle::AnalysisPredictor;

+  // NOTE just for developer, not an official API, easily to be broken.
+  // Get a pass builder for customize the passes in IR analysis phase.
+  PassStrategy* pass_builder() const;
+
+ protected:
+  // Update the config.
+  void Update();
+
+  std::string SerializeInfoCache();
+
  protected:
+  // Model pathes.
+  std::string model_dir_;
+  std::string prog_file_;
+  std::string params_file_;
+
+  // GPU releated.
+  bool use_gpu_{false};
+  int device_id_{0};
+  uint64_t memory_pool_init_size_mb_{100};  // initial size is 100MB.
+
+  // TensorRT releated.
   bool use_tensorrt_{false};
-  bool use_mkldnn_{false};
-  std::unordered_set<std::string> mkldnn_enabled_op_types_;
   // For workspace_size, refer it from here:
   // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
   int tensorrt_workspace_size_;
@@ -82,17 +142,24 @@ struct AnalysisConfig : public NativeConfig {
   // We set this variable to control the minimum number of nodes in the
   // subgraph, 3 as default value.
   int tensorrt_min_subgraph_size_{3};
-  std::unique_ptr<PassStrategy> pass_builder_;
+  bool use_mkldnn_{false};
+  std::unordered_set<std::string> mkldnn_enabled_op_types_;
+
   bool model_from_memory_{false};
-};

-// Configurations for Anakin engine.
-struct AnakinConfig : public PaddlePredictor::Config {
-  enum TargetType { NVGPU = 0, X86 };
-  int device;
-  std::string model_file;
-  int max_batch_size{-1};
-  TargetType target_type;
+  bool enable_ir_optim_{true};
+
+  bool use_feed_fetch_ops_{true};
+  bool ir_debug_{false};
+
+  bool specify_input_name_{false};
+
+  int cpu_math_library_num_threads_{1};
+
+  // A runtime cache, shouldn't be transferred to others.
+  std::string serialized_info_cache_;
+
+  mutable std::unique_ptr<PassStrategy> pass_builder_;
 };

 }  // namespace contrib
......
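Since AnalysisConfig no longer inherits from NativeConfig, code that previously handed an AnalysisConfig to the native engine now goes through ToNativeConfig(). A minimal sketch of that pattern, assuming a placeholder model directory:

paddle::contrib::AnalysisConfig config;
config.SetModel("/path/to/model_dir");  // placeholder
config.DisableGpu();
// Run the same model on the old Native engine for comparison ...
auto native_predictor =
    paddle::CreatePaddlePredictor<paddle::NativeConfig>(config.ToNativeConfig());
// ... and on the analysis engine.
auto analysis_predictor = paddle::CreatePaddlePredictor(config);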
@@ -26,9 +26,8 @@ limitations under the License. */
 #include <string>
 #include <vector>

-#include "paddle_api.h"  // NOLINT
-#ifndef WITH_ANAKIN
 #include "paddle_analysis_config.h"  // NOLINT
-#else
+#include "paddle_api.h"              // NOLINT
+#ifdef WITH_ANAKIN
 #include "paddle_anakin_config.h"  // NOLINT
 #endif
@@ -62,7 +62,12 @@ class PassStrategy : public PaddlePassBuilder {
   // still some CPU kernels running in CPU mode.
   virtual void EnableMKLDNN() = 0;

+  bool use_gpu() const { return use_gpu_; }
+
   virtual ~PassStrategy() = default;
+
+ protected:
+  bool use_gpu_{false};
 };

 /*
@@ -88,6 +93,7 @@ class CpuPassStrategy : public PassStrategy {
         "conv_eltwiseadd_bn_fuse_pass",  //
         "is_test_pass",                  //
     });
+    use_gpu_ = false;
   }

   virtual ~CpuPassStrategy() = default;
@@ -126,10 +132,14 @@ class GpuPassStrategy : public PassStrategy {
         "conv_elementwise_add2_act_fuse_pass",  //
         "conv_elementwise_add_fuse_pass",       //
     });
+
+    use_gpu_ = true;
   }

   GpuPassStrategy(const GpuPassStrategy &other)
-      : PassStrategy(other.AllPasses()) {}
+      : PassStrategy(other.AllPasses()) {
+    use_gpu_ = true;
+  }

   void EnableMKLDNN() override;
......
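The pass_builder() hook kept above remains the developer-facing way to tweak the IR pass list. A sketch using only calls that appear in this diff (DeletePass, TurnOnDebug), on a hypothetical CPU config:

paddle::contrib::AnalysisConfig config;
config.SetModel("/path/to/model_dir");  // placeholder
config.DisableGpu();                    // selects the CpuPassStrategy
config.pass_builder()->DeletePass("fc_gru_fuse_pass");  // drop one fusion pass
config.pass_builder()->TurnOnDebug();                   // enable per-pass debug output
auto predictor = paddle::CreatePaddlePredictor(config);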
@@ -165,12 +165,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(contrib::AnalysisConfig *cfg) {
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->param_file = FLAGS_infer_model + "/param";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim(true);
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
......
@@ -105,11 +105,10 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
......
@@ -76,11 +76,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(contrib::AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
......
@@ -84,13 +84,12 @@ void SetConfig(contrib::AnalysisConfig *cfg, bool memory_load = false) {
     cfg->SetModelBuffer(&buffer_prog[0], buffer_prog.size(), &buffer_param[0],
                         buffer_param.size());
   } else {
-    cfg->prog_file = FLAGS_infer_model + "/__model__";
-    cfg->param_file = FLAGS_infer_model + "/param";
+    cfg->SetModel(FLAGS_infer_model + "/__model__",
+                  FLAGS_infer_model + "/param");
   }
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
......
@@ -21,12 +21,10 @@ namespace inference {
 namespace analysis {

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->param_file = FLAGS_infer_model + "/params";
-  cfg->prog_file = FLAGS_infer_model + "/model";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->enable_ir_optim = true;
-  cfg->specify_input_name = true;
+  cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
+  cfg->DisableGpu();
+  cfg->SwitchIrOptim();
+  cfg->SwitchSpecifyInputNames();
   cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
 }
......
@@ -204,12 +204,10 @@ void PrepareZeroCopyInputs(ZeroCopyTensor *lod_attention_tensor,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->param_file = FLAGS_infer_model + "/param";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
@@ -225,10 +223,10 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
 // Easy for profiling independently.
 TEST(Analyzer_rnn1, profile) {
-  contrib::AnalysisConfig cfg(false);
+  contrib::AnalysisConfig cfg;
   SetConfig(&cfg);
-  cfg.fraction_of_gpu_memory = 0.1;
-  cfg.pass_builder()->TurnOnDebug();
+  cfg.DisableGpu();
+  cfg.SwitchIrDebug();

   std::vector<PaddleTensor> outputs;
   std::vector<std::vector<PaddleTensor>> input_slots_all;
@@ -293,16 +291,18 @@ TEST(Analyzer_rnn1, multi_thread) {
 TEST(Analyzer_rnn1, ZeroCopy) {
   AnalysisConfig config;
   SetConfig(&config);
-  config.use_feed_fetch_ops = false;
+  config.SwitchUseFeedFetchOps(false);

   PaddlePlace place;

   auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

-  config.use_feed_fetch_ops = true;
+  config.SwitchUseFeedFetchOps(true);
-  auto native_predictor = CreatePaddlePredictor<NativeConfig>(config);
+  auto native_predictor =
+      CreatePaddlePredictor<NativeConfig>(config.ToNativeConfig());

-  config.use_feed_fetch_ops = true;  // the analysis predictor needs feed/fetch.
+  config.SwitchUseFeedFetchOps(
+      true);  // the analysis predictor needs feed/fetch.
   auto analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config);

 #define NEW_TENSOR(name__) \
@@ -362,7 +362,7 @@ TEST(Analyzer_rnn1, ZeroCopy) {
 TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
   AnalysisConfig config;
   SetConfig(&config);
-  config.use_feed_fetch_ops = false;
+  config.SwitchUseFeedFetchOps(false);

 #define NEW_TENSOR(name__) \
   auto name__##_tensor = predictor->GetInputTensor(#name__);
......
@@ -105,12 +105,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->param_file = FLAGS_infer_model + "/param";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
......
@@ -89,11 +89,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
......
@@ -122,12 +122,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data) {
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->param_file = FLAGS_infer_model + "/params";
-  cfg->prog_file = FLAGS_infer_model + "/model";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->enable_ir_optim = true;
-  cfg->specify_input_name = true;
+  cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
   cfg->pass_builder()->TurnOnDebug();
   cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
 }
......
@@ -47,11 +47,10 @@ struct DataReader {
 };

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
......
@@ -51,12 +51,11 @@ Record ProcessALine(const std::string &line) {
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->param_file = FLAGS_infer_model + "/__params__";
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->enable_ir_optim = true;
-  cfg->specify_input_name = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__",
+                FLAGS_infer_model + "/__params__");
+  cfg->DisableGpu();
+  cfg->SwitchIrDebug();
+  cfg->SwitchSpecifyInputNames();
   // TODO(TJ): fix fusion gru
   cfg->pass_builder()->DeletePass("fc_gru_fuse_pass");
 }
......
@@ -64,19 +64,23 @@ std::ostream &operator<<(std::ostream &os,
   num_spaces++;
   os << *reinterpret_cast<const NativeConfig *>(&config);
   if (!config.model_from_memory()) {
-    os << GenSpaces(num_spaces) << "prog_file: " << config.prog_file << "\n";
-    os << GenSpaces(num_spaces) << "param_file: " << config.param_file << "\n";
+    os << GenSpaces(num_spaces) << "prog_file: " << config.prog_file() << "\n";
+    os << GenSpaces(num_spaces) << "param_file: " << config.params_file()
+       << "\n";
   } else {
     os << GenSpaces(num_spaces)
        << "prog_file and param_file: load from memory \n";
   }
-  os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.enable_ir_optim
+  os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.ir_optim()
      << "\n";
+  os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.ir_optim()
+     << "\n";
+  os << GenSpaces(num_spaces)
+     << "use_feed_fetch_ops: " << config.use_feed_fetch_ops_enabled() << "\n";
   os << GenSpaces(num_spaces)
-     << "use_feed_fetch_ops: " << config.use_feed_fetch_ops << "\n";
-  os << GenSpaces(num_spaces) << "use_tensorrt: " << config.use_tensorrt()
+     << "use_tensorrt: " << config.tensorrt_engine_enabled() << "\n";
+  os << GenSpaces(num_spaces) << "use_mkldnn: " << config.mkldnn_enabled()
      << "\n";
-  os << GenSpaces(num_spaces) << "use_mkldnn: " << config.use_mkldnn() << "\n";
   num_spaces--;
   os << GenSpaces(num_spaces) << "}\n";
   return os;
......
@@ -328,7 +328,10 @@ void CompareNativeAndAnalysis(
     const std::vector<std::vector<PaddleTensor>> &inputs) {
   PrintConfig(config, true);
   std::vector<PaddleTensor> native_outputs, analysis_outputs;
-  TestOneThreadPrediction(config, inputs, &native_outputs, false);
+  const auto *analysis_config =
+      reinterpret_cast<const contrib::AnalysisConfig *>(config);
+  auto native_config = analysis_config->ToNativeConfig();
+  TestOneThreadPrediction(&native_config, inputs, &native_outputs, false);
   TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
   CompareResult(analysis_outputs, native_outputs);
 }
......
@@ -46,22 +46,20 @@ void SetConfig<contrib::AnalysisConfig>(contrib::AnalysisConfig* config,
                                         std::string model_dir, bool use_gpu,
                                         bool use_tensorrt, int batch_size) {
   if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
-    config->prog_file = model_dir + "/" + FLAGS_prog_filename;
-    config->param_file = model_dir + "/" + FLAGS_param_filename;
+    config->SetModel(model_dir + "/" + FLAGS_prog_filename,
+                     model_dir + "/" + FLAGS_param_filename);
   } else {
-    config->model_dir = model_dir;
+    config->SetModel(model_dir);
   }
   if (use_gpu) {
-    config->use_gpu = true;
-    config->device = 0;
-    config->fraction_of_gpu_memory = 0.15;
+    config->EnableUseGpu(100, 0);
     if (use_tensorrt) {
       config->EnableTensorRtEngine(1 << 10, batch_size);
       config->pass_builder()->DeletePass("conv_bn_fuse_pass");
       config->pass_builder()->DeletePass("fc_fuse_pass");
       config->pass_builder()->TurnOnDebug();
     } else {
-      config->enable_ir_optim = true;
+      config->SwitchIrOptim();
     }
   }
 }
@@ -77,7 +75,8 @@ void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) {
   std::vector<PaddleTensor> outputs;
   if (use_analysis || use_tensorrt) {
-    contrib::AnalysisConfig config(true);
+    contrib::AnalysisConfig config;
+    config.EnableUseGpu(100, 0);
     config.pass_builder()->TurnOnDebug();
     SetConfig<contrib::AnalysisConfig>(&config, model_dir, true, use_tensorrt,
                                        FLAGS_batch_size);
@@ -109,7 +108,8 @@ void compare(std::string model_dir, bool use_tensorrt) {
                           &native_outputs, false);

   std::vector<PaddleTensor> analysis_outputs;
-  contrib::AnalysisConfig analysis_config(true);
+  contrib::AnalysisConfig analysis_config;
+  analysis_config.EnableUseGpu(50, 0);
   SetConfig<contrib::AnalysisConfig>(&analysis_config, model_dir, true,
                                      use_tensorrt, FLAGS_batch_size);
   TestOneThreadPrediction(
@@ -154,9 +154,9 @@ TEST(TensorRT_mobilenet, analysis) {

 TEST(AnalysisPredictor, use_gpu) {
   std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
-  AnalysisConfig config(true);
-  config.model_dir = model_dir;
-  config.fraction_of_gpu_memory = 0.15;
+  AnalysisConfig config;
+  config.EnableUseGpu(100, 0);
+  config.SetModel(model_dir);
   config.pass_builder()->TurnOnDebug();

   std::vector<std::vector<PaddleTensor>> inputs_all;
......