From 5fea8e10208f3997b1ba4d5927b36dc1f8ecfb2e Mon Sep 17 00:00:00 2001 From: huzhiqiang <912790387@qq.com> Date: Wed, 25 Mar 2020 13:07:43 +0800 Subject: [PATCH] [Python lib] Add opt lib into python lib (#3209) --- cmake/lite.cmake | 48 ++- lite/api/CMakeLists.txt | 5 + lite/api/light_api_impl.cc | 1 + lite/api/opt_base.cc | 364 ++++++++++++++++++ lite/api/opt_base.h | 86 +++++ lite/api/python/pybind/CMakeLists.txt | 2 +- lite/api/python/pybind/pybind.cc | 18 + lite/api/python/pybind/pybind.h | 4 + lite/core/CMakeLists.txt | 6 +- lite/core/context.h | 2 +- lite/kernels/arm/CMakeLists.txt | 2 +- lite/kernels/cuda/CMakeLists.txt | 2 +- lite/kernels/fpga/CMakeLists.txt | 2 +- lite/kernels/opencl/CMakeLists.txt | 2 +- lite/tools/build.sh | 1 + .../create_fake_kernel_registry.py | 151 ++++---- .../cmake_tools/record_supported_kernel_op.py | 35 +- 17 files changed, 634 insertions(+), 97 deletions(-) create mode 100644 lite/api/opt_base.cc create mode 100644 lite/api/opt_base.h diff --git a/cmake/lite.cmake b/cmake/lite.cmake index fd40fa437b..265de3fbf6 100644 --- a/cmake/lite.cmake +++ b/cmake/lite.cmake @@ -275,6 +275,11 @@ set(host_kernels CACHE INTERNAL "host kernels") set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt") file(WRITE ${kernels_src_list} "") # clean + +# file to record faked kernels for opt python lib +set(fake_kernels_src_list "${CMAKE_BINARY_DIR}/fake_kernels_src_list.txt") +file(WRITE ${fake_kernels_src_list} "") # clean + if(LITE_BUILD_TAILOR) set(tailored_kernels_list_path "${LITE_OPTMODEL_DIR}/.tailored_kernels_source_list") file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list) @@ -303,62 +308,74 @@ function(add_kernel TARGET device level) return() endif() - if (LITE_ON_MODEL_OPTIMIZE_TOOL) - # the source list will collect for model_optimize_tool to fake kernel generation. - foreach(src ${args_SRCS}) - file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") - endforeach() - return() - endif() - - # when compiling the model_optimize_tool, a source file with all the fake kernel definitions will be generated, - # no need to continue the compilation of the true kernel source. 
-  if (LITE_ON_MODEL_OPTIMIZE_TOOL)
-    return()
-  endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
-
   if ("${device}" STREQUAL "Host")
     set(host_kernels "${host_kernels};${TARGET}" CACHE INTERNAL "")
   endif()
   if ("${device}" STREQUAL "ARM")
     if (NOT LITE_WITH_ARM)
+      foreach(src ${args_SRCS})
+          file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
+      endforeach()
       return()
     endif()
     set(arm_kernels "${arm_kernels};${TARGET}" CACHE INTERNAL "")
   endif()
   if ("${device}" STREQUAL "X86")
     if (NOT LITE_WITH_X86)
+      foreach(src ${args_SRCS})
+          file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
+      endforeach()
       return()
+    elseif (LITE_ON_MODEL_OPTIMIZE_TOOL)
+      foreach(src ${args_SRCS})
+          file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
+      endforeach()
+      return()
     endif()
     set(x86_kernels "${x86_kernels};${TARGET}" CACHE INTERNAL "")
   endif()
   if ("${device}" STREQUAL "NPU")
     if (NOT LITE_WITH_NPU)
+      foreach(src ${args_SRCS})
+          file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
+      endforeach()
       return()
     endif()
     set(npu_kernels "${npu_kernels};${TARGET}" CACHE INTERNAL "")
   endif()
   if ("${device}" STREQUAL "XPU")
     if (NOT LITE_WITH_XPU)
+      foreach(src ${args_SRCS})
+          file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
+      endforeach()
       return()
     endif()
     set(xpu_kernels "${xpu_kernels};${TARGET}" CACHE INTERNAL "")
   endif()
   if ("${device}" STREQUAL "FPGA")
     if (NOT LITE_WITH_FPGA)
+      foreach(src ${args_SRCS})
+          file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
+      endforeach()
       return()
     endif()
     set(fpga_kernels "${fpga_kernels};${TARGET}" CACHE INTERNAL "")
   endif()
   if ("${device}" STREQUAL "BM")
     if (NOT LITE_WITH_BM)
+      foreach(src ${args_SRCS})
+          file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
+      endforeach()
       return()
     endif()
     set(bm_kernels "${bm_kernels};${TARGET}" CACHE INTERNAL "")
   endif()
   if ("${device}" STREQUAL "OPENCL")
     if (NOT LITE_WITH_OPENCL)
+      foreach(src ${args_SRCS})
+          file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
+      endforeach()
       return()
     endif()
     set(opencl_kernels "${opencl_kernels};${TARGET}" CACHE INTERNAL "")
@@ -366,6 +383,9 @@ function(add_kernel TARGET device level)
 
   if ("${device}" STREQUAL "CUDA")
     if (NOT LITE_WITH_CUDA)
+      foreach(src ${args_SRCS})
+          file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
+      endforeach()
       return()
     endif()
     set(cuda_kernels "${cuda_kernels};${TARGET}" CACHE INTERNAL "")
diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt
index e786f346cc..b360b476e0 100644
--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -303,6 +303,11 @@ if (LITE_ON_TINY_PUBLISH)
   return()
 endif()
 
+
+# add library for opt_base
+lite_cc_library(opt_base SRCS opt_base.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc DEPS kernel op optimizer mir_passes utils)
+add_dependencies(opt_base supported_kernel_op_info_h framework_proto all_kernel_faked_cc kernel_list_h)
+
 if (LITE_ON_MODEL_OPTIMIZE_TOOL)
   message(STATUS "Compiling opt")
   lite_cc_binary(opt SRCS opt.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc
diff --git a/lite/api/light_api_impl.cc b/lite/api/light_api_impl.cc
index 3965843250..cdf5b7fb06 100644
--- a/lite/api/light_api_impl.cc
+++ b/lite/api/light_api_impl.cc
@@ -58,6 +58,7 @@ void LightPredictorImpl::Run() {
 
 std::shared_ptr<PaddlePredictor> LightPredictorImpl::Clone() {
   LOG(FATAL) << "The Clone API is not supported in LigthPredictor";
+  return nullptr;
 }
 
 std::string LightPredictorImpl::GetVersion() const { return lite::version(); }
diff --git a/lite/api/opt_base.cc b/lite/api/opt_base.cc
new file mode 100644
index 0000000000..bd86f48624
--- /dev/null
+++ b/lite/api/opt_base.cc
@@ -0,0 +1,364 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/api/opt_base.h"
+#include "all_kernel_faked.cc"  // NOLINT
+
+namespace paddle {
+namespace lite_api {
+
+void OptBase::SetModelDir(const std::string& model_path) {
+  opt_config_.set_model_dir(model_path);
+}
+
+void OptBase::SetModelFile(const std::string& model_path) {
+  opt_config_.set_model_file(model_path);
+}
+
+void OptBase::SetParamFile(const std::string& param_path) {
+  opt_config_.set_param_file(param_path);
+}
+
+void OptBase::SetModelType(std::string optimize_out_type) {
+  if (optimize_out_type == "protobuf") {
+    model_type_ = LiteModelType::kProtobuf;
+  } else if (optimize_out_type == "naive_buffer") {
+    model_type_ = LiteModelType::kNaiveBuffer;
+  } else {
+    LOG(FATAL) << "Unsupported model type: " << optimize_out_type;
+  }
+}
+
+void OptBase::SetValidPlaces(const std::string& valid_places) {
+  valid_places_.clear();
+  auto target_reprs = lite::Split(valid_places, ",");
+  for (auto& target_repr : target_reprs) {
+    if (target_repr == "arm") {
+      valid_places_.emplace_back(TARGET(kARM));
+    } else if (target_repr == "opencl") {
+      valid_places_.emplace_back(
+          Place{TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault)});
+      valid_places_.emplace_back(
+          Place{TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kNCHW)});
+      valid_places_.emplace_back(
+          Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageDefault)});
+      valid_places_.emplace_back(
+          Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kNCHW)});
+      valid_places_.emplace_back(
+          TARGET(kARM));  // enable kARM CPU kernel when no opencl kernel
+    } else if (target_repr == "x86") {
+      valid_places_.emplace_back(TARGET(kX86));
+    } else if (target_repr == "npu") {
+      valid_places_.emplace_back(TARGET(kNPU));
+    } else if (target_repr == "xpu") {
+      valid_places_.emplace_back(TARGET(kXPU));
+    } else {
+      LOG(FATAL) << lite::string_format(
+          "Wrong target '%s' found, please check the command flag "
+          "'valid_targets'",
+          target_repr.c_str());
+    }
+  }
+  CHECK(!valid_places_.empty())
+      << "At least one target should be set; please set the "
+         "command argument 'valid_targets'";
+}
+
+void OptBase::SetOptimizeOut(const std::string& optimized_out_path) {
+  optimize_out_path_ = optimized_out_path;
+}
+
+void OptBase::RunOptimize(bool record_strip_info) {
+  CheckIfModelSupported(false);
+  OpKernelInfoCollector::Global().SetKernel2path(kernel2path_map);
+  opt_config_.set_valid_places(valid_places_);
+  if (model_set_dir_ != "") {
+    RunOptimizeFromModelSet(record_strip_info);
+  } else {
+    auto opt_predictor = lite_api::CreatePaddlePredictor(opt_config_);
+    opt_predictor->SaveOptimizedModel(
+        optimize_out_path_, model_type_, record_strip_info);
+    auto resulted_model_name =
+        record_strip_info ? "information of stripped model" : "optimized model";
"information of striped model" : "optimized model"; + std::cout << "Save the " << resulted_model_name + << " into :" << optimize_out_path_ << "successfully"; + } +} + +// collect ops info of modelset +void CollectModelMetaInfo(const std::string& output_dir, + const std::vector& models, + const std::string& filename) { + std::set total; + for (const auto& name : models) { + std::string model_path = + lite::Join({output_dir, name, filename}, "/"); + auto lines = lite::ReadLines(model_path); + total.insert(lines.begin(), lines.end()); + } + std::string output_path = + lite::Join({output_dir, filename}, "/"); + lite::WriteLines(std::vector(total.begin(), total.end()), + output_path); +} + +void OptBase::SetModelSetDir(const std::string& model_set_path) { + model_set_dir_ = model_set_path; +} +void OptBase::RunOptimizeFromModelSet(bool record_strip_info) { + // 1. mkdir of outputed optimized model set. + lite::MkDirRecur(optimize_out_path_); + auto model_dirs = lite::ListDir(model_set_dir_, true); + if (model_dirs.size() == 0) { + LOG(FATAL) << "[" << model_set_dir_ << "] does not contain any model"; + } + + // 2. optimize each model in inputed model set dir. + std::string model_file = opt_config_.model_file(); + std::string param_file = opt_config_.param_file(); + for (const auto& name : model_dirs) { + std::string input_model_dir = + lite::Join({model_set_dir_, name}, "/"); + std::string output_model_dir = + lite::Join({optimize_out_path_, name}, "/"); + + if (opt_config_.model_file() != "" && opt_config_.param_file() != "") { + auto model_file_path = + lite::Join({input_model_dir, model_file}, "/"); + auto param_file_path = + lite::Join({input_model_dir, param_file}, "/"); + } + + std::cout << "Start optimize model: " << input_model_dir; + + opt_config_.set_model_dir(input_model_dir); + opt_config_.set_model_file(model_file); + opt_config_.set_param_file(param_file); + + auto opt_predictor = lite_api::CreatePaddlePredictor(opt_config_); + opt_predictor->SaveOptimizedModel( + optimize_out_path_, model_type_, record_strip_info); + + std::cout << "Optimize done. "; + } + + // 3. if record_strip_info = true, we will record striping info + if (record_strip_info) { + // Collect all models information + CollectModelMetaInfo( + optimize_out_path_, model_dirs, lite::TAILORD_OPS_SOURCE_LIST_FILENAME); + CollectModelMetaInfo( + optimize_out_path_, model_dirs, lite::TAILORD_OPS_LIST_NAME); + CollectModelMetaInfo(optimize_out_path_, + model_dirs, + lite::TAILORD_KERNELS_SOURCE_LIST_FILENAME); + CollectModelMetaInfo( + optimize_out_path_, model_dirs, lite::TAILORD_KERNELS_LIST_NAME); + std::cout << "Record the information of stripped models into :" + << optimize_out_path_ << "successfully"; + } +} + +void OptBase::PrintHelpInfo() { + const std::string opt_version = lite::version(); + const char help_info[] = + "At least one argument should be inputed. 
+      "below:\n"
+      "  Arguments of help information:\n"
+      "        `help()`   Print help information\n"
+      "  Arguments of model optimization:\n"
+      "        `set_model_dir(model_dir)`\n"
+      "        `set_model_file(model_file_path)`\n"
+      "        `set_param_file(param_file_path)`\n"
+      "        `set_model_type(protobuf|naive_buffer)`\n"
+      "        `set_optimize_out(output_optimize_model_dir)`\n"
+      "        `set_valid_places(arm|opencl|x86|npu|xpu)`\n"
+      "        `run_optimize(false|true)`\n"
+      "        `  ----false&true refer to whether to record ops info for "
+      "tailoring lib, false by default`\n"
+      "  Arguments of model checking and ops information:\n"
+      "        `print_all_ops()`   Display all the valid operators of "
+      "Paddle-Lite\n"
+      "        `print_supported_ops()`   Display supported operators of valid "
+      "places\n"
+      "        `check_if_model_supported()`   Check if the input model is "
+      "supported\n";
+
+  std::cout << "opt version:" << opt_version << std::endl
+            << help_info << std::endl;
+}
+// 2. Print support info of input ops
+void OptBase::PrintOpsInfo(const std::set<std::string>& valid_ops) {
+  std::vector<std::string> lite_supported_targets = {"kHost",
+                                                     "kX86",
+                                                     "kCUDA",
+                                                     "kARM",
+                                                     "kOpenCL",
+                                                     "kFPGA",
+                                                     "kNPU",
+                                                     "kXPU",
+                                                     "kAny",
+                                                     "kUnk"};
+  // Get the length of the first column: maximum length of the op_type
+  size_t maximum_optype_length = 0;
+  for (auto it = supported_ops.begin(); it != supported_ops.end(); it++) {
+    maximum_optype_length = it->first.size() > maximum_optype_length
+                                ? it->first.size()
+                                : maximum_optype_length;
+  }
+  std::cout << std::setiosflags(std::ios::internal);
+  // Print the first row: OP_name  target1  target2 ...
+  std::cout << std::setw(maximum_optype_length) << "OP_name";
+  for (size_t i = 0; i < lite_supported_targets.size(); i++) {
+    std::cout << std::setw(10) << lite_supported_targets[i].substr(1);
+  }
+  std::cout << std::endl;
+  // Print the name of supported ops and mark if it's supported by each target
+  // print the support info of input ops: valid_ops
+  for (auto op = valid_ops.begin(); op != valid_ops.end(); op++) {
+    std::cout << std::setw(maximum_optype_length) << *op;
+    // Check: If this kernel doesn't match any operator, we will skip it.
+    if (supported_ops.find(*op) == supported_ops.end()) {
+      continue;
+    }
+    // Print OP info.
+    auto ops_valid_places = supported_ops.at(*op);
+    for (size_t i = 0; i < lite_supported_targets.size(); i++) {
+      if (std::find(ops_valid_places.begin(),
+                    ops_valid_places.end(),
+                    lite_supported_targets[i]) != ops_valid_places.end()) {
+        std::cout << std::setw(10) << "Y";
+      } else {
+        std::cout << std::setw(10) << " ";
+      }
+    }
+    std::cout << std::endl;
+  }
+}
+
+void OptBase::DisplayKernelsInfo() {  // Display kernel information
+  std::cout << ::paddle::lite::KernelRegistry::Global().DebugString();
+}
+void OptBase::PrintAllOps() {
+  // 1. Get supported ops on these targets
+  std::set<std::string> valid_ops;
+  for (size_t i = 0; i < supported_ops_target.size(); i++) {
+    auto ops = supported_ops_target[i];
+    valid_ops.insert(ops.begin(), ops.end());
+  }
+  // 2. Print support info of these ops
+  PrintOpsInfo(valid_ops);
+}
+
+void OptBase::PrintSupportedOps() {
+  // 1. Get the valid hardware targets
+  std::vector<TargetType> target_types = {};
+  for (size_t i = 0; i < valid_places_.size(); i++) {
+    target_types.push_back(valid_places_[i].target);
+  }
+  std::string targets_str = TargetToStr(target_types[0]);
+  for (size_t i = 1; i < target_types.size(); i++) {
+    targets_str = targets_str + "," + TargetToStr(target_types[i]);
+  }
+  std::cout << "Supported OPs on '" << targets_str << "': " << std::endl;
+  target_types.push_back(TARGET(kHost));
+  target_types.push_back(TARGET(kUnk));
+
+  // 2. Get supported ops on these targets
+  std::set<std::string> valid_ops;
+  for (size_t i = 0; i < target_types.size(); i++) {
+    auto ops = supported_ops_target[static_cast<int>(target_types[i])];
+    valid_ops.insert(ops.begin(), ops.end());
+  }
+  // 3. Print support info of these ops
+  PrintOpsInfo(valid_ops);
+}
+
+// test whether this model is supported
+void OptBase::CheckIfModelSupported(bool print_ops_info) {
+  // 1. parse valid places and valid targets
+  auto valid_ops = supported_ops_target[static_cast<int>(TARGET(kHost))];
+  auto valid_unktype_ops = supported_ops_target[static_cast<int>(TARGET(kUnk))];
+  valid_ops.insert(
+      valid_ops.end(), valid_unktype_ops.begin(), valid_unktype_ops.end());
+  for (size_t i = 0; i < valid_places_.size(); i++) {
+    auto target = valid_places_[i].target;
+    auto ops = supported_ops_target[static_cast<int>(target)];
+    valid_ops.insert(valid_ops.end(), ops.begin(), ops.end());
+  }
+  // get valid ops
+  std::set<std::string> valid_ops_set(valid_ops.begin(), valid_ops.end());
+
+  // 2. Load model into program to get ops in model
+  std::string prog_path = opt_config_.model_dir() + "/__model__";
+  if (!(opt_config_.model_file()).empty() &&
+      !(opt_config_.param_file()).empty()) {
+    prog_path = opt_config_.model_file();
+  }
+  lite::cpp::ProgramDesc cpp_prog;
+  framework::proto::ProgramDesc pb_proto_prog =
+      *lite::LoadProgram(prog_path, false);
+  lite::pb::ProgramDesc pb_prog(&pb_proto_prog);
+  // Transform to cpp::ProgramDesc
+  lite::TransformProgramDescAnyToCpp(pb_prog, &cpp_prog);
+
+  std::set<std::string> unsupported_ops;
+  std::set<std::string> input_model_ops;
+  for (size_t index = 0; index < cpp_prog.BlocksSize(); index++) {
+    auto current_block = cpp_prog.GetBlock<lite::cpp::BlockDesc>(index);
+    for (size_t i = 0; i < current_block->OpsSize(); ++i) {
+      auto& op_desc = *current_block->GetOp<lite::cpp::OpDesc>(i);
+      auto op_type = op_desc.Type();
+      input_model_ops.insert(op_type);
+      if (valid_ops_set.count(op_type) == 0) {
+        unsupported_ops.insert(op_type);
+      }
+    }
+  }
+  // 3. Print ops_info of input model and check if this model is supported
+  if (print_ops_info) {
+    std::cout << "OPs in the input model include:\n";
+    PrintOpsInfo(input_model_ops);
+  }
+  if (!unsupported_ops.empty()) {
+    std::string unsupported_ops_str = *unsupported_ops.begin();
+    for (auto op_str = ++unsupported_ops.begin();
+         op_str != unsupported_ops.end();
+         op_str++) {
+      unsupported_ops_str = unsupported_ops_str + ", " + *op_str;
+    }
+    std::vector<TargetType> targets = {};
+    for (size_t i = 0; i < valid_places_.size(); i++) {
+      targets.push_back(valid_places_[i].target);
+    }
+    std::sort(targets.begin(), targets.end());
+    targets.erase(unique(targets.begin(), targets.end()), targets.end());
+    std::string targets_str = TargetToStr(targets[0]);
+    for (size_t i = 1; i < targets.size(); i++) {
+      targets_str = targets_str + "," + TargetToStr(targets[i]);
+    }
+
+    LOG(ERROR) << "Error: This model is not supported, because "
+               << unsupported_ops.size() << " ops are not supported on '"
+               << targets_str << "'. These unsupported ops are: '"
+               << unsupported_ops_str << "'.";
+    exit(1);
+  }
+  if (print_ops_info) {
+    std::cout << "Paddle-Lite supports this model!" << std::endl;
+    exit(0);
+  }
+}
+}  // namespace lite_api
+}  // namespace paddle
diff --git a/lite/api/opt_base.h b/lite/api/opt_base.h
new file mode 100644
index 0000000000..a8d6d0390c
--- /dev/null
+++ b/lite/api/opt_base.h
@@ -0,0 +1,86 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * This file defines Opt and basic functions about model transformation.
+ */
+
+#ifndef PADDLE_LITE_OPT_H_  // NOLINT
+#define PADDLE_LITE_OPT_H_
+#include <algorithm>
+#include <iomanip>
+#include <iostream>
+#include <set>
+#include <string>
+#include <vector>
+// stores the map that records the source_file path of each kernel.
+#include "kernel_src_map.h"  // NOLINT
+#include "lite/api/cxx_api.h"
+// version of Paddle-lite
+#include "lite/core/version.h"
+// model parser functions to pre-load the model and verify if it is supported
+#include "lite/model_parser/compatible_pb.h"
+#include "lite/model_parser/pb/program_desc.h"
+#include "lite/utils/string.h"
+// records all the ops supported by paddle-lite
+#include "supported_kernel_op_info.h"  // NOLINT
+
+namespace paddle {
+namespace lite_api {
+
+/// OptBase defines the basic interfaces of the model optimizing tool (opt):
+/// it loads a model, checks whether its ops are supported on the given
+/// valid places, and saves the optimized model.
+class LITE_API OptBase {
+ public:
+  OptBase() = default;
+  void SetModelSetDir(const std::string &model_set_path);
+  void SetModelDir(const std::string &model_path);
+  void SetModelFile(const std::string &model_path);
+  void SetParamFile(const std::string &param_path);
+  void SetValidPlaces(const std::string &valid_places);
+  void SetOptimizeOut(const std::string &optimized_out_path);
+  // set optimized_model type
+  void SetModelType(std::string model_type);
+  // transform and save the optimized model
+  void RunOptimize(bool record_strip_info = false);
+
+  // functions of printing info
+  // 1. help info
+  void PrintHelpInfo();
+  // 2. PrintOpsInfo
+  void PrintOpsInfo(const std::set<std::string> &valid_ops =
+                        {});       // print supported ops on target_types
+  void PrintAllOps();              // print all ops
+  void PrintSupportedOps();        // print ops supported on valid_places_
+  void DisplayKernelsInfo();       // Display kernel information
+  // 3. Check if this model is supported
+  void CheckIfModelSupported(bool print_ops_info = true);
+
+ private:
+  CxxConfig opt_config_;
+  // valid places for the optimized_model
+  std::vector<Place> valid_places_;
+  // filename of the optimized_model
+  std::string optimize_out_path_;
+  // type of the optimized_model, kNaiveBuffer default.
+  LiteModelType model_type_{LiteModelType::kNaiveBuffer};
+  // Dir path of a set of models, this should be combined with model
+  std::string model_set_dir_;
+
+  void RunOptimizeFromModelSet(bool record_strip_info = false);
+};
+
+}  // namespace lite_api
+}  // namespace paddle
+
+#endif  // NOLINT
diff --git a/lite/api/python/pybind/CMakeLists.txt b/lite/api/python/pybind/CMakeLists.txt
index eabb6b150b..b1de18d50c 100644
--- a/lite/api/python/pybind/CMakeLists.txt
+++ b/lite/api/python/pybind/CMakeLists.txt
@@ -1,6 +1,6 @@
 set(PYBIND_DEPS pybind python paddle_api_light paddle_api)
 if (NOT LITE_ON_TINY_PUBLISH)
-    set(PYBIND_DEPS ${PYBIND_DEPS} paddle_api_full)
+    set(PYBIND_DEPS ${PYBIND_DEPS} paddle_api_full opt_base)
 endif()
 
 lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
diff --git a/lite/api/python/pybind/pybind.cc b/lite/api/python/pybind/pybind.cc
index 985246a31a..e86d570e18 100644
--- a/lite/api/python/pybind/pybind.cc
+++ b/lite/api/python/pybind/pybind.cc
@@ -26,6 +26,7 @@
 
 #ifndef LITE_ON_TINY_PUBLISH
 #include "lite/api/cxx_api.h"
+#include "lite/api/opt_base.h"
 #endif
 
 #include "lite/api/light_api.h"
@@ -47,10 +48,28 @@ using lite_api::PrecisionType;
 using lite_api::DataLayoutType;
 using lite_api::Place;
 using lite::LightPredictorImpl;
+using lite_api::OptBase;
 
 #ifndef LITE_ON_TINY_PUBLISH
 using lite::CxxPaddleApiImpl;
 static void BindLiteCxxPredictor(py::module *m);
+void BindLiteOpt(py::module *m) {
+  py::class_<OptBase> opt_base(*m, "Opt");
+  opt_base.def(py::init<>())
+      .def("set_model_dir", &OptBase::SetModelDir)
+      .def("set_modelset_dir", &OptBase::SetModelSetDir)
+      .def("set_model_file", &OptBase::SetModelFile)
+      .def("set_param_file", &OptBase::SetParamFile)
+      .def("set_valid_places", &OptBase::SetValidPlaces)
+      .def("set_optimize_out", &OptBase::SetOptimizeOut)
+      .def("set_model_type", &OptBase::SetModelType)
+      .def("run_optimize", &OptBase::RunOptimize)
+      .def("check_if_model_supported", &OptBase::CheckIfModelSupported)
+      .def("help", &OptBase::PrintHelpInfo)
+      .def("print_supported_ops", &OptBase::PrintSupportedOps)
+      .def("display_kernels_info", &OptBase::DisplayKernelsInfo)
+      .def("print_all_ops", &OptBase::PrintAllOps);
+}
 #endif
 static void BindLiteLightPredictor(py::module *m);
 static void BindLiteCxxConfig(py::module *m);
diff --git a/lite/api/python/pybind/pybind.h b/lite/api/python/pybind/pybind.h
index 7caf00a9c3..15609957e0 100644
--- a/lite/api/python/pybind/pybind.h
+++ b/lite/api/python/pybind/pybind.h
@@ -22,11 +22,15 @@ namespace lite {
 namespace pybind {
 
 void BindLiteApi(pybind11::module *m);
+void BindLiteOpt(pybind11::module *m);
 
 PYBIND11_MODULE(lite, m) {
   m.doc() = "C++ core of Paddle-Lite";
 
   BindLiteApi(&m);
+#ifndef LITE_ON_TINY_PUBLISH
+  BindLiteOpt(&m);
+#endif
 }
 
 }  // namespace pybind
diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt
index fd595bca51..db8bc29d70 100644
--- a/lite/core/CMakeLists.txt
+++ b/lite/core/CMakeLists.txt
@@ -93,9 +93,13 @@ add_custom_command(
   OUTPUT ops.h # not a real path to the output to force it execute every time.
 )
 # generate fake kernels for memory_optimize_tool
+
+#-------------------------------opt----------------------------------------------------------------
+# trick to create header files for opt
 add_custom_command(
   COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/create_fake_kernel_registry.py
           ${kernels_src_list}
+          ${fake_kernels_src_list}
          ${CMAKE_BINARY_DIR}/all_kernel_faked.cc
          ${CMAKE_BINARY_DIR}/kernel_src_map.h
  OUTPUT all_kernel_faked.cc # not a real path to the output to force it execute every time.
@@ -103,12 +107,12 @@ add_custom_command( add_custom_target(op_list_h DEPENDS ops.h) add_custom_target(kernel_list_h DEPENDS kernels.h) add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc) -#add_custom_target(opencl_kernels_source_cc DEPENDS opencl_kernels_source.cc) # create headfile to restore ops info sorted by suppported platforms add_custom_command( COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/record_supported_kernel_op.py ${kernels_src_list} + ${fake_kernels_src_list} ${ops_src_list} ${CMAKE_BINARY_DIR}/supported_kernel_op_info.h OUTPUT supported_kernel_op_info.h # not a real path to the output to force it execute every time. diff --git a/lite/core/context.h b/lite/core/context.h index fd0715d698..978fb5d67a 100644 --- a/lite/core/context.h +++ b/lite/core/context.h @@ -395,7 +395,7 @@ class ContextScheduler { break; #endif default: -#ifndef LITE_ON_MODEL_OPTIMIZE_TOOL +#if (!defined LITE_ON_MODEL_OPTIMIZE_TOOL) && (!defined LITE_WITH_PYTHON) LOG(FATAL) << "unsupported target " << TargetToStr(target); #endif break; diff --git a/lite/kernels/arm/CMakeLists.txt b/lite/kernels/arm/CMakeLists.txt index 75dee596dd..7550d77014 100644 --- a/lite/kernels/arm/CMakeLists.txt +++ b/lite/kernels/arm/CMakeLists.txt @@ -1,6 +1,6 @@ # NOTE we leave the add_kernel not protected by LITE_WITH_LIGHT_WEIGHT_FRAMEWORK so that all the kernels will be registered # to the model_optimize_tool. -if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM))) +if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM))) return() endif() diff --git a/lite/kernels/cuda/CMakeLists.txt b/lite/kernels/cuda/CMakeLists.txt index 9ec335ce81..3fb3136bfc 100644 --- a/lite/kernels/cuda/CMakeLists.txt +++ b/lite/kernels/cuda/CMakeLists.txt @@ -1,4 +1,4 @@ -if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_CUDA)) +if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_CUDA)) return() endif() diff --git a/lite/kernels/fpga/CMakeLists.txt b/lite/kernels/fpga/CMakeLists.txt index f6c3a39949..1f9b84e7db 100755 --- a/lite/kernels/fpga/CMakeLists.txt +++ b/lite/kernels/fpga/CMakeLists.txt @@ -1,4 +1,4 @@ -if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_FPGA)) +if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_FPGA)) return() endif() diff --git a/lite/kernels/opencl/CMakeLists.txt b/lite/kernels/opencl/CMakeLists.txt index 44dfc1e010..652ce25938 100644 --- a/lite/kernels/opencl/CMakeLists.txt +++ b/lite/kernels/opencl/CMakeLists.txt @@ -1,4 +1,4 @@ -if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_OPENCL)) +if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_OPENCL)) return () endif() diff --git a/lite/tools/build.sh b/lite/tools/build.sh index d556cc1cf1..e28dd6c53e 100755 --- a/lite/tools/build.sh +++ b/lite/tools/build.sh @@ -358,6 +358,7 @@ function make_x86 { -DLITE_WITH_ARM=OFF \ -DLITE_WITH_PYTHON=$BUILD_PYTHON \ -DWITH_GPU=OFF \ + -DLITE_WITH_PYTHON=${BUILD_PYTHON} \ -DLITE_BUILD_EXTRA=ON \ -DLITE_WITH_XPU=$BUID_XPU \ -DXPU_SDK_ROOT=$XPU_SDK_ROOT diff --git a/lite/tools/cmake_tools/create_fake_kernel_registry.py b/lite/tools/cmake_tools/create_fake_kernel_registry.py index 35012d5b16..0b96652c6f 100644 --- a/lite/tools/cmake_tools/create_fake_kernel_registry.py +++ b/lite/tools/cmake_tools/create_fake_kernel_registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. 
All Rights Reserved.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# this module records kernels from invalid places into all_kernel_faked.cc
 
 from __future__ import print_function
 import sys
@@ -18,12 +19,13 @@ import logging
 from ast import RegisterLiteKernelParser
 from utils import *
 
-if len(sys.argv) != 4:
-    print("Error: create_fake_kernel_registry.py requires three inputs!")
+if len(sys.argv) != 5:
+    print("Error: create_fake_kernel_registry.py requires four inputs!")
     exit(1)
-ops_list_path = sys.argv[1]
-dest_path = sys.argv[2]
-kernelmap_path = sys.argv[3]
+kernels_list_path = sys.argv[1]
+faked_kernels_list_path = sys.argv[2]
+dest_path = sys.argv[3]
+kernelmap_path = sys.argv[4]
 
 out_lines = [
     '#pragma once',
@@ -77,68 +79,85 @@ const std::map<std::string, std::string> kernel2path_map{
 '''
 ]
 
+def parse_fake_kernels_from_path(list_path):
+    with open(list_path) as f:
+        paths = set([path for path in f])
+        for path in paths:
+            print('path', path)
+            with open(path.strip()) as g:
+                c = g.read()
+                kernel_parser = RegisterLiteKernelParser(c)
+                kernel_parser.parse()
+
+                for k in kernel_parser.kernels:
+                    kernel_name = "{op_type}_{target}_{precision}_{data_layout}_{alias}_class".format(
+                        op_type=k.op_type,
+                        target=k.target,
+                        precision=k.precision,
+                        data_layout=k.data_layout,
+                        alias=k.alias
+                    )
+
+                    kernel_define = fake_kernel % (
+                        kernel_name,
+                        k.target,
+                        k.precision,
+                        k.data_layout,
+                        kernel_name
+                    )
+
+                    out_lines.append(kernel_define)
+                    out_lines.append("")
+
+
+                    key = "REGISTER_LITE_KERNEL(%s, %s, %s, %s, %s, %s)" % (
+                        k.op_type,
+                        k.target,
+                        k.precision,
+                        k.data_layout,
+                        '::paddle::lite::' + kernel_name,
+                        k.alias
+                    )
+                    out_lines.append(key)
+
+                    for input in k.inputs:
+                        io = '    .BindInput("%s", {%s})' % (input.name, input.type)
+                        out_lines.append(io)
+                    for output in k.outputs:
+                        io = '    .BindOutput("%s", {%s})' % (output.name, output.type)
+                        out_lines.append(io)
+                    out_lines.append("    .Finalize();")
+                    out_lines.append("")
+                    out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias))
+
+def parse_supported_kernels_from_path(list_path):
+    with open(list_path) as f:
+        paths = set([path for path in f])
+        for path in paths:
+            print('path', path)
+            with open(path.strip()) as g:
+                c = g.read()
+                kernel_parser = RegisterLiteKernelParser(c)
+                kernel_parser.parse()
+
+                for k in kernel_parser.kernels:
+                    index = path.rindex('/')
+                    filename = path[index + 1:]
+                    map_element = '  {"%s,%s,%s,%s,%s", "%s"},' % (
+                        k.op_type,
+                        k.target,
+                        k.precision,
+                        k.data_layout,
+                        k.alias,
+                        filename.strip()
+                    )
+                    kernel_src_map_lines.append(map_element)
+
+
+parse_fake_kernels_from_path(faked_kernels_list_path)
+parse_supported_kernels_from_path(faked_kernels_list_path)
+parse_supported_kernels_from_path(kernels_list_path)
 
-with open(ops_list_path) as f:
-    paths = set([path for path in f])
-    for path in paths:
-        print('path', path)
-        with open(path.strip()) as g:
-            c = g.read()
-            kernel_parser = RegisterLiteKernelParser(c)
-            kernel_parser.parse()
-
-            for k in kernel_parser.kernels:
-                kernel_name = "{op_type}_{target}_{precision}_{data_layout}_{alias}_class".format(
-                    op_type = k.op_type,
-                    target = k.target,
-                    precision = k.precision,
-                    data_layout = k.data_layout,
-                    alias = k.alias,
-                )
-
-                kernel_define = fake_kernel 
% ( - kernel_name, - k.target, - k.precision, - k.data_layout, - kernel_name, - ) - - out_lines.append(kernel_define) - out_lines.append("") - - - key = "REGISTER_LITE_KERNEL(%s, %s, %s, %s, %s, %s)" % ( - k.op_type, - k.target, - k.precision, - k.data_layout, - '::paddle::lite::' + kernel_name, - k.alias, - ) - out_lines.append(key) - - for input in k.inputs: - io = ' .BindInput("%s", {%s})' % (input.name, input.type) - out_lines.append(io) - for output in k.outputs: - io = ' .BindOutput("%s", {%s})' % (output.name, output.type) - out_lines.append(io) - out_lines.append(" .Finalize();") - out_lines.append("") - out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias)) - - index = path.rindex('/') - filename = path[index + 1:] - map_element = ' {"%s,%s,%s,%s,%s", "%s"},' % ( - k.op_type, - k.target, - k.precision, - k.data_layout, - k.alias, - filename.strip() - ) - kernel_src_map_lines.append(map_element) with open(dest_path, 'w') as f: logging.info("write kernel list to %s" % dest_path) f.write('\n'.join(out_lines)) diff --git a/lite/tools/cmake_tools/record_supported_kernel_op.py b/lite/tools/cmake_tools/record_supported_kernel_op.py index f6a3af6bd3..560174bc63 100644 --- a/lite/tools/cmake_tools/record_supported_kernel_op.py +++ b/lite/tools/cmake_tools/record_supported_kernel_op.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# this module records the supported ops parsed from the kernel source lists
 
 from __future__ import print_function
 import sys
@@ -18,12 +19,13 @@ import logging
 from ast import RegisterLiteKernelParser
 from ast import RegisterLiteOpParser
 
-if len(sys.argv) != 4:
-    print("Error: record_supported_kernel_op.py requires three inputs!")
-    exit(1)
+if len(sys.argv) != 5:
+    print("Error: record_supported_kernel_op.py requires four inputs!")
+    sys.exit(1)
 kernels_list_path = sys.argv[1]
-ops_list_path = sys.argv[2]
-kernel_op_map_dest_path = sys.argv[3]
+faked_kernels_list_path = sys.argv[2]
+ops_list_path = sys.argv[3]
+kernel_op_map_dest_path = sys.argv[4]
 
 
 out_lines = [
@@ -51,11 +53,11 @@ const std::vector<std::vector<std::string>> supported_ops_target = {
 '''
 ]
 
-ops_lines=[]
+ops_lines = []
 
 # valid targets and valid_ops
 valid_targets = ["kUnk", "kHost", "kX86", "kCUDA", "kARM", "kOpenCL", "kAny", "kFPGA", "kNPU", "kXPU"]
-valid_ops = [[],[],[],[],[],[],[],[],[],[]]
+valid_ops = [[], [], [], [], [], [], [], [], [], []]
 class TargetType:
     kUnk = 0
     kHost = 1
@@ -78,8 +80,21 @@ with open(kernels_list_path) as f:
             kernel_parser.parse()
             for k in kernel_parser.kernels:
                 if hasattr(TargetType, k.target):
-                    index=getattr(TargetType, k.target)
+                    index = getattr(TargetType, k.target)
                     valid_ops[index].append(k.op_type)
+# record op info of the faked kernels into `valid_ops`, grouped by target type
+with open(faked_kernels_list_path) as f:
+    paths = set([path for path in f])
+    for path in paths:
+        with open(path.strip()) as g:
+            c = g.read()
+            kernel_parser = RegisterLiteKernelParser(c)
+            kernel_parser.parse()
+            for k in kernel_parser.kernels:
+                if hasattr(TargetType, k.target):
+                    index = getattr(TargetType, k.target)
+                    valid_ops[index].append(k.op_type)
+
 
 # clear the repeated ops
 for target in valid_targets:
@@ -114,7 +129,7 @@ with open(kernel_op_map_dest_path, 'w') as f:
     f.write('\n'.join(out_lines))
 # write kernels into head file
 for target in valid_targets:
-    if len(valid_ops[getattr(TargetType, target)]) == 0 :
+    if len(valid_ops[getattr(TargetType, target)]) == 0:
         f.write("\n  // %s_OPS: " %target)
         f.write('\n  {},')
     else:
-- 
GitLab
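A minimal usage sketch of the Python `Opt` binding this patch adds. The method names come from BindLiteOpt() in lite/api/python/pybind/pybind.cc; the import path and the model directory are assumptions (the compiled module is declared as `lite` via PYBIND11_MODULE, but the final package layout depends on how the wheel bundles lite_pybind):

    # Hypothetical usage of the new Opt binding (import path assumed).
    from lite import Opt

    opt = Opt()
    opt.set_model_dir("./mobilenet_v1")         # hypothetical model dir (non-combined params)
    opt.set_valid_places("arm")                 # comma-separated, as in SetValidPlaces()
    opt.set_model_type("naive_buffer")          # protobuf | naive_buffer
    opt.set_optimize_out("./mobilenet_v1_opt")  # output path of the optimized model
    opt.run_optimize(False)                     # False: do not record tailoring info

    # Introspection helpers bound in the same patch:
    opt.help()                  # PrintHelpInfo
    opt.print_all_ops()         # all ops known to Paddle-Lite
    opt.print_supported_ops()   # ops supported on the valid places set above
    opt.display_kernels_info()  # kernel registry debug string

Note that `run_optimize` and `check_if_model_supported` are bound without pybind11 default-argument specifiers (no `py::arg(...) = ...`), so their bool flags must be passed explicitly from Python even though the C++ declarations carry defaults.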