Unverified commit 5fea8e10, authored by huzhiqiang, committed by GitHub

[Python lib] Add opt lib into python lib (#3209)

Parent 90ec3960
......@@ -275,6 +275,11 @@ set(host_kernels CACHE INTERNAL "host kernels")
set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt")
file(WRITE ${kernels_src_list} "") # clean
# file to record faked kernels for opt python lib
set(fake_kernels_src_list "${CMAKE_BINARY_DIR}/fake_kernels_src_list.txt")
file(WRITE ${fake_kernels_src_list} "") # clean
if(LITE_BUILD_TAILOR)
set(tailored_kernels_list_path "${LITE_OPTMODEL_DIR}/.tailored_kernels_source_list")
file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list)
......@@ -303,62 +308,74 @@ function(add_kernel TARGET device level)
return()
endif()
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
# the source list will collect for model_optimize_tool to fake kernel generation.
foreach(src ${args_SRCS})
file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
  # When compiling model_optimize_tool, a source file containing all the fake kernel definitions will be generated,
  # so there is no need to compile the real kernel sources.
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
return()
endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
if ("${device}" STREQUAL "Host")
set(host_kernels "${host_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "ARM")
if (NOT LITE_WITH_ARM)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(arm_kernels "${arm_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "X86")
if (NOT LITE_WITH_X86)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
elseif (LITE_ON_MODEL_OPTIMIZE_TOOL)
foreach(src ${args_SRCS})
file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(x86_kernels "${x86_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "NPU")
if (NOT LITE_WITH_NPU)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(npu_kernels "${npu_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "XPU")
if (NOT LITE_WITH_XPU)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(xpu_kernels "${xpu_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "FPGA")
if (NOT LITE_WITH_FPGA)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(fpga_kernels "${fpga_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "BM")
if (NOT LITE_WITH_BM)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(bm_kernels "${bm_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "OPENCL")
if (NOT LITE_WITH_OPENCL)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(opencl_kernels "${opencl_kernels};${TARGET}" CACHE INTERNAL "")
......@@ -366,6 +383,9 @@ function(add_kernel TARGET device level)
if ("${device}" STREQUAL "CUDA")
if (NOT LITE_WITH_CUDA)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(cuda_kernels "${cuda_kernels};${TARGET}" CACHE INTERNAL "")
......
......@@ -303,6 +303,11 @@ if (LITE_ON_TINY_PUBLISH)
return()
endif()
# add library for opt_base
lite_cc_library(opt_base SRCS opt_base.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc DEPS kernel op optimizer mir_passes utils)
add_dependencies(opt_base supported_kernel_op_info_h framework_proto all_kernel_faked_cc kernel_list_h)
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
message(STATUS "Compiling opt")
lite_cc_binary(opt SRCS opt.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc
......
......@@ -58,6 +58,7 @@ void LightPredictorImpl::Run() {
std::shared_ptr<lite_api::PaddlePredictor> LightPredictorImpl::Clone() {
  LOG(FATAL) << "The Clone API is not supported in LightPredictor";
return nullptr;
}
std::string LightPredictorImpl::GetVersion() const { return lite::version(); }
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/api/opt_base.h"
#include "all_kernel_faked.cc" // NOLINT
namespace paddle {
namespace lite_api {
void OptBase::SetModelDir(const std::string& model_path) {
opt_config_.set_model_dir(model_path);
}
void OptBase::SetModelFile(const std::string& model_path) {
opt_config_.set_model_file(model_path);
}
void OptBase::SetParamFile(const std::string& param_path) {
opt_config_.set_param_file(param_path);
}
void OptBase::SetModelType(std::string optimize_out_type) {
if (optimize_out_type == "protobuf") {
model_type_ = LiteModelType::kProtobuf;
} else if (optimize_out_type == "naive_buffer") {
model_type_ = LiteModelType::kNaiveBuffer;
} else {
    LOG(FATAL) << "Unsupported model type: " << optimize_out_type;
}
}
void OptBase::SetValidPlaces(const std::string& valid_places) {
valid_places_.clear();
auto target_reprs = lite::Split(valid_places, ",");
for (auto& target_repr : target_reprs) {
if (target_repr == "arm") {
valid_places_.emplace_back(TARGET(kARM));
} else if (target_repr == "opencl") {
valid_places_.emplace_back(
Place{TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault)});
valid_places_.emplace_back(
Place{TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kNCHW)});
valid_places_.emplace_back(
Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageDefault)});
valid_places_.emplace_back(
Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kNCHW)});
valid_places_.emplace_back(
TARGET(kARM)); // enable kARM CPU kernel when no opencl kernel
} else if (target_repr == "x86") {
valid_places_.emplace_back(TARGET(kX86));
} else if (target_repr == "npu") {
valid_places_.emplace_back(TARGET(kNPU));
} else if (target_repr == "xpu") {
valid_places_.emplace_back(TARGET(kXPU));
} else {
LOG(FATAL) << lite::string_format(
"Wrong target '%s' found, please check the command flag "
"'valid_targets'",
target_repr.c_str());
}
}
  CHECK(!valid_places_.empty())
      << "At least one target should be set; please check the "
         "command argument 'valid_targets'";
}
void OptBase::SetOptimizeOut(const std::string& optimized_out_path) {
optimize_out_path_ = optimized_out_path;
}
void OptBase::RunOptimize(bool record_strip_info) {
CheckIfModelSupported(false);
OpKernelInfoCollector::Global().SetKernel2path(kernel2path_map);
opt_config_.set_valid_places(valid_places_);
if (model_set_dir_ != "") {
RunOptimizeFromModelSet(record_strip_info);
} else {
auto opt_predictor = lite_api::CreatePaddlePredictor(opt_config_);
opt_predictor->SaveOptimizedModel(
optimize_out_path_, model_type_, record_strip_info);
    auto resulted_model_name =
        record_strip_info ? "information of stripped model" : "optimized model";
    std::cout << "Save the " << resulted_model_name
              << " into: " << optimize_out_path_ << " successfully";
}
}
// collect ops info of modelset
void CollectModelMetaInfo(const std::string& output_dir,
const std::vector<std::string>& models,
const std::string& filename) {
std::set<std::string> total;
for (const auto& name : models) {
std::string model_path =
lite::Join<std::string>({output_dir, name, filename}, "/");
auto lines = lite::ReadLines(model_path);
total.insert(lines.begin(), lines.end());
}
std::string output_path =
lite::Join<std::string>({output_dir, filename}, "/");
lite::WriteLines(std::vector<std::string>(total.begin(), total.end()),
output_path);
}
void OptBase::SetModelSetDir(const std::string& model_set_path) {
model_set_dir_ = model_set_path;
}
void OptBase::RunOptimizeFromModelSet(bool record_strip_info) {
  // 1. Create the output dir for the optimized model set.
lite::MkDirRecur(optimize_out_path_);
auto model_dirs = lite::ListDir(model_set_dir_, true);
if (model_dirs.size() == 0) {
LOG(FATAL) << "[" << model_set_dir_ << "] does not contain any model";
}
  // 2. Optimize each model in the input model set dir.
std::string model_file = opt_config_.model_file();
std::string param_file = opt_config_.param_file();
for (const auto& name : model_dirs) {
std::string input_model_dir =
lite::Join<std::string>({model_set_dir_, name}, "/");
std::string output_model_dir =
lite::Join<std::string>({optimize_out_path_, name}, "/");
if (opt_config_.model_file() != "" && opt_config_.param_file() != "") {
auto model_file_path =
lite::Join<std::string>({input_model_dir, model_file}, "/");
auto param_file_path =
lite::Join<std::string>({input_model_dir, param_file}, "/");
}
    std::cout << "Start optimizing model: " << input_model_dir;
opt_config_.set_model_dir(input_model_dir);
opt_config_.set_model_file(model_file);
opt_config_.set_param_file(param_file);
auto opt_predictor = lite_api::CreatePaddlePredictor(opt_config_);
opt_predictor->SaveOptimizedModel(
optimize_out_path_, model_type_, record_strip_info);
std::cout << "Optimize done. ";
}
  // 3. If record_strip_info is true, record the stripping info
if (record_strip_info) {
// Collect all models information
CollectModelMetaInfo(
optimize_out_path_, model_dirs, lite::TAILORD_OPS_SOURCE_LIST_FILENAME);
CollectModelMetaInfo(
optimize_out_path_, model_dirs, lite::TAILORD_OPS_LIST_NAME);
CollectModelMetaInfo(optimize_out_path_,
model_dirs,
lite::TAILORD_KERNELS_SOURCE_LIST_FILENAME);
CollectModelMetaInfo(
optimize_out_path_, model_dirs, lite::TAILORD_KERNELS_LIST_NAME);
    std::cout << "Record the information of stripped models into: "
              << optimize_out_path_ << " successfully";
}
}
void OptBase::PrintHelpInfo() {
const std::string opt_version = lite::version();
  const char help_info[] =
      "At least one argument should be given. Valid arguments are listed "
      "below:\n"
      "  Arguments of help information:\n"
      "     `help()`   Print help information\n"
      "  Arguments of model optimization:\n"
      "     `set_model_dir(model_dir)`\n"
      "     `set_model_file(model_file_path)`\n"
      "     `set_param_file(param_file_path)`\n"
      "     `set_model_type(protobuf|naive_buffer)`\n"
      "     `set_optimize_out(output_optimize_model_dir)`\n"
      "     `set_valid_places(arm|opencl|x86|npu|xpu)`\n"
      "     `run_optimize(false|true)`\n"
      "     `  ---- false/true refers to whether to record ops info for "
      "tailoring the lib, false by default`\n"
      "  Arguments of model checking and ops information:\n"
      "     `print_all_ops()`   Display all the valid operators of "
      "Paddle-Lite\n"
      "     `print_supported_ops()`   Display supported operators of valid "
      "places\n"
      "     `check_if_model_supported()`   Check if the input model is "
      "supported\n";
std::cout << "opt version:" << opt_version << std::endl
<< help_info << std::endl;
}
// 2. Print supported info of the input ops
void OptBase::PrintOpsInfo(const std::set<std::string>& valid_ops) {
std::vector<std::string> lite_supported_targets = {"kHost",
"kX86",
"kCUDA",
"kARM",
"kOpenCL",
"kFPGA",
"kNPU",
"kXPU",
"kAny",
"kUnk"};
  // Get the length of the first column: maximum length of the op_type
size_t maximum_optype_length = 0;
for (auto it = supported_ops.begin(); it != supported_ops.end(); it++) {
maximum_optype_length = it->first.size() > maximum_optype_length
? it->first.size()
: maximum_optype_length;
}
std::cout << std::setiosflags(std::ios::internal);
  // Print the first row: OP_name target1 target2 ...
std::cout << std::setw(maximum_optype_length) << "OP_name";
for (size_t i = 0; i < lite_supported_targets.size(); i++) {
std::cout << std::setw(10) << lite_supported_targets[i].substr(1);
}
std::cout << std::endl;
// Print the name of supported ops and mark if it's supported by each target
  // print the support info of the input ops: valid_ops
for (auto op = valid_ops.begin(); op != valid_ops.end(); op++) {
std::cout << std::setw(maximum_optype_length) << *op;
    // Skip this op if it is not found in supported_ops.
if (supported_ops.find(*op) == supported_ops.end()) {
continue;
}
// Print OP info.
auto ops_valid_places = supported_ops.at(*op);
for (size_t i = 0; i < lite_supported_targets.size(); i++) {
if (std::find(ops_valid_places.begin(),
ops_valid_places.end(),
lite_supported_targets[i]) != ops_valid_places.end()) {
std::cout << std::setw(10) << "Y";
} else {
std::cout << std::setw(10) << " ";
}
}
std::cout << std::endl;
}
}
void OptBase::DisplayKernelsInfo() { // Display kernel information
std::cout << ::paddle::lite::KernelRegistry::Global().DebugString();
}
void OptBase::PrintAllOps() {
// 1. Get supported ops on these targets
std::set<std::string> valid_ops;
for (size_t i = 0; i < supported_ops_target.size(); i++) {
auto ops = supported_ops_target[i];
valid_ops.insert(ops.begin(), ops.end());
}
// 2. Print support info of these ops
PrintOpsInfo(valid_ops);
}
void OptBase::PrintSupportedOps() {
// 1. Get the valid hardware targets
std::vector<TargetType> target_types = {};
for (size_t i = 0; i < valid_places_.size(); i++) {
target_types.push_back(valid_places_[i].target);
}
std::string targets_str = TargetToStr(target_types[0]);
for (size_t i = 1; i < target_types.size(); i++) {
    targets_str = targets_str + "," + TargetToStr(target_types[i]);
}
std::cout << "Supported OPs on '" << targets_str << "': " << std::endl;
target_types.push_back(TARGET(kHost));
target_types.push_back(TARGET(kUnk));
// 2. Get supported ops on these targets
std::set<std::string> valid_ops;
for (size_t i = 0; i < target_types.size(); i++) {
auto ops = supported_ops_target[static_cast<int>(target_types[i])];
valid_ops.insert(ops.begin(), ops.end());
}
// 3. Print support info of these ops
PrintOpsInfo(valid_ops);
}
// test whether this model is supported
void OptBase::CheckIfModelSupported(bool print_ops_info) {
// 1. parse valid places and valid targets
auto valid_ops = supported_ops_target[static_cast<int>(TARGET(kHost))];
auto valid_unktype_ops = supported_ops_target[static_cast<int>(TARGET(kUnk))];
valid_ops.insert(
valid_ops.end(), valid_unktype_ops.begin(), valid_unktype_ops.end());
for (size_t i = 0; i < valid_places_.size(); i++) {
auto target = valid_places_[i].target;
auto ops = supported_ops_target[static_cast<int>(target)];
valid_ops.insert(valid_ops.end(), ops.begin(), ops.end());
}
// get valid ops
std::set<std::string> valid_ops_set(valid_ops.begin(), valid_ops.end());
  // 2. Load the model into a program to get the ops in the model
std::string prog_path = opt_config_.model_dir() + "/__model__";
if (!(opt_config_.model_file()).empty() &&
!(opt_config_.param_file()).empty()) {
prog_path = opt_config_.model_file();
}
lite::cpp::ProgramDesc cpp_prog;
framework::proto::ProgramDesc pb_proto_prog =
*lite::LoadProgram(prog_path, false);
lite::pb::ProgramDesc pb_prog(&pb_proto_prog);
// Transform to cpp::ProgramDesc
lite::TransformProgramDescAnyToCpp(pb_prog, &cpp_prog);
std::set<std::string> unsupported_ops;
std::set<std::string> input_model_ops;
for (size_t index = 0; index < cpp_prog.BlocksSize(); index++) {
auto current_block = cpp_prog.GetBlock<lite::cpp::BlockDesc>(index);
for (size_t i = 0; i < current_block->OpsSize(); ++i) {
auto& op_desc = *current_block->GetOp<lite::cpp::OpDesc>(i);
auto op_type = op_desc.Type();
input_model_ops.insert(op_type);
if (valid_ops_set.count(op_type) == 0) {
unsupported_ops.insert(op_type);
}
}
}
// 3. Print ops_info of input model and check if this model is supported
if (print_ops_info) {
std::cout << "OPs in the input model include:\n";
PrintOpsInfo(input_model_ops);
}
if (!unsupported_ops.empty()) {
std::string unsupported_ops_str = *unsupported_ops.begin();
for (auto op_str = ++unsupported_ops.begin();
op_str != unsupported_ops.end();
op_str++) {
unsupported_ops_str = unsupported_ops_str + ", " + *op_str;
}
std::vector<TargetType> targets = {};
for (size_t i = 0; i < valid_places_.size(); i++) {
targets.push_back(valid_places_[i].target);
}
std::sort(targets.begin(), targets.end());
targets.erase(unique(targets.begin(), targets.end()), targets.end());
std::string targets_str = TargetToStr(targets[0]);
for (size_t i = 1; i < targets.size(); i++) {
targets_str = targets_str + "," + TargetToStr(targets[i]);
}
LOG(ERROR) << "Error: This model is not supported, because "
<< unsupported_ops.size() << " ops are not supported on '"
<< targets_str << "'. These unsupported ops are: '"
<< unsupported_ops_str << "'.";
exit(1);
}
if (print_ops_info) {
std::cout << "Paddle-Lite supports this model!" << std::endl;
    exit(0);
}
}
} // namespace lite_api
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
 * This file defines the OptBase class and basic functions for model transformation.
*/
#ifndef PADDLE_LITE_OPT_H_ // NOLINT
#define PADDLE_LITE_OPT_H_
#include <algorithm>
#include <iomanip>
#include <set>
#include <string>
#include <vector>
// stores the map that records the source_file path of each kernel.
#include "kernel_src_map.h" // NOLINT
#include "lite/api/cxx_api.h"
// version of Paddle-lite
#include "lite/core/version.h"
// model parser functions to pre-load model to verify if this model is supported
#include "lite/model_parser/compatible_pb.h"
#include "lite/model_parser/pb/program_desc.h"
#include "lite/utils/string.h"
// records all the ops supported by Paddle-Lite
#include "supported_kernel_op_info.h" // NOLINT
namespace paddle {
namespace lite_api {
/// OptBase provides the basic interfaces of the opt tool for offline model
/// transformation and optimization.
class LITE_API OptBase {
public:
OptBase() = default;
void SetModelSetDir(const std::string &model_set_path);
void SetModelDir(const std::string &model_path);
void SetModelFile(const std::string &model_path);
void SetParamFile(const std::string &param_path);
void SetValidPlaces(const std::string &valid_places);
void SetOptimizeOut(const std::string &optimized_out_path);
// set optimized_model type
void SetModelType(std::string model_type);
// transform and save the optimized model
void RunOptimize(bool record_strip_info = false);
  // functions for printing info
// 1. help info
void PrintHelpInfo();
// 2. PrintOpsInfo
void PrintOpsInfo(const std::set<std::string> &valid_ops =
{}); // print supported ops on target_types
void PrintAllOps(); // print all ops
void PrintSupportedOps(); // print ops supported on valid_places_
void DisplayKernelsInfo(); // Display kernel information
// 3. Check if this model is supported
void CheckIfModelSupported(bool print_ops_info = true);
private:
CxxConfig opt_config_;
// valid places for the optimized_model
std::vector<Place> valid_places_;
// filename of the optimized_model
std::string optimize_out_path_;
// type of the optimized_model, kNaiveBuffer default.
LiteModelType model_type_{LiteModelType::kNaiveBuffer};
  // Dir path of a set of models; when set, every model in this dir will be optimized.
std::string model_set_dir_;
void RunOptimizeFromModelSet(bool record_strip_info = false);
};
} // namespace lite_api
} // namespace paddle
#endif // NOLINT
set(PYBIND_DEPS pybind python paddle_api_light paddle_api)
if (NOT LITE_ON_TINY_PUBLISH)
set(PYBIND_DEPS ${PYBIND_DEPS} paddle_api_full)
set(PYBIND_DEPS ${PYBIND_DEPS} paddle_api_full opt_base)
endif()
lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
......
......@@ -26,6 +26,7 @@
#ifndef LITE_ON_TINY_PUBLISH
#include "lite/api/cxx_api.h"
#include "lite/api/opt_base.h"
#endif
#include "lite/api/light_api.h"
......@@ -47,10 +48,27 @@ using lite_api::PrecisionType;
using lite_api::DataLayoutType;
using lite_api::Place;
using lite::LightPredictorImpl;
using lite_api::OptBase;
#ifndef LITE_ON_TINY_PUBLISH
using lite::CxxPaddleApiImpl;
static void BindLiteCxxPredictor(py::module *m);
void BindLiteOpt(py::module *m) {
py::class_<OptBase> opt_base(*m, "Opt");
opt_base.def(py::init<>())
.def("set_model_dir", &OptBase::SetModelDir)
.def("set_modelset_dir", &OptBase::SetModelSetDir)
.def("set_model_file", &OptBase::SetModelFile)
.def("set_param_file", &OptBase::SetParamFile)
.def("set_valid_places", &OptBase::SetValidPlaces)
.def("set_optimize_out", &OptBase::SetOptimizeOut)
.def("set_model_type", &OptBase::SetModelType)
.def("run_optimize", &OptBase::RunOptimize)
.def("help", &OptBase::PrintHelpInfo)
.def("print_supported_ops", &OptBase::PrintSupportedOps)
.def("display_kernels_info", &OptBase::DisplayKernelsInfo)
.def("print_all_ops", &OptBase::PrintAllOps);
}
#endif
static void BindLiteLightPredictor(py::module *m);
static void BindLiteCxxConfig(py::module *m);
......
......@@ -22,11 +22,15 @@ namespace lite {
namespace pybind {
void BindLiteApi(pybind11::module *m);
void BindLiteOpt(pybind11::module *m);
PYBIND11_MODULE(lite, m) {
m.doc() = "C++ core of Paddle-Lite";
BindLiteApi(&m);
#ifndef LITE_ON_TINY_PUBLISH
BindLiteOpt(&m);
#endif
}
} // namespace pybind
......
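For reference, a minimal sketch of driving the new binding from Python, assuming the extension module built from lite_pybind is importable as `lite` (as declared by PYBIND11_MODULE(lite, m) above); the import path and model directories are illustrative assumptions, not part of this commit.

import lite                                  # assumption: module name as declared above

opt = lite.Opt()                             # exposed via py::class_<OptBase>(*m, "Opt")
opt.set_model_dir("./mobilenet_v1")          # hypothetical input model directory
opt.set_valid_places("arm")                  # comma-separated, e.g. "arm,opencl"
opt.set_model_type("naive_buffer")           # or "protobuf"
opt.set_optimize_out("./mobilenet_v1_opt")   # output path of the optimized model
opt.run_optimize(False)                      # True also records ops info for library tailoring
opt.print_supported_ops()                    # list ops supported on the chosen places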
......@@ -93,9 +93,13 @@ add_custom_command(
OUTPUT ops.h # not a real path to the output to force it execute every time.
)
# generate fake kernels for memory_optimize_tool
#-------------------------------opt----------------------------------------------------------------
# tricks to create header files for opt
add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/create_fake_kernel_registry.py
${kernels_src_list}
${fake_kernels_src_list}
${CMAKE_BINARY_DIR}/all_kernel_faked.cc
${CMAKE_BINARY_DIR}/kernel_src_map.h
OUTPUT all_kernel_faked.cc # not a real path to the output to force it execute every time.
......@@ -103,12 +107,12 @@ add_custom_command(
add_custom_target(op_list_h DEPENDS ops.h)
add_custom_target(kernel_list_h DEPENDS kernels.h)
add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc)
#add_custom_target(opencl_kernels_source_cc DEPENDS opencl_kernels_source.cc)
# create a header file to store ops info sorted by supported platforms
add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/record_supported_kernel_op.py
${kernels_src_list}
${fake_kernels_src_list}
${ops_src_list}
${CMAKE_BINARY_DIR}/supported_kernel_op_info.h
OUTPUT supported_kernel_op_info.h # not a real path to the output to force it execute every time.
......
......@@ -395,7 +395,7 @@ class ContextScheduler {
break;
#endif
default:
#ifndef LITE_ON_MODEL_OPTIMIZE_TOOL
#if (!defined LITE_ON_MODEL_OPTIMIZE_TOOL) && (!defined LITE_WITH_PYTHON)
LOG(FATAL) << "unsupported target " << TargetToStr(target);
#endif
break;
......
# NOTE we leave the add_kernel not protected by LITE_WITH_LIGHT_WEIGHT_FRAMEWORK so that all the kernels will be registered
# to the model_optimize_tool.
if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)))
if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)))
return()
endif()
......
if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_CUDA))
if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_CUDA))
return()
endif()
......
if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_FPGA))
if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_FPGA))
return()
endif()
......
if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_OPENCL))
if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_OPENCL))
return ()
endif()
......
......@@ -358,6 +358,7 @@ function make_x86 {
-DLITE_WITH_ARM=OFF \
            -DLITE_WITH_PYTHON=${BUILD_PYTHON} \
            -DWITH_GPU=OFF \
-DLITE_BUILD_EXTRA=ON \
-DLITE_WITH_XPU=$BUID_XPU \
-DXPU_SDK_ROOT=$XPU_SDK_ROOT
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this module records kernels of invalid (not enabled) places into all_kernel_faked.cc
from __future__ import print_function
import sys
......@@ -18,12 +19,13 @@ import logging
from ast import RegisterLiteKernelParser
from utils import *
if len(sys.argv) != 4:
if len(sys.argv) != 5:
    print("Error: create_fake_kernel_registry.py requires four inputs!")
exit(1)
ops_list_path = sys.argv[1]
dest_path = sys.argv[2]
kernelmap_path = sys.argv[3]
kernels_list_path = sys.argv[1]
faked_kernels_list_path = sys.argv[2]
dest_path = sys.argv[3]
kernelmap_path = sys.argv[4]
out_lines = [
'#pragma once',
......@@ -77,68 +79,85 @@ const std::map<std::string, std::string> kernel2path_map{
'''
]
def parse_fake_kernels_from_path(list_path):
with open(list_path) as f:
paths = set([path for path in f])
for path in paths:
print('path', path)
with open(path.strip()) as g:
c = g.read()
kernel_parser = RegisterLiteKernelParser(c)
kernel_parser.parse()
for k in kernel_parser.kernels:
kernel_name = "{op_type}_{target}_{precision}_{data_layout}_{alias}_class".format(
op_type=k.op_type,
target=k.target,
precision=k.precision,
data_layout=k.data_layout,
alias=k.alias
)
kernel_define = fake_kernel % (
kernel_name,
k.target,
k.precision,
k.data_layout,
kernel_name
)
out_lines.append(kernel_define)
out_lines.append("")
key = "REGISTER_LITE_KERNEL(%s, %s, %s, %s, %s, %s)" % (
k.op_type,
k.target,
k.precision,
k.data_layout,
'::paddle::lite::' + kernel_name,
k.alias
)
out_lines.append(key)
for input in k.inputs:
io = ' .BindInput("%s", {%s})' % (input.name, input.type)
out_lines.append(io)
for output in k.outputs:
io = ' .BindOutput("%s", {%s})' % (output.name, output.type)
out_lines.append(io)
out_lines.append(" .Finalize();")
out_lines.append("")
out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias))
def parse_supported_kernels_from_path(list_path):
with open(list_path) as f:
paths = set([path for path in f])
for path in paths:
print('path', path)
with open(path.strip()) as g:
c = g.read()
kernel_parser = RegisterLiteKernelParser(c)
kernel_parser.parse()
for k in kernel_parser.kernels:
index = path.rindex('/')
filename = path[index + 1:]
map_element = ' {"%s,%s,%s,%s,%s", "%s"},' % (
k.op_type,
k.target,
k.precision,
k.data_layout,
k.alias,
filename.strip()
)
kernel_src_map_lines.append(map_element)
parse_fake_kernels_from_path(faked_kernels_list_path)
parse_supported_kernels_from_path(faked_kernels_list_path)
parse_supported_kernels_from_path(kernels_list_path)
with open(ops_list_path) as f:
paths = set([path for path in f])
for path in paths:
print('path', path)
with open(path.strip()) as g:
c = g.read()
kernel_parser = RegisterLiteKernelParser(c)
kernel_parser.parse()
for k in kernel_parser.kernels:
kernel_name = "{op_type}_{target}_{precision}_{data_layout}_{alias}_class".format(
op_type = k.op_type,
target = k.target,
precision = k.precision,
data_layout = k.data_layout,
alias = k.alias,
)
kernel_define = fake_kernel % (
kernel_name,
k.target,
k.precision,
k.data_layout,
kernel_name,
)
out_lines.append(kernel_define)
out_lines.append("")
key = "REGISTER_LITE_KERNEL(%s, %s, %s, %s, %s, %s)" % (
k.op_type,
k.target,
k.precision,
k.data_layout,
'::paddle::lite::' + kernel_name,
k.alias,
)
out_lines.append(key)
for input in k.inputs:
io = ' .BindInput("%s", {%s})' % (input.name, input.type)
out_lines.append(io)
for output in k.outputs:
io = ' .BindOutput("%s", {%s})' % (output.name, output.type)
out_lines.append(io)
out_lines.append(" .Finalize();")
out_lines.append("")
out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias))
index = path.rindex('/')
filename = path[index + 1:]
map_element = ' {"%s,%s,%s,%s,%s", "%s"},' % (
k.op_type,
k.target,
k.precision,
k.data_layout,
k.alias,
filename.strip()
)
kernel_src_map_lines.append(map_element)
with open(dest_path, 'w') as f:
logging.info("write kernel list to %s" % dest_path)
f.write('\n'.join(out_lines))
......
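For context, a minimal sketch of how this generator is invoked, mirroring the add_custom_command shown earlier; the relative paths stand in for the real ${CMAKE_SOURCE_DIR}/${CMAKE_BINARY_DIR} locations and are assumptions.

import subprocess
import sys

# argv[1]: list of real kernel sources, argv[2]: list of faked kernel sources,
# argv[3]: generated all_kernel_faked.cc, argv[4]: generated kernel_src_map.h
subprocess.check_call([
    sys.executable, "lite/tools/cmake_tools/create_fake_kernel_registry.py",
    "build/kernels_src_list.txt",
    "build/fake_kernels_src_list.txt",
    "build/all_kernel_faked.cc",
    "build/kernel_src_map.h",
])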
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this module will record supported ops from kernels_src.txt
from __future__ import print_function
import sys
......@@ -18,12 +19,13 @@ import logging
from ast import RegisterLiteKernelParser
from ast import RegisterLiteOpParser
if len(sys.argv) != 4:
print("Error: record_supported_kernel_op.py requires three inputs!")
exit(1)
if len(sys.argv) != 5:
print("Error: record_supported_kernel_op.py requires four inputs!")
sys.exit(1)
kernels_list_path = sys.argv[1]
ops_list_path = sys.argv[2]
kernel_op_map_dest_path = sys.argv[3]
faked_kernels_list_path = sys.argv[2]
ops_list_path = sys.argv[3]
kernel_op_map_dest_path = sys.argv[4]
out_lines = [
......@@ -51,11 +53,11 @@ const std::vector<std::vector<std::string>> supported_ops_target = {
'''
]
ops_lines=[]
ops_lines = []
# valid targets and valid_ops
valid_targets = ["kUnk", "kHost", "kX86", "kCUDA", "kARM", "kOpenCL", "kAny", "kFPGA", "kNPU", "kXPU"]
valid_ops = [[],[],[],[],[],[],[],[],[],[]]
valid_ops = [[], [], [], [], [], [], [], [], [], []]
class TargetType:
kUnk = 0
kHost = 1
......@@ -78,8 +80,21 @@ with open(kernels_list_path) as f:
kernel_parser.parse()
for k in kernel_parser.kernels:
if hasattr(TargetType, k.target):
index=getattr(TargetType, k.target)
index = getattr(TargetType, k.target)
valid_ops[index].append(k.op_type)
# record op info of faked kernels into `valid_ops` according to their target types
with open(faked_kernels_list_path) as f:
paths = set([path for path in f])
for path in paths:
with open(path.strip()) as g:
c = g.read()
kernel_parser = RegisterLiteKernelParser(c)
kernel_parser.parse()
for k in kernel_parser.kernels:
if hasattr(TargetType, k.target):
index = getattr(TargetType, k.target)
valid_ops[index].append(k.op_type)
# clear the repeated ops
for target in valid_targets:
......@@ -114,7 +129,7 @@ with open(kernel_op_map_dest_path, 'w') as f:
f.write('\n'.join(out_lines))
# write kernels into head file
for target in valid_targets:
if len(valid_ops[getattr(TargetType, target)]) == 0 :
if len(valid_ops[getattr(TargetType, target)]) == 0:
f.write("\n // %s_OPS: " %target)
f.write('\n {},')
else:
......