From 81de4127c0332e8ccfcb34ddaecf778f23d55ec1 Mon Sep 17 00:00:00 2001 From: huzhiqiang <912790387@qq.com> Date: Wed, 25 Mar 2020 13:07:43 +0800 Subject: [PATCH] [Python lib] Add opt lib into python lib (#3209) --- cmake/lite.cmake | 48 ++- lite/api/CMakeLists.txt | 5 + lite/api/light_api_impl.cc | 1 + lite/api/opt_base.cc | 364 ++++++++++++++++++ lite/api/opt_base.h | 86 +++++ lite/api/python/pybind/CMakeLists.txt | 2 +- lite/api/python/pybind/pybind.cc | 18 + lite/api/python/pybind/pybind.h | 4 + lite/core/CMakeLists.txt | 6 +- lite/core/context.h | 2 +- lite/kernels/arm/CMakeLists.txt | 2 +- lite/kernels/cuda/CMakeLists.txt | 2 +- lite/kernels/fpga/CMakeLists.txt | 2 +- lite/kernels/opencl/CMakeLists.txt | 2 +- lite/tools/build.sh | 1 + .../create_fake_kernel_registry.py | 151 ++++---- .../cmake_tools/record_supported_kernel_op.py | 35 +- 17 files changed, 634 insertions(+), 97 deletions(-) create mode 100644 lite/api/opt_base.cc create mode 100644 lite/api/opt_base.h diff --git a/cmake/lite.cmake b/cmake/lite.cmake index fd40fa437b..265de3fbf6 100644 --- a/cmake/lite.cmake +++ b/cmake/lite.cmake @@ -275,6 +275,11 @@ set(host_kernels CACHE INTERNAL "host kernels") set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt") file(WRITE ${kernels_src_list} "") # clean + +# file to record faked kernels for opt python lib +set(fake_kernels_src_list "${CMAKE_BINARY_DIR}/fake_kernels_src_list.txt") +file(WRITE ${fake_kernels_src_list} "") # clean + if(LITE_BUILD_TAILOR) set(tailored_kernels_list_path "${LITE_OPTMODEL_DIR}/.tailored_kernels_source_list") file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list) @@ -303,62 +308,74 @@ function(add_kernel TARGET device level) return() endif() - if (LITE_ON_MODEL_OPTIMIZE_TOOL) - # the source list will collect for model_optimize_tool to fake kernel generation. 
- foreach(src ${args_SRCS}) - file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") - endforeach() - return() - endif() - - # when compiling the model_optimize_tool, a source file with all the fake kernel definitions will be generated, - # no need to continue the compilation of the true kernel source. - if (LITE_ON_MODEL_OPTIMIZE_TOOL) - return() - endif(LITE_ON_MODEL_OPTIMIZE_TOOL) - if ("${device}" STREQUAL "Host") set(host_kernels "${host_kernels};${TARGET}" CACHE INTERNAL "") endif() if ("${device}" STREQUAL "ARM") if (NOT LITE_WITH_ARM) + foreach(src ${args_SRCS}) + file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() return() endif() set(arm_kernels "${arm_kernels};${TARGET}" CACHE INTERNAL "") endif() if ("${device}" STREQUAL "X86") if (NOT LITE_WITH_X86) + foreach(src ${args_SRCS}) + file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() + return() + elseif (LITE_ON_MODEL_OPTIMIZE_TOOL) + foreach(src ${args_SRCS}) + file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() return() endif() set(x86_kernels "${x86_kernels};${TARGET}" CACHE INTERNAL "") endif() if ("${device}" STREQUAL "NPU") if (NOT LITE_WITH_NPU) + foreach(src ${args_SRCS}) + file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() return() endif() set(npu_kernels "${npu_kernels};${TARGET}" CACHE INTERNAL "") endif() if ("${device}" STREQUAL "XPU") if (NOT LITE_WITH_XPU) + foreach(src ${args_SRCS}) + file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() return() endif() set(xpu_kernels "${xpu_kernels};${TARGET}" CACHE INTERNAL "") endif() if ("${device}" STREQUAL "FPGA") if (NOT LITE_WITH_FPGA) + foreach(src ${args_SRCS}) + file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() return() endif() set(fpga_kernels "${fpga_kernels};${TARGET}" CACHE INTERNAL "") 
endif() if ("${device}" STREQUAL "BM") if (NOT LITE_WITH_BM) + foreach(src ${args_SRCS}) + file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() return() endif() set(bm_kernels "${bm_kernels};${TARGET}" CACHE INTERNAL "") endif() if ("${device}" STREQUAL "OPENCL") if (NOT LITE_WITH_OPENCL) + foreach(src ${args_SRCS}) + file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() return() endif() set(opencl_kernels "${opencl_kernels};${TARGET}" CACHE INTERNAL "") @@ -366,6 +383,9 @@ function(add_kernel TARGET device level) if ("${device}" STREQUAL "CUDA") if (NOT LITE_WITH_CUDA) + foreach(src ${args_SRCS}) + file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() return() endif() set(cuda_kernels "${cuda_kernels};${TARGET}" CACHE INTERNAL "") diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt index e786f346cc..b360b476e0 100644 --- a/lite/api/CMakeLists.txt +++ b/lite/api/CMakeLists.txt @@ -303,6 +303,11 @@ if (LITE_ON_TINY_PUBLISH) return() endif() + +# add library for opt_base +lite_cc_library(opt_base SRCS opt_base.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc DEPS kernel op optimizer mir_passes utils) +add_dependencies(opt_base supported_kernel_op_info_h framework_proto all_kernel_faked_cc kernel_list_h) + if (LITE_ON_MODEL_OPTIMIZE_TOOL) message(STATUS "Compiling opt") lite_cc_binary(opt SRCS opt.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc diff --git a/lite/api/light_api_impl.cc b/lite/api/light_api_impl.cc index 3965843250..cdf5b7fb06 100644 --- a/lite/api/light_api_impl.cc +++ b/lite/api/light_api_impl.cc @@ -58,6 +58,7 @@ void LightPredictorImpl::Run() { std::shared_ptr LightPredictorImpl::Clone() { LOG(FATAL) << "The Clone API is not supported in LigthPredictor"; + return nullptr; } std::string LightPredictorImpl::GetVersion() const { return lite::version(); } diff --git a/lite/api/opt_base.cc b/lite/api/opt_base.cc new file mode 100644 index 
0000000000..bd86f48624 --- /dev/null +++ b/lite/api/opt_base.cc @@ -0,0 +1,364 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/api/opt_base.h" +#include "all_kernel_faked.cc" // NOLINT + +namespace paddle { +namespace lite_api { + +void OptBase::SetModelDir(const std::string& model_path) { + opt_config_.set_model_dir(model_path); +} + +void OptBase::SetModelFile(const std::string& model_path) { + opt_config_.set_model_file(model_path); +} + +void OptBase::SetParamFile(const std::string& param_path) { + opt_config_.set_param_file(param_path); +} + +void OptBase::SetModelType(std::string optimize_out_type) { + if (optimize_out_type == "protobuf") { + model_type_ = LiteModelType::kProtobuf; + } else if (optimize_out_type == "naive_buffer") { + model_type_ = LiteModelType::kNaiveBuffer; + } else { + LOG(FATAL) << "Unsupported Model type :" << optimize_out_type; + } +} + +void OptBase::SetValidPlaces(const std::string& valid_places) { + valid_places_.clear(); + auto target_reprs = lite::Split(valid_places, ","); + for (auto& target_repr : target_reprs) { + if (target_repr == "arm") { + valid_places_.emplace_back(TARGET(kARM)); + } else if (target_repr == "opencl") { + valid_places_.emplace_back( + Place{TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault)}); + valid_places_.emplace_back( + Place{TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kNCHW)}); + 
valid_places_.emplace_back( + Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageDefault)}); + valid_places_.emplace_back( + Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kNCHW)}); + valid_places_.emplace_back( + TARGET(kARM)); // enable kARM CPU kernel when no opencl kernel + } else if (target_repr == "x86") { + valid_places_.emplace_back(TARGET(kX86)); + } else if (target_repr == "npu") { + valid_places_.emplace_back(TARGET(kNPU)); + } else if (target_repr == "xpu") { + valid_places_.emplace_back(TARGET(kXPU)); + } else { + LOG(FATAL) << lite::string_format( + "Wrong target '%s' found, please check the command flag " + "'valid_targets'", + target_repr.c_str()); + } + } + CHECK(!valid_places_.empty()) + << "At least one target should be set, should set the " + "command argument 'valid_targets'"; +} + +void OptBase::SetOptimizeOut(const std::string& optimized_out_path) { + optimize_out_path_ = optimized_out_path; +} + +void OptBase::RunOptimize(bool record_strip_info) { + CheckIfModelSupported(false); + OpKernelInfoCollector::Global().SetKernel2path(kernel2path_map); + opt_config_.set_valid_places(valid_places_); + if (model_set_dir_ != "") { + RunOptimizeFromModelSet(record_strip_info); + } else { + auto opt_predictor = lite_api::CreatePaddlePredictor(opt_config_); + opt_predictor->SaveOptimizedModel( + optimize_out_path_, model_type_, record_strip_info); + auto resulted_model_name = + record_strip_info ? 
"information of striped model" : "optimized model"; + std::cout << "Save the " << resulted_model_name + << " into :" << optimize_out_path_ << "successfully"; + } +} + +// collect ops info of modelset +void CollectModelMetaInfo(const std::string& output_dir, + const std::vector& models, + const std::string& filename) { + std::set total; + for (const auto& name : models) { + std::string model_path = + lite::Join({output_dir, name, filename}, "/"); + auto lines = lite::ReadLines(model_path); + total.insert(lines.begin(), lines.end()); + } + std::string output_path = + lite::Join({output_dir, filename}, "/"); + lite::WriteLines(std::vector(total.begin(), total.end()), + output_path); +} + +void OptBase::SetModelSetDir(const std::string& model_set_path) { + model_set_dir_ = model_set_path; +} +void OptBase::RunOptimizeFromModelSet(bool record_strip_info) { + // 1. mkdir of outputed optimized model set. + lite::MkDirRecur(optimize_out_path_); + auto model_dirs = lite::ListDir(model_set_dir_, true); + if (model_dirs.size() == 0) { + LOG(FATAL) << "[" << model_set_dir_ << "] does not contain any model"; + } + + // 2. optimize each model in inputed model set dir. 
+ std::string model_file = opt_config_.model_file(); + std::string param_file = opt_config_.param_file(); + for (const auto& name : model_dirs) { + std::string input_model_dir = + lite::Join({model_set_dir_, name}, "/"); + std::string output_model_dir = + lite::Join({optimize_out_path_, name}, "/"); + + if (opt_config_.model_file() != "" && opt_config_.param_file() != "") { + auto model_file_path = + lite::Join({input_model_dir, model_file}, "/"); + auto param_file_path = + lite::Join({input_model_dir, param_file}, "/"); + } + + std::cout << "Start optimize model: " << input_model_dir; + + opt_config_.set_model_dir(input_model_dir); + opt_config_.set_model_file(model_file); + opt_config_.set_param_file(param_file); + + auto opt_predictor = lite_api::CreatePaddlePredictor(opt_config_); + opt_predictor->SaveOptimizedModel( + optimize_out_path_, model_type_, record_strip_info); + + std::cout << "Optimize done. "; + } + + // 3. if record_strip_info = true, we will record striping info + if (record_strip_info) { + // Collect all models information + CollectModelMetaInfo( + optimize_out_path_, model_dirs, lite::TAILORD_OPS_SOURCE_LIST_FILENAME); + CollectModelMetaInfo( + optimize_out_path_, model_dirs, lite::TAILORD_OPS_LIST_NAME); + CollectModelMetaInfo(optimize_out_path_, + model_dirs, + lite::TAILORD_KERNELS_SOURCE_LIST_FILENAME); + CollectModelMetaInfo( + optimize_out_path_, model_dirs, lite::TAILORD_KERNELS_LIST_NAME); + std::cout << "Record the information of stripped models into :" + << optimize_out_path_ << "successfully"; + } +} + +void OptBase::PrintHelpInfo() { + const std::string opt_version = lite::version(); + const char help_info[] = + "At least one argument should be inputed. 
Valid arguments are listed " + "below:\n" + " Arguments of help information:\n" + " `help()` Print help infomation\n" + " Arguments of model optimization:\n" + " `set_model_dir(model_dir)`\n" + " `set_model_file(model_file_path)`\n" + " `set_param_file(param_file_path)`\n" + " `set_model_type(protobuf|naive_buffer)`\n" + " `set_optimize_out(output_optimize_model_dir)`\n" + " `set_valid_places(arm|opencl|x86|npu|xpu)`\n" + " `run_optimize(false|true)`\n" + " ` ----fasle&true refer to whether to record ops info for " + "tailoring lib, false by default`\n" + " Arguments of model checking and ops information:\n" + " `print_all_ops()` Display all the valid operators of " + "Paddle-Lite\n" + " `print_supported_ops` Display supported operators of valid " + "places\n" + " `check_if_model_supported()` Check if the input model is " + "supported\n"; + + std::cout << "opt version:" << opt_version << std::endl + << help_info << std::endl; +} +// 2. Print supported info of inputed ops +void OptBase::PrintOpsInfo(const std::set& valid_ops) { + std::vector lite_supported_targets = {"kHost", + "kX86", + "kCUDA", + "kARM", + "kOpenCL", + "kFPGA", + "kNPU", + "kXPU", + "kAny", + "kUnk"}; + // Get the lengh of the first column: maximum length of the op_type + size_t maximum_optype_length = 0; + for (auto it = supported_ops.begin(); it != supported_ops.end(); it++) { + maximum_optype_length = it->first.size() > maximum_optype_length + ? it->first.size() + : maximum_optype_length; + } + std::cout << std::setiosflags(std::ios::internal); + // Print the first row: OP_nam taget1 target2 ... 
+ std::cout << std::setw(maximum_optype_length) << "OP_name"; + for (size_t i = 0; i < lite_supported_targets.size(); i++) { + std::cout << std::setw(10) << lite_supported_targets[i].substr(1); + } + std::cout << std::endl; + // Print the name of supported ops and mark if it's supported by each target + // print the support info of inputed ops: valid_ops + for (auto op = valid_ops.begin(); op != valid_ops.end(); op++) { + std::cout << std::setw(maximum_optype_length) << *op; + // Check: If this kernel doesn't match any operator, we will skip it. + if (supported_ops.find(*op) == supported_ops.end()) { + continue; + } + // Print OP info. + auto ops_valid_places = supported_ops.at(*op); + for (size_t i = 0; i < lite_supported_targets.size(); i++) { + if (std::find(ops_valid_places.begin(), + ops_valid_places.end(), + lite_supported_targets[i]) != ops_valid_places.end()) { + std::cout << std::setw(10) << "Y"; + } else { + std::cout << std::setw(10) << " "; + } + } + std::cout << std::endl; + } +} + +void OptBase::DisplayKernelsInfo() { // Display kernel information + std::cout << ::paddle::lite::KernelRegistry::Global().DebugString(); +} +void OptBase::PrintAllOps() { + // 1. Get supported ops on these targets + std::set valid_ops; + for (size_t i = 0; i < supported_ops_target.size(); i++) { + auto ops = supported_ops_target[i]; + valid_ops.insert(ops.begin(), ops.end()); + } + // 2. Print support info of these ops + PrintOpsInfo(valid_ops); +} + +void OptBase::PrintSupportedOps() { + // 1. 
Get the valid hardware targets + std::vector target_types = {}; + for (size_t i = 0; i < valid_places_.size(); i++) { + target_types.push_back(valid_places_[i].target); + } + std::string targets_str = TargetToStr(target_types[0]); + for (size_t i = 1; i < target_types.size(); i++) { + targets_str = targets_str + TargetToStr(target_types[i]); + } + std::cout << "Supported OPs on '" << targets_str << "': " << std::endl; + target_types.push_back(TARGET(kHost)); + target_types.push_back(TARGET(kUnk)); + + // 2. Get supported ops on these targets + std::set valid_ops; + for (size_t i = 0; i < target_types.size(); i++) { + auto ops = supported_ops_target[static_cast(target_types[i])]; + valid_ops.insert(ops.begin(), ops.end()); + } + // 3. Print support info of these ops + PrintOpsInfo(valid_ops); +} + +// test whether this model is supported +void OptBase::CheckIfModelSupported(bool print_ops_info) { + // 1. parse valid places and valid targets + auto valid_ops = supported_ops_target[static_cast(TARGET(kHost))]; + auto valid_unktype_ops = supported_ops_target[static_cast(TARGET(kUnk))]; + valid_ops.insert( + valid_ops.end(), valid_unktype_ops.begin(), valid_unktype_ops.end()); + for (size_t i = 0; i < valid_places_.size(); i++) { + auto target = valid_places_[i].target; + auto ops = supported_ops_target[static_cast(target)]; + valid_ops.insert(valid_ops.end(), ops.begin(), ops.end()); + } + // get valid ops + std::set valid_ops_set(valid_ops.begin(), valid_ops.end()); + + // 2.Load model into program to get ops in model + std::string prog_path = opt_config_.model_dir() + "/__model__"; + if (!(opt_config_.model_file()).empty() && + !(opt_config_.param_file()).empty()) { + prog_path = opt_config_.model_file(); + } + lite::cpp::ProgramDesc cpp_prog; + framework::proto::ProgramDesc pb_proto_prog = + *lite::LoadProgram(prog_path, false); + lite::pb::ProgramDesc pb_prog(&pb_proto_prog); + // Transform to cpp::ProgramDesc + lite::TransformProgramDescAnyToCpp(pb_prog, 
&cpp_prog); + + std::set unsupported_ops; + std::set input_model_ops; + for (size_t index = 0; index < cpp_prog.BlocksSize(); index++) { + auto current_block = cpp_prog.GetBlock(index); + for (size_t i = 0; i < current_block->OpsSize(); ++i) { + auto& op_desc = *current_block->GetOp(i); + auto op_type = op_desc.Type(); + input_model_ops.insert(op_type); + if (valid_ops_set.count(op_type) == 0) { + unsupported_ops.insert(op_type); + } + } + } + // 3. Print ops_info of input model and check if this model is supported + if (print_ops_info) { + std::cout << "OPs in the input model include:\n"; + PrintOpsInfo(input_model_ops); + } + if (!unsupported_ops.empty()) { + std::string unsupported_ops_str = *unsupported_ops.begin(); + for (auto op_str = ++unsupported_ops.begin(); + op_str != unsupported_ops.end(); + op_str++) { + unsupported_ops_str = unsupported_ops_str + ", " + *op_str; + } + std::vector targets = {}; + for (size_t i = 0; i < valid_places_.size(); i++) { + targets.push_back(valid_places_[i].target); + } + std::sort(targets.begin(), targets.end()); + targets.erase(unique(targets.begin(), targets.end()), targets.end()); + std::string targets_str = TargetToStr(targets[0]); + for (size_t i = 1; i < targets.size(); i++) { + targets_str = targets_str + "," + TargetToStr(targets[i]); + } + + LOG(ERROR) << "Error: This model is not supported, because " + << unsupported_ops.size() << " ops are not supported on '" + << targets_str << "'. These unsupported ops are: '" + << unsupported_ops_str << "'."; + exit(1); + } + if (print_ops_info) { + std::cout << "Paddle-Lite supports this model!" << std::endl; + exit(1); + } +} +} // namespace lite_api +} // namespace paddle diff --git a/lite/api/opt_base.h b/lite/api/opt_base.h new file mode 100644 index 0000000000..a8d6d0390c --- /dev/null +++ b/lite/api/opt_base.h @@ -0,0 +1,86 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * This file defines Opt and basic functions about model transformation. + */ + +#ifndef PADDLE_LITE_OPT_H_ // NOLINT +#define PADDLE_LITE_OPT_H_ +#include +#include +#include +#include +#include +// stores the map that records the source_file path of each kernel. +#include "kernel_src_map.h" // NOLINT +#include "lite/api/cxx_api.h" +// version of Paddle-lite +#include "lite/core/version.h" +// model parser functions to pre-load model to verify if this model is supported +#include "lite/model_parser/compatible_pb.h" +#include "lite/model_parser/pb/program_desc.h" +#include "lite/utils/string.h" +// recorded all the ops supported by paddle-lite +#include "supported_kernel_op_info.h" // NOLINT + +namespace paddle { +namespace lite_api { + +/// The PaddlePredictor defines the basic interfaces for different kinds of +/// predictors. 
+class LITE_API OptBase { + public: + OptBase() = default; + void SetModelSetDir(const std::string &model_set_path); + void SetModelDir(const std::string &model_path); + void SetModelFile(const std::string &model_path); + void SetParamFile(const std::string ¶m_path); + void SetValidPlaces(const std::string &valid_places); + void SetOptimizeOut(const std::string &optimized_out_path); + // set optimized_model type + void SetModelType(std::string model_type); + // transform and save the optimized model + void RunOptimize(bool record_strip_info = false); + + // fuctions of printing info + // 1. help info + void PrintHelpInfo(); + // 2. PrintOpsInfo + void PrintOpsInfo(const std::set &valid_ops = + {}); // print supported ops on target_types + void PrintAllOps(); // print all ops + void PrintSupportedOps(); // print ops supported on valid_places_ + void DisplayKernelsInfo(); // Display kernel information + // 3. Check if this model is supported + void CheckIfModelSupported(bool print_ops_info = true); + + private: + CxxConfig opt_config_; + // valid places for the optimized_model + std::vector valid_places_; + // filename of the optimized_model + std::string optimize_out_path_; + // type of the optimized_model, kNaiveBuffer default. 
+ LiteModelType model_type_{LiteModelType::kNaiveBuffer}; + // Dir path of a set of models, this should be combined with model + std::string model_set_dir_; + + void RunOptimizeFromModelSet(bool record_strip_info = false); +}; + +} // namespace lite_api +} // namespace paddle + +#endif // NOLINT diff --git a/lite/api/python/pybind/CMakeLists.txt b/lite/api/python/pybind/CMakeLists.txt index eabb6b150b..b1de18d50c 100644 --- a/lite/api/python/pybind/CMakeLists.txt +++ b/lite/api/python/pybind/CMakeLists.txt @@ -1,6 +1,6 @@ set(PYBIND_DEPS pybind python paddle_api_light paddle_api) if (NOT LITE_ON_TINY_PUBLISH) - set(PYBIND_DEPS ${PYBIND_DEPS} paddle_api_full) + set(PYBIND_DEPS ${PYBIND_DEPS} paddle_api_full opt_base) endif() lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS}) diff --git a/lite/api/python/pybind/pybind.cc b/lite/api/python/pybind/pybind.cc index 985246a31a..e86d570e18 100644 --- a/lite/api/python/pybind/pybind.cc +++ b/lite/api/python/pybind/pybind.cc @@ -26,6 +26,7 @@ #ifndef LITE_ON_TINY_PUBLISH #include "lite/api/cxx_api.h" +#include "lite/api/opt_base.h" #endif #include "lite/api/light_api.h" @@ -47,10 +48,27 @@ using lite_api::PrecisionType; using lite_api::DataLayoutType; using lite_api::Place; using lite::LightPredictorImpl; +using lite_api::OptBase; #ifndef LITE_ON_TINY_PUBLISH using lite::CxxPaddleApiImpl; static void BindLiteCxxPredictor(py::module *m); +void BindLiteOpt(py::module *m) { + py::class_ opt_base(*m, "Opt"); + opt_base.def(py::init<>()) + .def("set_model_dir", &OptBase::SetModelDir) + .def("set_modelset_dir", &OptBase::SetModelSetDir) + .def("set_model_file", &OptBase::SetModelFile) + .def("set_param_file", &OptBase::SetParamFile) + .def("set_valid_places", &OptBase::SetValidPlaces) + .def("set_optimize_out", &OptBase::SetOptimizeOut) + .def("set_model_type", &OptBase::SetModelType) + .def("run_optimize", &OptBase::RunOptimize) + .def("help", &OptBase::PrintHelpInfo) + .def("print_supported_ops", 
&OptBase::PrintSupportedOps) + .def("display_kernels_info", &OptBase::DisplayKernelsInfo) + .def("print_all_ops", &OptBase::PrintAllOps); +} #endif static void BindLiteLightPredictor(py::module *m); static void BindLiteCxxConfig(py::module *m); diff --git a/lite/api/python/pybind/pybind.h b/lite/api/python/pybind/pybind.h index 7caf00a9c3..15609957e0 100644 --- a/lite/api/python/pybind/pybind.h +++ b/lite/api/python/pybind/pybind.h @@ -22,11 +22,15 @@ namespace lite { namespace pybind { void BindLiteApi(pybind11::module *m); +void BindLiteOpt(pybind11::module *m); PYBIND11_MODULE(lite, m) { m.doc() = "C++ core of Paddle-Lite"; BindLiteApi(&m); +#ifndef LITE_ON_TINY_PUBLISH + BindLiteOpt(&m); +#endif } } // namespace pybind diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt index fd595bca51..db8bc29d70 100644 --- a/lite/core/CMakeLists.txt +++ b/lite/core/CMakeLists.txt @@ -93,9 +93,13 @@ add_custom_command( OUTPUT ops.h # not a real path to the output to force it execute every time. ) # generate fake kernels for memory_optimize_tool + +#-------------------------------opt---------------------------------------------------------------- +# tricks to create headfiles for opt add_custom_command( COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/create_fake_kernel_registry.py ${kernels_src_list} + ${fake_kernels_src_list} ${CMAKE_BINARY_DIR}/all_kernel_faked.cc ${CMAKE_BINARY_DIR}/kernel_src_map.h OUTPUT all_kernel_faked.cc # not a real path to the output to force it execute every time. 
@@ -103,12 +107,12 @@ add_custom_command( add_custom_target(op_list_h DEPENDS ops.h) add_custom_target(kernel_list_h DEPENDS kernels.h) add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc) -#add_custom_target(opencl_kernels_source_cc DEPENDS opencl_kernels_source.cc) # create headfile to restore ops info sorted by suppported platforms add_custom_command( COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/record_supported_kernel_op.py ${kernels_src_list} + ${fake_kernels_src_list} ${ops_src_list} ${CMAKE_BINARY_DIR}/supported_kernel_op_info.h OUTPUT supported_kernel_op_info.h # not a real path to the output to force it execute every time. diff --git a/lite/core/context.h b/lite/core/context.h index fd0715d698..978fb5d67a 100644 --- a/lite/core/context.h +++ b/lite/core/context.h @@ -395,7 +395,7 @@ class ContextScheduler { break; #endif default: -#ifndef LITE_ON_MODEL_OPTIMIZE_TOOL +#if (!defined LITE_ON_MODEL_OPTIMIZE_TOOL) && (!defined LITE_WITH_PYTHON) LOG(FATAL) << "unsupported target " << TargetToStr(target); #endif break; diff --git a/lite/kernels/arm/CMakeLists.txt b/lite/kernels/arm/CMakeLists.txt index 75dee596dd..7550d77014 100644 --- a/lite/kernels/arm/CMakeLists.txt +++ b/lite/kernels/arm/CMakeLists.txt @@ -1,6 +1,6 @@ # NOTE we leave the add_kernel not protected by LITE_WITH_LIGHT_WEIGHT_FRAMEWORK so that all the kernels will be registered # to the model_optimize_tool. 
-if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM))) +if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM))) return() endif() diff --git a/lite/kernels/cuda/CMakeLists.txt b/lite/kernels/cuda/CMakeLists.txt index 9ec335ce81..3fb3136bfc 100644 --- a/lite/kernels/cuda/CMakeLists.txt +++ b/lite/kernels/cuda/CMakeLists.txt @@ -1,4 +1,4 @@ -if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_CUDA)) +if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_CUDA)) return() endif() diff --git a/lite/kernels/fpga/CMakeLists.txt b/lite/kernels/fpga/CMakeLists.txt index f6c3a39949..1f9b84e7db 100755 --- a/lite/kernels/fpga/CMakeLists.txt +++ b/lite/kernels/fpga/CMakeLists.txt @@ -1,4 +1,4 @@ -if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_FPGA)) +if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_FPGA)) return() endif() diff --git a/lite/kernels/opencl/CMakeLists.txt b/lite/kernels/opencl/CMakeLists.txt index 44dfc1e010..652ce25938 100644 --- a/lite/kernels/opencl/CMakeLists.txt +++ b/lite/kernels/opencl/CMakeLists.txt @@ -1,4 +1,4 @@ -if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_OPENCL)) +if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_OPENCL)) return () endif() diff --git a/lite/tools/build.sh b/lite/tools/build.sh index d556cc1cf1..e28dd6c53e 100755 --- a/lite/tools/build.sh +++ b/lite/tools/build.sh @@ -358,6 +358,7 @@ function make_x86 { -DLITE_WITH_ARM=OFF \ -DLITE_WITH_PYTHON=$BUILD_PYTHON \ -DWITH_GPU=OFF \ + -DLITE_WITH_PYTHON=${BUILD_PYTHON} \ -DLITE_BUILD_EXTRA=ON \ -DLITE_WITH_XPU=$BUID_XPU \ -DXPU_SDK_ROOT=$XPU_SDK_ROOT diff --git a/lite/tools/cmake_tools/create_fake_kernel_registry.py b/lite/tools/cmake_tools/create_fake_kernel_registry.py index 35012d5b16..0b96652c6f 100644 --- 
a/lite/tools/cmake_tools/create_fake_kernel_registry.py +++ b/lite/tools/cmake_tools/create_fake_kernel_registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# This module records fake definitions and registrations of the kernels listed in the faked-kernels source list (kernels of invalid places) into all_kernel_faked.cc from __future__ import print_function import sys @@ -18,12 +19,13 @@ import logging from ast import RegisterLiteKernelParser from utils import * -if len(sys.argv) != 4: +if len(sys.argv) != 5: print("Error: create_fake_kernel_registry.py requires three inputs!") exit(1) -ops_list_path = sys.argv[1] -dest_path = sys.argv[2] -kernelmap_path = sys.argv[3] +kernels_list_path = sys.argv[1] +faked_kernels_list_path = sys.argv[2] +dest_path = sys.argv[3] +kernelmap_path = sys.argv[4] out_lines = [ '#pragma once', @@ -77,68 +79,85 @@ const std::map kernel2path_map{ ''' ] +def parse_fake_kernels_from_path(list_path): + with open(list_path) as f: + paths = set([path for path in f]) + for path in paths: + print('path', path) + with open(path.strip()) as g: + c = g.read() + kernel_parser = RegisterLiteKernelParser(c) + kernel_parser.parse() + + for k in kernel_parser.kernels: + kernel_name = "{op_type}_{target}_{precision}_{data_layout}_{alias}_class".format( + op_type=k.op_type, + target=k.target, + precision=k.precision, + data_layout=k.data_layout, + alias=k.alias + ) + + kernel_define = fake_kernel % ( + kernel_name, + k.target, + k.precision, + k.data_layout, + kernel_name + ) + + out_lines.append(kernel_define) + out_lines.append("") + + + key = "REGISTER_LITE_KERNEL(%s, %s, %s, %s, %s, %s)" % ( + k.op_type, 
k.target, + k.precision, + k.data_layout, + '::paddle::lite::' + kernel_name, + k.alias + ) + out_lines.append(key) + + for input in k.inputs: + io = ' .BindInput("%s", {%s})' % (input.name, input.type) + out_lines.append(io) + for output in k.outputs: + io = ' .BindOutput("%s", {%s})' % (output.name, output.type) + out_lines.append(io) + out_lines.append(" .Finalize();") + out_lines.append("") + out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias)) + +def parse_sppported_kernels_from_path(list_path): + with open(list_path) as f: + paths = set([path for path in f]) + for path in paths: + print('path', path) + with open(path.strip()) as g: + c = g.read() + kernel_parser = RegisterLiteKernelParser(c) + kernel_parser.parse() + + for k in kernel_parser.kernels: + index = path.rindex('/') + filename = path[index + 1:] + map_element = ' {"%s,%s,%s,%s,%s", "%s"},' % ( + k.op_type, + k.target, + k.precision, + k.data_layout, + k.alias, + filename.strip() + ) + kernel_src_map_lines.append(map_element) + + +parse_fake_kernels_from_path(faked_kernels_list_path) +parse_sppported_kernels_from_path(faked_kernels_list_path) +parse_sppported_kernels_from_path(kernels_list_path) -with open(ops_list_path) as f: - paths = set([path for path in f]) - for path in paths: - print('path', path) - with open(path.strip()) as g: - c = g.read() - kernel_parser = RegisterLiteKernelParser(c) - kernel_parser.parse() - - for k in kernel_parser.kernels: - kernel_name = "{op_type}_{target}_{precision}_{data_layout}_{alias}_class".format( - op_type = k.op_type, - target = k.target, - precision = k.precision, - data_layout = k.data_layout, - alias = k.alias, - ) - - kernel_define = fake_kernel % ( - kernel_name, - k.target, - k.precision, - k.data_layout, - kernel_name, - ) - - out_lines.append(kernel_define) - out_lines.append("") - - - key = "REGISTER_LITE_KERNEL(%s, %s, %s, %s, %s, %s)" % ( - k.op_type, - k.target, - k.precision, - k.data_layout, - 
'::paddle::lite::' + kernel_name, - k.alias, - ) - out_lines.append(key) - - for input in k.inputs: - io = ' .BindInput("%s", {%s})' % (input.name, input.type) - out_lines.append(io) - for output in k.outputs: - io = ' .BindOutput("%s", {%s})' % (output.name, output.type) - out_lines.append(io) - out_lines.append(" .Finalize();") - out_lines.append("") - out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias)) - - index = path.rindex('/') - filename = path[index + 1:] - map_element = ' {"%s,%s,%s,%s,%s", "%s"},' % ( - k.op_type, - k.target, - k.precision, - k.data_layout, - k.alias, - filename.strip() - ) - kernel_src_map_lines.append(map_element) with open(dest_path, 'w') as f: logging.info("write kernel list to %s" % dest_path) f.write('\n'.join(out_lines)) diff --git a/lite/tools/cmake_tools/record_supported_kernel_op.py b/lite/tools/cmake_tools/record_supported_kernel_op.py index f6a3af6bd3..560174bc63 100644 --- a/lite/tools/cmake_tools/record_supported_kernel_op.py +++ b/lite/tools/cmake_tools/record_supported_kernel_op.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# This module records the supported ops of each target type, parsed from the kernels source list and the faked kernels source list from __future__ import print_function import sys @@ -18,12 +19,13 @@ import logging from ast import RegisterLiteKernelParser from ast import RegisterLiteOpParser -if len(sys.argv) != 4: - print("Error: record_supported_kernel_op.py requires three inputs!") - exit(1) +if len(sys.argv) != 5: + print("Error: record_supported_kernel_op.py requires four inputs!") + sys.exit(1) kernels_list_path = sys.argv[1] -ops_list_path = sys.argv[2] -kernel_op_map_dest_path = sys.argv[3] +faked_kernels_list_path = sys.argv[2] +ops_list_path = sys.argv[3] +kernel_op_map_dest_path = sys.argv[4] out_lines = [ @@ -51,11 +53,11 @@ const std::vector> supported_ops_target = { ''' ] -ops_lines=[] +ops_lines = [] # valid targets and valid_ops valid_targets = ["kUnk", "kHost", "kX86", "kCUDA", "kARM", "kOpenCL", "kAny", "kFPGA", "kNPU", "kXPU"] -valid_ops = [[],[],[],[],[],[],[],[],[],[]] +valid_ops = [[], [], [], [], [], [], [], [], [], []] class TargetType: kUnk = 0 kHost = 1 @@ -78,8 +80,21 @@ with open(kernels_list_path) as f: kernel_parser.parse() for k in kernel_parser.kernels: if hasattr(TargetType, k.target): - index=getattr(TargetType, k.target) + index = getattr(TargetType, k.target) valid_ops[index].append(k.op_type) +# record op_info of faked kernels into `valid_ops` as well, according to different target type +with open(faked_kernels_list_path) as f: + paths = set([path for path in f]) + for path in paths: + with open(path.strip()) as g: + c = g.read() + kernel_parser = RegisterLiteKernelParser(c) + kernel_parser.parse() + for k in kernel_parser.kernels: + if hasattr(TargetType, k.target): + index = getattr(TargetType, k.target) + valid_ops[index].append(k.op_type) + # clear the repeated ops for target in valid_targets: @@ -114,7 +129,7 @@ with open(kernel_op_map_dest_path, 'w') as f: f.write('\n'.join(out_lines)) # write kernels into head file for target in valid_targets: - if len(valid_ops[getattr(TargetType, target)]) == 0 : 
+ if len(valid_ops[getattr(TargetType, target)]) == 0: f.write("\n // %s_OPS: " %target) f.write('\n {},') else: -- GitLab