Unverified commit 5fea8e10, authored by huzhiqiang, committed by GitHub

[Python lib] Add opt lib into python lib (#3209)

Parent 90ec3960
@@ -275,6 +275,11 @@ set(host_kernels CACHE INTERNAL "host kernels")
set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt")
file(WRITE ${kernels_src_list} "") # clean
# file to record faked kernels for opt python lib
set(fake_kernels_src_list "${CMAKE_BINARY_DIR}/fake_kernels_src_list.txt")
file(WRITE ${fake_kernels_src_list} "") # clean
if(LITE_BUILD_TAILOR)
set(tailored_kernels_list_path "${LITE_OPTMODEL_DIR}/.tailored_kernels_source_list")
file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list)
@@ -303,62 +308,74 @@ function(add_kernel TARGET device level)
return()
endif()
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
# the source list will collect for model_optimize_tool to fake kernel generation.
foreach(src ${args_SRCS})
file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
# when compiling the model_optimize_tool, a source file with all the fake kernel definitions will be generated,
# no need to continue the compilation of the true kernel source.
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
return()
endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
if ("${device}" STREQUAL "Host")
set(host_kernels "${host_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "ARM")
if (NOT LITE_WITH_ARM)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(arm_kernels "${arm_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "X86")
if (NOT LITE_WITH_X86)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
elseif (LITE_ON_MODEL_OPTIMIZE_TOOL)
foreach(src ${args_SRCS})
file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(x86_kernels "${x86_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "NPU")
if (NOT LITE_WITH_NPU)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(npu_kernels "${npu_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "XPU")
if (NOT LITE_WITH_XPU)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(xpu_kernels "${xpu_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "FPGA")
if (NOT LITE_WITH_FPGA)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(fpga_kernels "${fpga_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "BM")
if (NOT LITE_WITH_BM)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(bm_kernels "${bm_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "OPENCL")
if (NOT LITE_WITH_OPENCL)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(opencl_kernels "${opencl_kernels};${TARGET}" CACHE INTERNAL "")
@@ -366,6 +383,9 @@ function(add_kernel TARGET device level)
if ("${device}" STREQUAL "CUDA")
if (NOT LITE_WITH_CUDA)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(cuda_kernels "${cuda_kernels};${TARGET}" CACHE INTERNAL "")
...
@@ -303,6 +303,11 @@ if (LITE_ON_TINY_PUBLISH)
return()
endif()
# add library for opt_base
lite_cc_library(opt_base SRCS opt_base.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc DEPS kernel op optimizer mir_passes utils)
add_dependencies(opt_base supported_kernel_op_info_h framework_proto all_kernel_faked_cc kernel_list_h)
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
message(STATUS "Compiling opt")
lite_cc_binary(opt SRCS opt.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc
...
@@ -58,6 +58,7 @@ void LightPredictorImpl::Run() {
std::shared_ptr<lite_api::PaddlePredictor> LightPredictorImpl::Clone() {
LOG(FATAL) << "The Clone API is not supported in LightPredictor";
return nullptr;
}
std::string LightPredictorImpl::GetVersion() const { return lite::version(); }
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/api/opt_base.h"
#include "all_kernel_faked.cc" // NOLINT
namespace paddle {
namespace lite_api {
void OptBase::SetModelDir(const std::string& model_path) {
opt_config_.set_model_dir(model_path);
}
void OptBase::SetModelFile(const std::string& model_path) {
opt_config_.set_model_file(model_path);
}
void OptBase::SetParamFile(const std::string& param_path) {
opt_config_.set_param_file(param_path);
}
void OptBase::SetModelType(std::string optimize_out_type) {
if (optimize_out_type == "protobuf") {
model_type_ = LiteModelType::kProtobuf;
} else if (optimize_out_type == "naive_buffer") {
model_type_ = LiteModelType::kNaiveBuffer;
} else {
LOG(FATAL) << "Unsupported model type: " << optimize_out_type;
}
}
void OptBase::SetValidPlaces(const std::string& valid_places) {
valid_places_.clear();
auto target_reprs = lite::Split(valid_places, ",");
for (auto& target_repr : target_reprs) {
if (target_repr == "arm") {
valid_places_.emplace_back(TARGET(kARM));
} else if (target_repr == "opencl") {
valid_places_.emplace_back(
Place{TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault)});
valid_places_.emplace_back(
Place{TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kNCHW)});
valid_places_.emplace_back(
Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageDefault)});
valid_places_.emplace_back(
Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kNCHW)});
valid_places_.emplace_back(
TARGET(kARM)); // enable kARM CPU kernel when no opencl kernel
} else if (target_repr == "x86") {
valid_places_.emplace_back(TARGET(kX86));
} else if (target_repr == "npu") {
valid_places_.emplace_back(TARGET(kNPU));
} else if (target_repr == "xpu") {
valid_places_.emplace_back(TARGET(kXPU));
} else {
LOG(FATAL) << lite::string_format(
"Wrong target '%s' found, please check the command flag "
"'valid_targets'",
target_repr.c_str());
}
}
CHECK(!valid_places_.empty())
<< "At least one target should be set; please set the "
"command argument 'valid_targets'";
}
void OptBase::SetOptimizeOut(const std::string& optimized_out_path) {
optimize_out_path_ = optimized_out_path;
}
void OptBase::RunOptimize(bool record_strip_info) {
CheckIfModelSupported(false);
OpKernelInfoCollector::Global().SetKernel2path(kernel2path_map);
opt_config_.set_valid_places(valid_places_);
if (model_set_dir_ != "") {
RunOptimizeFromModelSet(record_strip_info);
} else {
auto opt_predictor = lite_api::CreatePaddlePredictor(opt_config_);
opt_predictor->SaveOptimizedModel(
optimize_out_path_, model_type_, record_strip_info);
auto resulted_model_name =
record_strip_info ? "information of stripped model" : "optimized model";
std::cout << "Save the " << resulted_model_name
<< " into: " << optimize_out_path_ << " successfully";
}
}
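This single-model path is what the new Python binding drives end to end. A minimal usage sketch, assuming the pybind module built later in this diff is importable as paddlelite.lite (the import path is an assumption; the method names come from the BindLiteOpt block below):

# Hedged sketch: drive OptBase::RunOptimize from Python.
# The import path is an assumption; method names match BindLiteOpt below.
from paddlelite.lite import Opt

opt = Opt()
opt.set_model_dir("./mobilenet_v1")         # non-combined model dir
opt.set_valid_places("arm")                 # parsed by SetValidPlaces above
opt.set_model_type("naive_buffer")          # or "protobuf"
opt.set_optimize_out("./mobilenet_v1_opt")
opt.run_optimize(False)                     # False: do not record strip info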
// collect ops info of the model set
void CollectModelMetaInfo(const std::string& output_dir,
const std::vector<std::string>& models,
const std::string& filename) {
std::set<std::string> total;
for (const auto& name : models) {
std::string model_path =
lite::Join<std::string>({output_dir, name, filename}, "/");
auto lines = lite::ReadLines(model_path);
total.insert(lines.begin(), lines.end());
}
std::string output_path =
lite::Join<std::string>({output_dir, filename}, "/");
lite::WriteLines(std::vector<std::string>(total.begin(), total.end()),
output_path);
}
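CollectModelMetaInfo merges the per-model tailoring lists into one deduplicated file. A Python restatement of the same set-union logic, with illustrative names:

# Python restatement of CollectModelMetaInfo: union the per-model list
# files and write one deduplicated copy at the top of the output dir.
import os

def collect_model_meta_info(output_dir, models, filename):
    total = set()
    for name in models:
        with open(os.path.join(output_dir, name, filename)) as f:
            total.update(line.rstrip("\n") for line in f)
    with open(os.path.join(output_dir, filename), "w") as f:
        f.write("\n".join(sorted(total)))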
void OptBase::SetModelSetDir(const std::string& model_set_path) {
model_set_dir_ = model_set_path;
}
void OptBase::RunOptimizeFromModelSet(bool record_strip_info) {
// 1. create the output dir of the optimized model set.
lite::MkDirRecur(optimize_out_path_);
auto model_dirs = lite::ListDir(model_set_dir_, true);
if (model_dirs.size() == 0) {
LOG(FATAL) << "[" << model_set_dir_ << "] does not contain any model";
}
// 2. optimize each model in the input model set dir.
std::string model_file = opt_config_.model_file();
std::string param_file = opt_config_.param_file();
for (const auto& name : model_dirs) {
std::string input_model_dir =
lite::Join<std::string>({model_set_dir_, name}, "/");
std::string output_model_dir =
lite::Join<std::string>({optimize_out_path_, name}, "/");
if (opt_config_.model_file() != "" && opt_config_.param_file() != "") {
auto model_file_path =
lite::Join<std::string>({input_model_dir, model_file}, "/");
auto param_file_path =
lite::Join<std::string>({input_model_dir, param_file}, "/");
}
std::cout << "Start optimizing model: " << input_model_dir;
opt_config_.set_model_dir(input_model_dir);
opt_config_.set_model_file(model_file);
opt_config_.set_param_file(param_file);
auto opt_predictor = lite_api::CreatePaddlePredictor(opt_config_);
opt_predictor->SaveOptimizedModel(
optimize_out_path_, model_type_, record_strip_info);
std::cout << "Optimize done. ";
}
// 3. if record_strip_info = true, we will record striping info
if (record_strip_info) {
// Collect all models information
CollectModelMetaInfo(
optimize_out_path_, model_dirs, lite::TAILORD_OPS_SOURCE_LIST_FILENAME);
CollectModelMetaInfo(
optimize_out_path_, model_dirs, lite::TAILORD_OPS_LIST_NAME);
CollectModelMetaInfo(optimize_out_path_,
model_dirs,
lite::TAILORD_KERNELS_SOURCE_LIST_FILENAME);
CollectModelMetaInfo(
optimize_out_path_, model_dirs, lite::TAILORD_KERNELS_LIST_NAME);
std::cout << "Record the information of stripped models into: "
<< optimize_out_path_ << " successfully";
}
}
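Seen from the Python side, the model-set variant only swaps set_model_dir for set_modelset_dir; a hedged sketch (import path assumed as above):

# Hedged sketch: optimize every model under ./models in one call.
opt = Opt()
opt.set_modelset_dir("./models")       # each subdirectory holds one model
opt.set_valid_places("arm,opencl")
opt.set_optimize_out("./models_opt")
opt.run_optimize(True)                 # True: record and merge strip info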
void OptBase::PrintHelpInfo() {
const std::string opt_version = lite::version();
const char help_info[] =
"At least one argument should be given. Valid arguments are listed "
"below:\n"
" Arguments of help information:\n"
" `help()` Print help information\n"
" Arguments of model optimization:\n"
" `set_model_dir(model_dir)`\n"
" `set_model_file(model_file_path)`\n"
" `set_param_file(param_file_path)`\n"
" `set_model_type(protobuf|naive_buffer)`\n"
" `set_optimize_out(output_optimize_model_dir)`\n"
" `set_valid_places(arm|opencl|x86|npu|xpu)`\n"
" `run_optimize(false|true)`\n"
" ` ----false&true refer to whether to record ops info for "
"tailoring lib, false by default`\n"
" Arguments of model checking and ops information:\n"
" `print_all_ops()` Display all the valid operators of "
"Paddle-Lite\n"
" `print_supported_ops()` Display supported operators of valid "
"places\n"
" `check_if_model_supported()` Check if the input model is "
"supported\n";
std::cout << "opt version:" << opt_version << std::endl
<< help_info << std::endl;
}
// 2. Print the support info of the input ops
void OptBase::PrintOpsInfo(const std::set<std::string>& valid_ops) {
std::vector<std::string> lite_supported_targets = {"kHost",
"kX86",
"kCUDA",
"kARM",
"kOpenCL",
"kFPGA",
"kNPU",
"kXPU",
"kAny",
"kUnk"};
// Get the length of the first column: maximum length of the op_type
size_t maximum_optype_length = 0;
for (auto it = supported_ops.begin(); it != supported_ops.end(); it++) {
maximum_optype_length = it->first.size() > maximum_optype_length
? it->first.size()
: maximum_optype_length;
}
std::cout << std::setiosflags(std::ios::internal);
// Print the first row: OP_name target1 target2 ...
std::cout << std::setw(maximum_optype_length) << "OP_name";
for (size_t i = 0; i < lite_supported_targets.size(); i++) {
std::cout << std::setw(10) << lite_supported_targets[i].substr(1);
}
std::cout << std::endl;
// Print the name of supported ops and mark if it's supported by each target
// print the support info of the input ops: valid_ops
for (auto op = valid_ops.begin(); op != valid_ops.end(); op++) {
std::cout << std::setw(maximum_optype_length) << *op;
// Check: If this kernel doesn't match any operator, we will skip it.
if (supported_ops.find(*op) == supported_ops.end()) {
continue;
}
// Print OP info.
auto ops_valid_places = supported_ops.at(*op);
for (size_t i = 0; i < lite_supported_targets.size(); i++) {
if (std::find(ops_valid_places.begin(),
ops_valid_places.end(),
lite_supported_targets[i]) != ops_valid_places.end()) {
std::cout << std::setw(10) << "Y";
} else {
std::cout << std::setw(10) << " ";
}
}
std::cout << std::endl;
}
}
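The setw calls produce a right-aligned grid; a short Python sketch of the same layout, with illustrative data:

# Right-aligned support table, mirroring PrintOpsInfo (data illustrative).
targets = ["Host", "X86", "CUDA", "ARM", "OpenCL", "FPGA", "NPU", "XPU", "Any", "Unk"]
supported = {"conv2d": {"ARM", "X86", "OpenCL"}, "softmax": {"ARM", "X86"}}

width = max(len(op) for op in supported)
print("OP_name".rjust(width) + "".join(t.rjust(10) for t in targets))
for op, places in sorted(supported.items()):
    row = "".join(("Y" if t in places else " ").rjust(10) for t in targets)
    print(op.rjust(width) + row)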
void OptBase::DisplayKernelsInfo() { // Display kernel information
std::cout << ::paddle::lite::KernelRegistry::Global().DebugString();
}
void OptBase::PrintAllOps() {
// 1. Get supported ops on these targets
std::set<std::string> valid_ops;
for (size_t i = 0; i < supported_ops_target.size(); i++) {
auto ops = supported_ops_target[i];
valid_ops.insert(ops.begin(), ops.end());
}
// 2. Print support info of these ops
PrintOpsInfo(valid_ops);
}
void OptBase::PrintSupportedOps() {
// 1. Get the valid hardware targets
std::vector<TargetType> target_types = {};
for (size_t i = 0; i < valid_places_.size(); i++) {
target_types.push_back(valid_places_[i].target);
}
std::string targets_str = TargetToStr(target_types[0]);
for (size_t i = 1; i < target_types.size(); i++) {
targets_str = targets_str + "," + TargetToStr(target_types[i]);
}
std::cout << "Supported OPs on '" << targets_str << "': " << std::endl;
target_types.push_back(TARGET(kHost));
target_types.push_back(TARGET(kUnk));
// 2. Get supported ops on these targets
std::set<std::string> valid_ops;
for (size_t i = 0; i < target_types.size(); i++) {
auto ops = supported_ops_target[static_cast<int>(target_types[i])];
valid_ops.insert(ops.begin(), ops.end());
}
// 3. Print support info of these ops
PrintOpsInfo(valid_ops);
}
// test whether this model is supported
void OptBase::CheckIfModelSupported(bool print_ops_info) {
// 1. parse valid places and valid targets
auto valid_ops = supported_ops_target[static_cast<int>(TARGET(kHost))];
auto valid_unktype_ops = supported_ops_target[static_cast<int>(TARGET(kUnk))];
valid_ops.insert(
valid_ops.end(), valid_unktype_ops.begin(), valid_unktype_ops.end());
for (size_t i = 0; i < valid_places_.size(); i++) {
auto target = valid_places_[i].target;
auto ops = supported_ops_target[static_cast<int>(target)];
valid_ops.insert(valid_ops.end(), ops.begin(), ops.end());
}
// get valid ops
std::set<std::string> valid_ops_set(valid_ops.begin(), valid_ops.end());
// 2. Load the model into a program to get the ops in it
std::string prog_path = opt_config_.model_dir() + "/__model__";
if (!(opt_config_.model_file()).empty() &&
!(opt_config_.param_file()).empty()) {
prog_path = opt_config_.model_file();
}
lite::cpp::ProgramDesc cpp_prog;
framework::proto::ProgramDesc pb_proto_prog =
*lite::LoadProgram(prog_path, false);
lite::pb::ProgramDesc pb_prog(&pb_proto_prog);
// Transform to cpp::ProgramDesc
lite::TransformProgramDescAnyToCpp(pb_prog, &cpp_prog);
std::set<std::string> unsupported_ops;
std::set<std::string> input_model_ops;
for (size_t index = 0; index < cpp_prog.BlocksSize(); index++) {
auto current_block = cpp_prog.GetBlock<lite::cpp::BlockDesc>(index);
for (size_t i = 0; i < current_block->OpsSize(); ++i) {
auto& op_desc = *current_block->GetOp<lite::cpp::OpDesc>(i);
auto op_type = op_desc.Type();
input_model_ops.insert(op_type);
if (valid_ops_set.count(op_type) == 0) {
unsupported_ops.insert(op_type);
}
}
}
// 3. Print ops_info of input model and check if this model is supported
if (print_ops_info) {
std::cout << "OPs in the input model include:\n";
PrintOpsInfo(input_model_ops);
}
if (!unsupported_ops.empty()) {
std::string unsupported_ops_str = *unsupported_ops.begin();
for (auto op_str = ++unsupported_ops.begin();
op_str != unsupported_ops.end();
op_str++) {
unsupported_ops_str = unsupported_ops_str + ", " + *op_str;
}
std::vector<TargetType> targets = {};
for (size_t i = 0; i < valid_places_.size(); i++) {
targets.push_back(valid_places_[i].target);
}
std::sort(targets.begin(), targets.end());
targets.erase(unique(targets.begin(), targets.end()), targets.end());
std::string targets_str = TargetToStr(targets[0]);
for (size_t i = 1; i < targets.size(); i++) {
targets_str = targets_str + "," + TargetToStr(targets[i]);
}
LOG(ERROR) << "Error: This model is not supported, because "
<< unsupported_ops.size() << " ops are not supported on '"
<< targets_str << "'. These unsupported ops are: '"
<< unsupported_ops_str << "'.";
exit(1);
}
if (print_ops_info) {
std::cout << "Paddle-Lite supports this model!" << std::endl;
exit(1);
}
}
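Stripped of the protobuf parsing, the check above reduces to a set difference between the model's ops and the ops supported on the chosen targets; a Python restatement with illustrative data:

# CheckIfModelSupported as set arithmetic (op names illustrative).
valid_ops = {"feed", "fetch", "conv2d", "softmax"}  # kHost/kUnk + chosen targets
model_ops = {"conv2d", "softmax", "my_custom_op"}   # ops parsed from __model__

unsupported = model_ops - valid_ops
if unsupported:
    raise SystemExit("unsupported ops: " + ", ".join(sorted(unsupported)))
print("Paddle-Lite supports this model!")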
} // namespace lite_api
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* This file defines Opt and basic functions about model transformation.
*/
#ifndef PADDLE_LITE_OPT_H_ // NOLINT
#define PADDLE_LITE_OPT_H_
#include <algorithm>
#include <iomanip>
#include <set>
#include <string>
#include <vector>
// stores the map that records the source_file path of each kernel.
#include "kernel_src_map.h" // NOLINT
#include "lite/api/cxx_api.h"
// version of Paddle-lite
#include "lite/core/version.h"
// model parser functions to pre-load model to verify if this model is supported
#include "lite/model_parser/compatible_pb.h"
#include "lite/model_parser/pb/program_desc.h"
#include "lite/utils/string.h"
// recorded all the ops supported by paddle-lite
#include "supported_kernel_op_info.h" // NOLINT
namespace paddle {
namespace lite_api {
/// OptBase provides the basic interfaces of the model optimization tool
/// (opt).
class LITE_API OptBase {
public:
OptBase() = default;
void SetModelSetDir(const std::string &model_set_path);
void SetModelDir(const std::string &model_path);
void SetModelFile(const std::string &model_path);
void SetParamFile(const std::string &param_path);
void SetValidPlaces(const std::string &valid_places);
void SetOptimizeOut(const std::string &optimized_out_path);
// set optimized_model type
void SetModelType(std::string model_type);
// transform and save the optimized model
void RunOptimize(bool record_strip_info = false);
// functions for printing info
// 1. help info
void PrintHelpInfo();
// 2. PrintOpsInfo
void PrintOpsInfo(const std::set<std::string> &valid_ops =
{}); // print supported ops on target_types
void PrintAllOps(); // print all ops
void PrintSupportedOps(); // print ops supported on valid_places_
void DisplayKernelsInfo(); // Display kernel information
// 3. Check if this model is supported
void CheckIfModelSupported(bool print_ops_info = true);
private:
CxxConfig opt_config_;
// valid places for the optimized_model
std::vector<Place> valid_places_;
// filename of the optimized_model
std::string optimize_out_path_;
// type of the optimized_model, kNaiveBuffer by default.
LiteModelType model_type_{LiteModelType::kNaiveBuffer};
// dir path of a set of models; when set, RunOptimize handles the whole set
std::string model_set_dir_;
void RunOptimizeFromModelSet(bool record_strip_info = false);
};
} // namespace lite_api
} // namespace paddle
#endif // NOLINT
set(PYBIND_DEPS pybind python paddle_api_light paddle_api)
if (NOT LITE_ON_TINY_PUBLISH)
-    set(PYBIND_DEPS ${PYBIND_DEPS} paddle_api_full)
+    set(PYBIND_DEPS ${PYBIND_DEPS} paddle_api_full opt_base)
endif()
lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
...
@@ -26,6 +26,7 @@
#ifndef LITE_ON_TINY_PUBLISH
#include "lite/api/cxx_api.h"
#include "lite/api/opt_base.h"
#endif
#include "lite/api/light_api.h"
@@ -47,10 +48,27 @@ using lite_api::PrecisionType;
using lite_api::DataLayoutType;
using lite_api::Place;
using lite::LightPredictorImpl;
using lite_api::OptBase;
#ifndef LITE_ON_TINY_PUBLISH
using lite::CxxPaddleApiImpl;
static void BindLiteCxxPredictor(py::module *m);
void BindLiteOpt(py::module *m) {
py::class_<OptBase> opt_base(*m, "Opt");
opt_base.def(py::init<>())
.def("set_model_dir", &OptBase::SetModelDir)
.def("set_modelset_dir", &OptBase::SetModelSetDir)
.def("set_model_file", &OptBase::SetModelFile)
.def("set_param_file", &OptBase::SetParamFile)
.def("set_valid_places", &OptBase::SetValidPlaces)
.def("set_optimize_out", &OptBase::SetOptimizeOut)
.def("set_model_type", &OptBase::SetModelType)
.def("run_optimize", &OptBase::RunOptimize)
.def("help", &OptBase::PrintHelpInfo)
.def("print_supported_ops", &OptBase::PrintSupportedOps)
.def("display_kernels_info", &OptBase::DisplayKernelsInfo)
.def("print_all_ops", &OptBase::PrintAllOps);
}
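With these bindings in place, the inspection commands of opt become plain method calls. A hedged interactive sketch (import path assumed; only methods bound above are used):

# Hedged sketch of the inspection side of the new binding.
from paddlelite.lite import Opt

opt = Opt()
opt.help()                  # OptBase::PrintHelpInfo
opt.print_all_ops()         # every op known to Paddle-Lite, per target
opt.set_valid_places("arm")
opt.print_supported_ops()   # ops available on the chosen targets
opt.display_kernels_info()  # kernel registry dump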
#endif
static void BindLiteLightPredictor(py::module *m);
static void BindLiteCxxConfig(py::module *m);
...
@@ -22,11 +22,15 @@ namespace lite {
namespace pybind {
void BindLiteApi(pybind11::module *m);
void BindLiteOpt(pybind11::module *m);
PYBIND11_MODULE(lite, m) {
m.doc() = "C++ core of Paddle-Lite";
BindLiteApi(&m);
#ifndef LITE_ON_TINY_PUBLISH
BindLiteOpt(&m);
#endif
}
} // namespace pybind
...
@@ -93,9 +93,13 @@ add_custom_command(
OUTPUT ops.h # not a real path to the output to force it execute every time.
)
# generate fake kernels for model_optimize_tool
#-------------------------------opt----------------------------------------------------------------
# tricks to create header files for opt
add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/create_fake_kernel_registry.py
${kernels_src_list}
${fake_kernels_src_list}
${CMAKE_BINARY_DIR}/all_kernel_faked.cc
${CMAKE_BINARY_DIR}/kernel_src_map.h
OUTPUT all_kernel_faked.cc # not a real path to the output to force it execute every time.
@@ -103,12 +107,12 @@ add_custom_command(
add_custom_target(op_list_h DEPENDS ops.h)
add_custom_target(kernel_list_h DEPENDS kernels.h)
add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc)
#add_custom_target(opencl_kernels_source_cc DEPENDS opencl_kernels_source.cc)
# create header file to store ops info sorted by supported platforms
add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/record_supported_kernel_op.py
${kernels_src_list}
${fake_kernels_src_list}
${ops_src_list}
${CMAKE_BINARY_DIR}/supported_kernel_op_info.h
OUTPUT supported_kernel_op_info.h # not a real path to the output to force it execute every time.
...
@@ -395,7 +395,7 @@ class ContextScheduler {
break;
#endif
default:
-#ifndef LITE_ON_MODEL_OPTIMIZE_TOOL
+#if (!defined LITE_ON_MODEL_OPTIMIZE_TOOL) && (!defined LITE_WITH_PYTHON)
LOG(FATAL) << "unsupported target " << TargetToStr(target);
#endif
break;
...
# NOTE we leave the add_kernel not protected by LITE_WITH_LIGHT_WEIGHT_FRAMEWORK so that all the kernels will be registered
# to the model_optimize_tool.
-if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)))
+if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)))
return()
endif()
...
-if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_CUDA))
+if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_CUDA))
return()
endif()
...
-if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_FPGA))
+if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_FPGA))
return()
endif()
...
-if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_OPENCL))
+if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_OPENCL))
return ()
endif()
...
@@ -358,6 +358,7 @@ function make_x86 {
-DLITE_WITH_ARM=OFF \
-DLITE_WITH_PYTHON=$BUILD_PYTHON \
-DWITH_GPU=OFF \
-DLITE_WITH_PYTHON=${BUILD_PYTHON} \
-DLITE_BUILD_EXTRA=ON \
-DLITE_WITH_XPU=$BUID_XPU \
-DXPU_SDK_ROOT=$XPU_SDK_ROOT
...
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this module will record kernels in invalid places into all_kernel_faked.cc
from __future__ import print_function
import sys
@@ -18,12 +19,13 @@ import logging
from ast import RegisterLiteKernelParser
from utils import *
-if len(sys.argv) != 4:
+if len(sys.argv) != 5:
print("Error: create_fake_kernel_registry.py requires four inputs!")
exit(1)
-ops_list_path = sys.argv[1]
-dest_path = sys.argv[2]
-kernelmap_path = sys.argv[3]
+kernels_list_path = sys.argv[1]
+faked_kernels_list_path = sys.argv[2]
+dest_path = sys.argv[3]
+kernelmap_path = sys.argv[4]
out_lines = [
'#pragma once',
@@ -77,68 +79,85 @@ const std::map<std::string, std::string> kernel2path_map{
'''
]
def parse_fake_kernels_from_path(list_path):
with open(list_path) as f:
paths = set([path for path in f])
for path in paths:
print('path', path)
with open(path.strip()) as g:
c = g.read()
kernel_parser = RegisterLiteKernelParser(c)
kernel_parser.parse()
for k in kernel_parser.kernels:
kernel_name = "{op_type}_{target}_{precision}_{data_layout}_{alias}_class".format(
op_type=k.op_type,
target=k.target,
precision=k.precision,
data_layout=k.data_layout,
alias=k.alias
)
kernel_define = fake_kernel % (
kernel_name,
k.target,
k.precision,
k.data_layout,
kernel_name
)
out_lines.append(kernel_define)
out_lines.append("")
key = "REGISTER_LITE_KERNEL(%s, %s, %s, %s, %s, %s)" % (
k.op_type,
k.target,
k.precision,
k.data_layout,
'::paddle::lite::' + kernel_name,
k.alias
)
out_lines.append(key)
for input in k.inputs:
io = ' .BindInput("%s", {%s})' % (input.name, input.type)
out_lines.append(io)
for output in k.outputs:
io = ' .BindOutput("%s", {%s})' % (output.name, output.type)
out_lines.append(io)
out_lines.append(" .Finalize();")
out_lines.append("")
out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias))
def parse_supported_kernels_from_path(list_path):
with open(list_path) as f:
paths = set([path for path in f])
for path in paths:
print('path', path)
with open(path.strip()) as g:
c = g.read()
kernel_parser = RegisterLiteKernelParser(c)
kernel_parser.parse()
for k in kernel_parser.kernels:
index = path.rindex('/')
filename = path[index + 1:]
map_element = ' {"%s,%s,%s,%s,%s", "%s"},' % (
k.op_type,
k.target,
k.precision,
k.data_layout,
k.alias,
filename.strip()
)
kernel_src_map_lines.append(map_element)
parse_fake_kernels_from_path(faked_kernels_list_path)
parse_supported_kernels_from_path(faked_kernels_list_path)
parse_supported_kernels_from_path(kernels_list_path)
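Both parse functions above share the same list-file convention: one kernel source path per line, read once with duplicates collapsed. A minimal restatement of that shared reading step (helper name hypothetical):

# Hypothetical helper restating the list-file reading both functions share.
def read_unique_paths(list_path):
    with open(list_path) as f:
        return {line.strip() for line in f if line.strip()}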
with open(ops_list_path) as f:
paths = set([path for path in f])
for path in paths:
print('path', path)
with open(path.strip()) as g:
c = g.read()
kernel_parser = RegisterLiteKernelParser(c)
kernel_parser.parse()
for k in kernel_parser.kernels:
kernel_name = "{op_type}_{target}_{precision}_{data_layout}_{alias}_class".format(
op_type = k.op_type,
target = k.target,
precision = k.precision,
data_layout = k.data_layout,
alias = k.alias,
)
kernel_define = fake_kernel % (
kernel_name,
k.target,
k.precision,
k.data_layout,
kernel_name,
)
out_lines.append(kernel_define)
out_lines.append("")
key = "REGISTER_LITE_KERNEL(%s, %s, %s, %s, %s, %s)" % (
k.op_type,
k.target,
k.precision,
k.data_layout,
'::paddle::lite::' + kernel_name,
k.alias,
)
out_lines.append(key)
for input in k.inputs:
io = ' .BindInput("%s", {%s})' % (input.name, input.type)
out_lines.append(io)
for output in k.outputs:
io = ' .BindOutput("%s", {%s})' % (output.name, output.type)
out_lines.append(io)
out_lines.append(" .Finalize();")
out_lines.append("")
out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias))
index = path.rindex('/')
filename = path[index + 1:]
map_element = ' {"%s,%s,%s,%s,%s", "%s"},' % (
k.op_type,
k.target,
k.precision,
k.data_layout,
k.alias,
filename.strip()
)
kernel_src_map_lines.append(map_element)
with open(dest_path, 'w') as f:
logging.info("write kernel list to %s" % dest_path)
f.write('\n'.join(out_lines))
...
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this module will record supported ops from kernels_src.txt
from __future__ import print_function
import sys
@@ -18,12 +19,13 @@ import logging
from ast import RegisterLiteKernelParser
from ast import RegisterLiteOpParser
-if len(sys.argv) != 4:
-print("Error: record_supported_kernel_op.py requires three inputs!")
-exit(1)
+if len(sys.argv) != 5:
+print("Error: record_supported_kernel_op.py requires four inputs!")
+sys.exit(1)
kernels_list_path = sys.argv[1]
-ops_list_path = sys.argv[2]
-kernel_op_map_dest_path = sys.argv[3]
+faked_kernels_list_path = sys.argv[2]
+ops_list_path = sys.argv[3]
+kernel_op_map_dest_path = sys.argv[4]
out_lines = [
@@ -51,11 +53,11 @@ const std::vector<std::vector<std::string>> supported_ops_target = {
'''
]
-ops_lines=[]
+ops_lines = []
# valid targets and valid_ops
valid_targets = ["kUnk", "kHost", "kX86", "kCUDA", "kARM", "kOpenCL", "kAny", "kFPGA", "kNPU", "kXPU"]
-valid_ops = [[],[],[],[],[],[],[],[],[],[]]
+valid_ops = [[], [], [], [], [], [], [], [], [], []]
class TargetType:
kUnk = 0
kHost = 1
@@ -78,8 +80,21 @@ with open(kernels_list_path) as f:
kernel_parser.parse()
for k in kernel_parser.kernels:
if hasattr(TargetType, k.target):
-index=getattr(TargetType, k.target)
+index = getattr(TargetType, k.target)
valid_ops[index].append(k.op_type)
# record op_info of valid kernels into `valid_ops` according to different target type
with open(faked_kernels_list_path) as f:
paths = set([path for path in f])
for path in paths:
with open(path.strip()) as g:
c = g.read()
kernel_parser = RegisterLiteKernelParser(c)
kernel_parser.parse()
for k in kernel_parser.kernels:
if hasattr(TargetType, k.target):
index = getattr(TargetType, k.target)
valid_ops[index].append(k.op_type)
# clear the repeated ops
for target in valid_targets:
@@ -114,7 +129,7 @@ with open(kernel_op_map_dest_path, 'w') as f:
f.write('\n'.join(out_lines))
# write kernels into head file
for target in valid_targets:
-if len(valid_ops[getattr(TargetType, target)]) == 0 :
+if len(valid_ops[getattr(TargetType, target)]) == 0:
f.write("\n // %s_OPS: " %target)
f.write('\n {},')
else:
...