diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4793fa924042b3e457d71d209800ed8e71e3dd2d..199b3bda17f4ac22c1d657b6794446832d448440 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -70,6 +70,7 @@ lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF)
 lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF)
 # publish options
 lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
+lite_option(LITE_BUILD_TAILOR "Enable tailoring library according to model" OFF)
 # TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter.
 if(ANDROID OR IOS OR ARMLINUX)
diff --git a/cmake/lite.cmake b/cmake/lite.cmake
index 0061ddea32bf4b15a53b1e2742bae70f38c1e040..7d8641d96da86cf9a2be442b797507ac79058efa 100644
--- a/cmake/lite.cmake
+++ b/cmake/lite.cmake
@@ -241,6 +241,10 @@ set(host_kernels CACHE INTERNAL "host kernels")
 set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt")
 file(WRITE ${kernels_src_list} "") # clean
+if(LITE_BUILD_TAILOR)
+  set(tailored_kernels_list_path "${LITE_OPTMODEL_DIR}/.tailored_kernels_source_list")
+  file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list)
+endif()
 # add a kernel for some specific device
 # device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA)
 # level: one of (basic, extra)
@@ -252,6 +256,15 @@ function(add_kernel TARGET device level)
     ARGS)
   cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  if(LITE_BUILD_TAILOR)
+    foreach(src ${args_SRCS})
+      list(FIND tailored_kernels_list ${src} _index)
+      if (${_index} EQUAL -1)
+        return()
+      endif()
+    endforeach()
+  endif()
+
   if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
     return()
   endif()
@@ -338,6 +351,10 @@ endfunction()
 set(ops CACHE INTERNAL "ops")
 set(ops_src_list "${CMAKE_BINARY_DIR}/ops_src_list.txt")
 file(WRITE ${ops_src_list} "") # clean
+if(LITE_BUILD_TAILOR)
+  set(tailored_ops_list_path "${LITE_OPTMODEL_DIR}/.tailored_ops_source_list")
+  file(STRINGS ${tailored_ops_list_path} tailored_ops_list)
+endif()
 # add an operator
 # level: one of (basic, extra)
 function(add_operator TARGET level)
@@ -348,16 +365,24 @@ function(add_operator TARGET level)
     ARGS)
   cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
   if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
     return()
   endif()
-  set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
   foreach(src ${args_SRCS})
+    if(LITE_BUILD_TAILOR)
+      list(FIND tailored_ops_list ${src} _index)
+      if (${_index} EQUAL -1)
+        return()
+      endif()
+    endif()
     file(APPEND ${ops_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
   endforeach()
+  set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
+
   lite_cc_library(${TARGET} SRCS ${args_SRCS}
             DEPS ${args_DEPS}
             X86_DEPS ${args_X86_DEPS}
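
Note: with `LITE_BUILD_TAILOR=ON`, configuration expects the hidden list files produced by `model_optimize_tool` (written by `lite/api/cxx_api.cc` below) to already exist under `LITE_OPTMODEL_DIR`; `add_kernel`/`add_operator` silently skip any target whose sources are not all listed. A hypothetical direct configure invocation, showing only the two options this patch adds (the remaining cross-build options are wired up by `build.sh` later in this patch):

```sh
# illustrative; the other ARM/Android flags follow the usual Lite setup
cmake .. \
  -DLITE_BUILD_TAILOR=ON \
  -DLITE_OPTMODEL_DIR=/path/to/optimized_model
```
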
diff --git a/lite/api/cxx_api.cc b/lite/api/cxx_api.cc
index 5e2c3e59a311b9a9a678e4fe686a419e8a045350..a2b538aa77e0603f439b6b23aab875103fdbbff0 100644
--- a/lite/api/cxx_api.cc
+++ b/lite/api/cxx_api.cc
@@ -15,6 +15,7 @@
 #include "lite/api/cxx_api.h"
 #include <algorithm>
 #include <memory>
+#include <set>
 #include <string>
 #include <utility>
 #include <vector>
@@ -23,8 +24,16 @@
 namespace paddle {
 namespace lite {
 
+static const char TAILORD_OPS_SOURCE_LIST_FILENAME[] =
+    ".tailored_ops_source_list";
+static const char TAILORD_OPS_LIST_NAME[] = ".tailored_ops_list";
+static const char TAILORD_KERNELS_SOURCE_LIST_FILENAME[] =
+    ".tailored_kernels_source_list";
+static const char TAILORD_KERNELS_LIST_NAME[] = ".tailored_kernels_list";
+
 void Predictor::SaveModel(const std::string &dir,
-                          lite_api::LiteModelType model_type) {
+                          lite_api::LiteModelType model_type,
+                          bool record_info) {
   if (!program_) {
     GenRuntimeProgram();
   }
@@ -40,6 +49,83 @@ void Predictor::SaveModel(const std::string &dir,
     default:
       LOG(FATAL) << "Unknown model type";
   }
+  if (record_info) {
+    SaveOpKernelInfo(dir);
+  }
+}
+
+void Predictor::SaveOpKernelInfo(const std::string &model_dir) {
+  std::set<std::string> ops_info;
+  std::set<std::string> kernels_info;
+  const auto &instructions = program_->instructions();
+  for (auto &node : instructions) {
+    // parse the op type information
+    auto op = node.op()->op_info();
+    ops_info.insert(op->Type());
+    // parse the kernel type information
+    std::string kernel_type_str =
+        node.kernel()->op_type() + "," + TargetRepr(node.kernel()->target()) +
+        "," + PrecisionRepr(node.kernel()->precision()) + "," +
+        DataLayoutRepr(node.kernel()->layout()) + "," + node.kernel()->alias();
+    kernels_info.insert(kernel_type_str);
+  }
+
+  // get the source file names from the op types and kernel types
+  auto op2pathmap = OpKernelInfoCollector::Global().GetOp2PathDict();
+  auto kernel2pathmap = OpKernelInfoCollector::Global().GetKernel2PathDict();
+
+  // write the used op and kernel info into files
+  std::string opf_path = model_dir + "/" + TAILORD_OPS_LIST_NAME;
+  std::string opf_source_path =
+      model_dir + "/" + TAILORD_OPS_SOURCE_LIST_FILENAME;
+  std::string kpf_path = model_dir + "/" + TAILORD_KERNELS_LIST_NAME;
+  std::string kpf_source_path =
+      model_dir + "/" + TAILORD_KERNELS_SOURCE_LIST_FILENAME;
+  std::map<std::string, std::string> op2path;
+
+  std::FILE *opf = std::fopen(opf_path.c_str(), "w");
+  std::FILE *opf_source = std::fopen(opf_source_path.c_str(), "w");
+  std::FILE *kpf = std::fopen(kpf_path.c_str(), "w");
+  std::FILE *kpf_source = std::fopen(kpf_source_path.c_str(), "w");
+  std::vector<std::string> opcompile;
+  std::vector<std::string> kernelcompile;
+
+  if (nullptr == opf || nullptr == opf_source || nullptr == kpf ||
+      nullptr == kpf_source) {
+    LOG(FATAL) << "failed to create the info files in: " << model_dir;
+  }
+
+  for (auto op_info = ops_info.begin(); op_info != ops_info.end(); op_info++) {
+    fputs(op_info->c_str(), opf);
+    fputc('\n', opf);
+    std::string op_path = op2pathmap[*op_info];
+    fputs(op_path.c_str(), opf_source);
+    fputc('\n', opf_source);
+  }
+  std::fclose(opf_source);
+  std::fclose(opf);
+  LOG(INFO) << "operator information of the tailored model is stored into: "
+            << opf_path;
+
+  // write the kernel types and kernel source paths into the files
+  for (auto kernel_info = kernels_info.begin();
+       kernel_info != kernels_info.end();
+       kernel_info++) {
+    fputs(kernel_info->c_str(), kpf);
+    fputc('\n', kpf);
+    std::string kernel_path = kernel2pathmap[*kernel_info];
+    fputs(kernel_path.c_str(), kpf_source);
+    fputc('\n', kpf_source);
+    // conv_compute.cc dispatches to several specialized implementation
+    // files, which have to be kept together with it
+    if (kernel_path == "conv_compute.cc") {
+      fputs(
+          "conv_depthwise.cc\nconv_direct.cc\nconv_gemmlike.cc\nconv_"
+          "winograd.cc\n",
+          kpf_source);
+    }
+  }
+  std::fclose(kpf_source);
+  std::fclose(kpf);
+  LOG(INFO) << "kernel information of the tailored model is stored into: "
+            << kpf_path;
 }
 
 lite::Tensor *Predictor::GetInput(size_t offset) {
@@ -61,7 +147,7 @@ void Predictor::PrepareFeedFetch() {
   auto current_block = program_desc_.GetBlock<cpp::BlockDesc>(0);
   std::vector<cpp::OpDesc *> feeds;
   std::vector<cpp::OpDesc *> fetchs;
-  for (int i = 0; i < current_block->OpsSize(); i++) {
+  for (size_t i = 0; i < current_block->OpsSize(); i++) {
     auto op = current_block->GetOp<cpp::OpDesc>(i);
     if (op->Type() == "feed") {
       feeds.push_back(op);
@@ -71,11 +157,11 @@
   }
   input_names_.resize(feeds.size());
   output_names_.resize(fetchs.size());
-  for (int i = 0; i < feeds.size(); i++) {
+  for (size_t i = 0; i < feeds.size(); i++) {
     input_names_[feeds[i]->GetAttr<int>("col")] =
         feeds[i]->Output("Out").front();
   }
-  for (int i = 0; i < fetchs.size(); i++) {
+  for (size_t i = 0; i < fetchs.size(); i++) {
     output_names_[fetchs[i]->GetAttr<int>("col")] =
         fetchs[i]->Input("X").front();
   }
@@ -191,7 +277,7 @@ lite::Tensor *Predictor::GetInputByName(const std::string &name) {
   if (element == input_names_.end()) {
     LOG(ERROR) << "Model do not have input named with: [" << name
                << "], model's inputs include:";
-    for (int i = 0; i < input_names_.size(); i++) {
+    for (size_t i = 0; i < input_names_.size(); i++) {
       LOG(ERROR) << "[" << input_names_[i] << "]";
     }
     return nullptr;
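
Note: `SaveOpKernelInfo` writes four hidden files next to the optimized model; the formats follow directly from the code above. An illustrative dump for a hypothetical model that uses only `conv2d` and `fc` on ARM (real lists also contain the model's other ops, e.g. feed/fetch):

```text
.tailored_ops_list              conv2d
                                fc
.tailored_kernels_list          conv2d,kARM,kFloat,kNCHW,def
                                fc,kARM,kFloat,kNCHW,def
.tailored_ops_source_list       conv_op.cc
                                fc_op.cc
.tailored_kernels_source_list   conv_compute.cc
                                conv_depthwise.cc
                                conv_direct.cc
                                conv_gemmlike.cc
                                conv_winograd.cc
                                fc_compute.cc
```
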
diff --git a/lite/api/cxx_api.h b/lite/api/cxx_api.h
index 5a4a6919d7d2386819b05724e0f275e90a0fa119..e1d34172ba578824228e6369a8e37d60972336e9 100644
--- a/lite/api/cxx_api.h
+++ b/lite/api/cxx_api.h
@@ -89,7 +89,9 @@ class LITE_API Predictor {
   // This method is disabled in mobile, for unnecessary dependencies required.
   void SaveModel(
       const std::string& dir,
-      lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf);
+      lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
+      bool record_info = false);
+  void SaveOpKernelInfo(const std::string& model_dir);
 
 #ifdef LITE_WITH_TRAIN
   void Run(const std::vector<framework::Tensor>& tensors) {
@@ -137,9 +139,10 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
   std::unique_ptr<lite_api::Tensor> GetInputByName(
       const std::string& name) override;
 
-  void SaveOptimizedModel(const std::string& model_dir,
-                          lite_api::LiteModelType model_type =
-                              lite_api::LiteModelType::kProtobuf) override;
+  void SaveOptimizedModel(
+      const std::string& model_dir,
+      lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
+      bool record_info = false) override;
 
  private:
   Predictor raw_predictor_;
diff --git a/lite/api/cxx_api_impl.cc b/lite/api/cxx_api_impl.cc
index 3e216a40b50b8bcb3bdfdc2c0fd9aefc415764c0..db225fb78497d5c8f31f90e59c755232adc53222 100644
--- a/lite/api/cxx_api_impl.cc
+++ b/lite/api/cxx_api_impl.cc
@@ -65,8 +65,9 @@ std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInputByName(
 }
 
 void CxxPaddleApiImpl::SaveOptimizedModel(const std::string &model_dir,
-                                          lite_api::LiteModelType model_type) {
-  raw_predictor_.SaveModel(model_dir, model_type);
+                                          lite_api::LiteModelType model_type,
+                                          bool record_info) {
+  raw_predictor_.SaveModel(model_dir, model_type, record_info);
 }
 
 }  // namespace lite
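
Note: these two files thread the new `record_info` argument from the public `PaddlePredictor` interface down to `Predictor::SaveModel`. A minimal sketch of driving it through the CXX API (model path and places are illustrative, not part of this patch):

```cpp
#include "lite/api/paddle_api.h"

using namespace paddle::lite_api;  // NOLINT

int main() {
  CxxConfig config;
  config.set_model_dir("./mobilenet_v1");
  config.set_valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
  auto predictor = CreatePaddlePredictor(config);
  // New third argument: also record the op/kernel lists used by this model.
  predictor->SaveOptimizedModel(
      "./mobilenet_v1_opt", LiteModelType::kNaiveBuffer, true);
  return 0;
}
```
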
protobuf/naive_buffer"); DEFINE_bool(display_kernels, false, "Display kernel information"); +DEFINE_bool(record_tailoring_info, + false, + "Record kernels and operators information of the optimized model " + "for tailoring compiling, information are stored into optimized " + "model path as hidden files"); DEFINE_string(optimize_out, "", "path of the output optimized model"); DEFINE_string(valid_targets, "arm", @@ -104,8 +112,14 @@ void Main() { } else { LOG(FATAL) << "Unsupported Model type :" << FLAGS_optimize_out_type; } + OpKernelInfoCollector::Global().SetKernel2path(kernel2path_map); - predictor->SaveOptimizedModel(FLAGS_optimize_out, model_type); + predictor->SaveOptimizedModel( + FLAGS_optimize_out, model_type, FLAGS_record_tailoring_info); + if (FLAGS_record_tailoring_info) { + LOG(INFO) << "Record the information of tailored model into :" + << FLAGS_optimize_out; + } } } // namespace lite_api diff --git a/lite/api/paddle_api.cc b/lite/api/paddle_api.cc index e87885c369999470bd0d1d8875cade797630388d..db545de958743b89f99efcde131e1371fdb15409 100644 --- a/lite/api/paddle_api.cc +++ b/lite/api/paddle_api.cc @@ -145,7 +145,8 @@ lod_t Tensor::lod() const { return ctensor(raw_tensor_)->lod(); } void Tensor::SetLoD(const lod_t &lod) { tensor(raw_tensor_)->set_lod(lod); } void PaddlePredictor::SaveOptimizedModel(const std::string &model_dir, - LiteModelType model_type) { + LiteModelType model_type, + bool record_info) { LOG(FATAL) << "The SaveOptimizedModel API is only supported by CxxConfig predictor."; } diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h index 3886c462ff3192ccb522b741debe6730e3e0e4fb..3e911b62f785b2102685de94377804cf250f57e9 100644 --- a/lite/api/paddle_api.h +++ b/lite/api/paddle_api.h @@ -97,7 +97,8 @@ class LITE_API PaddlePredictor { /// CxxConfig, and the persisted model can be reused for MobileConfig. virtual void SaveOptimizedModel( const std::string& model_dir, - LiteModelType model_type = LiteModelType::kProtobuf); + LiteModelType model_type = LiteModelType::kProtobuf, + bool record_info = false); virtual ~PaddlePredictor() = default; }; diff --git a/lite/api/paddle_api_test.cc b/lite/api/paddle_api_test.cc index f87c707ddbea0d4e78d195e4529892b321027e8f..69d544c3decac9f312bc9eb03cdc6c3702c5032b 100644 --- a/lite/api/paddle_api_test.cc +++ b/lite/api/paddle_api_test.cc @@ -64,8 +64,8 @@ TEST(CxxApi, run) { EXPECT_NEAR(out[1], -28.8729, 1e-3); predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2"); - predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2.naive", - LiteModelType::kNaiveBuffer); + predictor->SaveOptimizedModel( + FLAGS_model_dir + ".opt2.naive", LiteModelType::kNaiveBuffer, true); } // Demo1 for Mobile Devices :Load model from file and run diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt index f083c97f7ab89640f425de4807cdcd090784fd87..a5b581335047ff18c31ea9d1c03a9785e4ddf2ed 100644 --- a/lite/core/CMakeLists.txt +++ b/lite/core/CMakeLists.txt @@ -71,6 +71,8 @@ add_custom_command( COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py ${kernels_src_list} ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h + "${LITE_OPTMODEL_DIR}/.tailored_kernels_list" + LITE_BUILD_TAILOR OUTPUT kernels.h # not a real path to the output to force it execute every time. 
diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt
index f083c97f7ab89640f425de4807cdcd090784fd87..a5b581335047ff18c31ea9d1c03a9785e4ddf2ed 100644
--- a/lite/core/CMakeLists.txt
+++ b/lite/core/CMakeLists.txt
@@ -71,6 +71,8 @@ add_custom_command(
   COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py
                  ${kernels_src_list}
                  ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h
+                 "${LITE_OPTMODEL_DIR}/.tailored_kernels_list"
+                 ${LITE_BUILD_TAILOR}
   OUTPUT kernels.h # not a real path to the output to force it execute every time.
   )
 # A trick to generate the paddle_use_ops.h
@@ -78,6 +80,8 @@ add_custom_command(
   COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_op_registry.py
                  ${ops_src_list}
                  ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h
+                 "${LITE_OPTMODEL_DIR}/.tailored_ops_list"
+                 ${LITE_BUILD_TAILOR}
   OUTPUT ops.h # not a real path to the output to force it execute every time.
   )
 # generate fake kernels for memory_optimize_tool
@@ -85,6 +89,7 @@ add_custom_command(
   COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/create_fake_kernel_registry.py
                  ${kernels_src_list}
                  ${CMAKE_BINARY_DIR}/all_kernel_faked.cc
+                 ${CMAKE_BINARY_DIR}/kernel_src_map.h
   OUTPUT all_kernel_faked.cc # not a real path to the output to force it execute every time.
   )
 add_custom_target(op_list_h DEPENDS ops.h)
diff --git a/lite/core/op_registry.h b/lite/core/op_registry.h
index 13f83c5346cf70d2bb709fe06393cf20af06a2a6..25375b8a8f795e58194d6223f617273beac3b78e 100644
--- a/lite/core/op_registry.h
+++ b/lite/core/op_registry.h
@@ -32,6 +32,43 @@
 
 using LiteType = paddle::lite::Type;
 
+class OpKernelInfoCollector {
+ public:
+  static OpKernelInfoCollector &Global() {
+    static auto *x = new OpKernelInfoCollector;
+    return *x;
+  }
+  void AddOp2path(const std::string &op_name, const std::string &op_path) {
+    size_t index = op_path.find_last_of('/');
+    if (index != std::string::npos) {
+      op2path_.insert(std::pair<std::string, std::string>(
+          op_name, op_path.substr(index + 1)));
+    }
+  }
+  void AddKernel2path(const std::string &kernel_name,
+                      const std::string &kernel_path) {
+    size_t index = kernel_path.find_last_of('/');
+    if (index != std::string::npos) {
+      kernel2path_.insert(std::pair<std::string, std::string>(
+          kernel_name, kernel_path.substr(index + 1)));
+    }
+  }
+  void SetKernel2path(
+      const std::map<std::string, std::string> &kernel2path_map) {
+    kernel2path_ = kernel2path_map;
+  }
+  const std::map<std::string, std::string> &GetOp2PathDict() {
+    return op2path_;
+  }
+  const std::map<std::string, std::string> &GetKernel2PathDict() {
+    return kernel2path_;
+  }
+
+ private:
+  std::map<std::string, std::string> op2path_;
+  std::map<std::string, std::string> kernel2path_;
+};
+
 namespace paddle {
 namespace lite {
@@ -59,7 +96,6 @@ class OpLiteRegistor : public Registor<OpClass> {
           });
         }) {}
 };
-
 template <TargetType Target, PrecisionType Precision, DataLayoutType Layout>
 using KernelRegistryForTarget = Factory<KernelLite<Target, Precision, Layout>,
                                         std::unique_ptr<KernelBase>>;
@@ -287,6 +323,7 @@ class KernelRegistor : public lite::Registor<KernelType> {
   static paddle::lite::OpLiteRegistor<OpClass> LITE_OP_REGISTER_INSTANCE( \
       op_type__)(#op_type__);                                             \
   int touch_op_##op_type__() {                                            \
+    OpKernelInfoCollector::Global().AddOp2path(#op_type__, __FILE__);     \
     return LITE_OP_REGISTER_INSTANCE(op_type__).Touch();                  \
   }
@@ -312,6 +349,9 @@ class KernelRegistor : public lite::Registor<KernelType> {
   static KernelClass LITE_KERNEL_INSTANCE(                                    \
       op_type__, target__, precision__, layout__, alias__);                   \
   int touch_##op_type__##target__##precision__##layout__##alias__() {         \
+    OpKernelInfoCollector::Global().AddKernel2path(                           \
+        #op_type__ "," #target__ "," #precision__ "," #layout__ "," #alias__, \
+        __FILE__);                                                            \
     LITE_KERNEL_INSTANCE(op_type__, target__, precision__, layout__, alias__) \
         .Touch();                                                             \
     return 0;                                                                 \
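
Note: the collector is populated as a side effect of the `touch_` functions that the `USE_LITE_OP`/`USE_LITE_KERNEL` machinery already invokes. A sketch of what one expanded kernel registration effectively records (key and path are illustrative):

```cpp
// Inside touch_conv2d_kARM_kFloat_kNCHW_def() (conceptually), __FILE__ is
// the registering source file; AddKernel2path keeps only its basename.
OpKernelInfoCollector::Global().AddKernel2path(
    "conv2d,kARM,kFloat,kNCHW,def",
    "/work/Paddle-Lite/lite/kernels/arm/conv_compute.cc");
// GetKernel2PathDict() now maps
//   "conv2d,kARM,kFloat,kNCHW,def" -> "conv_compute.cc"
```
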
diff --git a/lite/tools/build.sh b/lite/tools/build.sh
index 8463c1497ad8608358dcf3f5b561419c8af1d0a2..9423b69ee39bc2e06adc0807e4329b882f4705bd 100755
--- a/lite/tools/build.sh
+++ b/lite/tools/build.sh
@@ -17,6 +17,8 @@ BUILD_EXTRA=OFF
 BUILD_JAVA=ON
 BUILD_PYTHON=OFF
 BUILD_DIR=$(pwd)
+OPTMODEL_DIR=""
+BUILD_TAILOR=OFF
 
 readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
 
@@ -94,6 +96,8 @@ function make_tiny_publish_so {
       -DLITE_ON_TINY_PUBLISH=ON \
       -DANDROID_STL_TYPE=$android_stl \
       -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
+      -DLITE_BUILD_TAILOR=$BUILD_TAILOR \
+      -DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
       -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
 
   make publish_inference -j$NUM_PROC
@@ -133,6 +137,8 @@ function make_full_publish_so {
       -DLITE_SHUTDOWN_LOG=ON \
       -DANDROID_STL_TYPE=$android_stl \
       -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
+      -DLITE_BUILD_TAILOR=$BUILD_TAILOR \
+      -DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
      -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
 
   make publish_inference -j4
@@ -317,6 +323,14 @@ function main {
         BUILD_DIR="${i#*=}"
         shift
         ;;
+      --opt_model_dir=*)
+        OPTMODEL_DIR="${i#*=}"
+        shift
+        ;;
+      --build_tailor=*)
+        BUILD_TAILOR="${i#*=}"
+        shift
+        ;;
       tiny_publish)
         make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL
         shift
diff --git a/lite/tools/cmake_tools/create_fake_kernel_registry.py b/lite/tools/cmake_tools/create_fake_kernel_registry.py
index 9edd6d836d9c4eddbc3f9e4c1f78274abdf3b4c2..140d77320704f62dfb2492eec3ad7238fe3868ff 100644
--- a/lite/tools/cmake_tools/create_fake_kernel_registry.py
+++ b/lite/tools/cmake_tools/create_fake_kernel_registry.py
@@ -20,6 +20,7 @@ from utils import *
 
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
+kernelmap_path = sys.argv[3]
 
 out_lines = [
     '#pragma once',
@@ -47,6 +48,31 @@ class %s : public KernelLite<TARGET(%s), PRECISION(%s), DATALAYOUT(%s)> {
 }  // namespace paddle
 '''
 
+# create a .h file to store the kernel & source-file relationship
+kernel_src_map_lines = [
+'''
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <map>
+// ATTENTION This can only be included in a .cc file.
+
+const std::map<std::string, std::string> kernel2path_map{
+'''
+]
 
 with open(ops_list_path) as f:
@@ -99,7 +125,23 @@ with open(ops_list_path) as f:
                 out_lines.append("")
                 out_lines.append(gen_use_kernel_statement(k.op_type, k.target,
                     k.precision, k.data_layout, k.alias))
-
+                index = path.rindex('/')
+                filename = path[index + 1:]
+                map_element = '    {"%s,%s,%s,%s,%s", "%s"},' % (
+                    k.op_type,
+                    k.target,
+                    k.precision,
+                    k.data_layout,
+                    k.alias,
+                    filename.strip()
+                )
+                kernel_src_map_lines.append(map_element)
 
 with open(dest_path, 'w') as f:
     logging.info("write kernel list to %s" % dest_path)
     f.write('\n'.join(out_lines))
+
+with open(kernelmap_path, 'w') as fd:
+    logging.info("write kernel map to %s" % kernelmap_path)
+    kernel_src_map_lines.append('    {"  ", "  "}')
+    kernel_src_map_lines.append('};')
+    fd.write('\n'.join(kernel_src_map_lines))
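
Note: with the new script options, a tailored tiny_publish build might look like the following; the non-tailoring flags are the script's pre-existing options and are shown only as a plausible combination:

```sh
./lite/tools/build.sh \
  --arm_os=android \
  --arm_abi=armv8 \
  --arm_lang=gcc \
  --android_stl=c++_static \
  --build_tailor=ON \
  --opt_model_dir=/path/to/optimized_model \
  tiny_publish
```
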
diff --git a/lite/tools/cmake_tools/parse_kernel_registry.py b/lite/tools/cmake_tools/parse_kernel_registry.py
index 50d28864144de11cde129233f6b9ed2e2a4f644c..f4f0b95483687d3785168c132d30ac8a4fa87c8e 100644
--- a/lite/tools/cmake_tools/parse_kernel_registry.py
+++ b/lite/tools/cmake_tools/parse_kernel_registry.py
@@ -18,14 +18,19 @@ from ast import RegisterLiteKernelParser
 
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
+minkernels_list_path = sys.argv[3]
+tailored = sys.argv[4]
 
 out_lines = [
     '#pragma once',
     '#include "paddle_lite_factory_helper.h"',
     '',
 ]
-
-
+minlines = set()
+if tailored == "ON":
+    with open(minkernels_list_path) as fd:
+        for line in fd:
+            minlines.add(line.strip())
 with open(ops_list_path) as f:
     paths = set([path for path in f])
     for path in paths:
@@ -35,6 +40,15 @@ with open(ops_list_path) as f:
             kernel_parser.parse()
 
             for k in kernel_parser.kernels:
+                kernel = "%s,%s,%s,%s,%s" % (
+                    k.op_type,
+                    k.target,
+                    k.precision,
+                    k.data_layout,
+                    k.alias,
+                )
+                if tailored == "ON":
+                    if kernel not in minlines: continue
                 key = "USE_LITE_KERNEL(%s, %s, %s, %s, %s);" % (
                     k.op_type,
                     k.target,
diff --git a/lite/tools/cmake_tools/parse_op_registry.py b/lite/tools/cmake_tools/parse_op_registry.py
index 8643475f9e86e3ed57768dd1d35a3e56424652f4..db58c455a9d5863ec0c66d7783871831c73c120f 100644
--- a/lite/tools/cmake_tools/parse_op_registry.py
+++ b/lite/tools/cmake_tools/parse_op_registry.py
@@ -19,7 +19,8 @@ from ast import RegisterLiteOpParser
 
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
-
+minops_list_path = sys.argv[3]
+tailored = sys.argv[4]
 out_lines = [
     '#pragma once',
     '#include "paddle_lite_factory_helper.h"',
@@ -30,6 +31,11 @@ paths = set()
 for line in open(ops_list_path):
     paths.add(line.strip())
 
+if tailored == "ON":
+    minlines = set()
+    with open(minops_list_path) as fd:
+        for line in fd:
+            minlines.add(line.strip())
 for path in paths:
     str_info = open(path.strip()).read()
     op_parser = RegisterLiteOpParser(str_info)
@@ -37,6 +43,8 @@ for path in paths:
     for op in ops:
         if "_grad" in op:
             continue
+        if tailored == "ON":
+            if op not in minlines: continue
         out = "USE_LITE_OP(%s);" % op
         out_lines.append(out)
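
Note: under tailoring the two parsers emit only the registrations whose keys appear in the hidden list files, so the generated headers shrink to something like the following (op and kernel names are illustrative):

```cpp
// paddle_use_kernels.h (tailored)
USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);

// paddle_use_ops.h (tailored)
USE_LITE_OP(conv2d);
USE_LITE_OP(fc);
```
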