提交 b16917a4 编写于 作者: H huzhiqiang 提交者: GitHub

model dynamic library tailoring (#2256)

* add shell file to automatically build and collect publish result test=develop
* modify API inference of model_optimize_tool and add option for tiny&full publish test=develop
上级 e914f0da
......@@ -70,6 +70,7 @@ lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF)
lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF)
# publish options
lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
lite_option(LITE_BUILD_TAILOR "Enable tailoring library according to model" OFF)
# TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter.
if(ANDROID OR IOS OR ARMLINUX)
......
......@@ -241,6 +241,10 @@ set(host_kernels CACHE INTERNAL "host kernels")
set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt")
file(WRITE ${kernels_src_list} "") # clean
# When tailoring is enabled, load the list of kernel source files that the
# optimized model needs. add_kernel() consults `tailored_kernels_list` and skips
# every kernel whose sources are not all present in it.
if(LITE_BUILD_TAILOR)
  # Fail early with an actionable message instead of letting file(STRINGS)
  # complain about an odd path such as "/.tailored_kernels_source_list".
  if("${LITE_OPTMODEL_DIR}" STREQUAL "")
    message(FATAL_ERROR
      "LITE_BUILD_TAILOR is ON but LITE_OPTMODEL_DIR is empty; "
      "set it to the optimized model directory that holds the tailoring records.")
  endif()
  set(tailored_kernels_list_path "${LITE_OPTMODEL_DIR}/.tailored_kernels_source_list")
  if(NOT EXISTS "${tailored_kernels_list_path}")
    message(FATAL_ERROR
      "Tailoring record not found: ${tailored_kernels_list_path}")
  endif()
  file(STRINGS "${tailored_kernels_list_path}" tailored_kernels_list)
endif()
# add a kernel for some specific device
# device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA)
# level: one of (basic, extra)
......@@ -252,6 +256,15 @@ function(add_kernel TARGET device level)
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(LITE_BUILD_TAILOR)
foreach(src ${args_SRCS})
list (FIND tailored_kernels_list ${src} _index)
if (${_index} EQUAL -1)
return()
endif()
endforeach()
endif()
if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
return()
endif()
......@@ -338,6 +351,10 @@ endfunction()
set(ops CACHE INTERNAL "ops")
set(ops_src_list "${CMAKE_BINARY_DIR}/ops_src_list.txt")
file(WRITE ${ops_src_list} "") # clean
# When tailoring is enabled, load the list of operator source files that the
# optimized model needs. add_operator() consults `tailored_ops_list` and skips
# operators whose sources are not present in it.
if(LITE_BUILD_TAILOR)
  # Fail early with an actionable message instead of letting file(STRINGS)
  # complain about an odd path such as "/.tailored_ops_source_list".
  if("${LITE_OPTMODEL_DIR}" STREQUAL "")
    message(FATAL_ERROR
      "LITE_BUILD_TAILOR is ON but LITE_OPTMODEL_DIR is empty; "
      "set it to the optimized model directory that holds the tailoring records.")
  endif()
  set(tailored_ops_list_path "${LITE_OPTMODEL_DIR}/.tailored_ops_source_list")
  if(NOT EXISTS "${tailored_ops_list_path}")
    message(FATAL_ERROR
      "Tailoring record not found: ${tailored_ops_list_path}")
  endif()
  file(STRINGS "${tailored_ops_list_path}" tailored_ops_list)
endif()
# add an operator
# level: one of (basic, extra)
function(add_operator TARGET level)
......@@ -348,16 +365,24 @@ function(add_operator TARGET level)
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
return()
endif()
set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
foreach(src ${args_SRCS})
if(LITE_BUILD_TAILOR)
list(FIND tailored_ops_list ${src} _index)
if (${_index} EQUAL -1)
return()
endif()
endif()
file(APPEND ${ops_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
lite_cc_library(${TARGET} SRCS ${args_SRCS}
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
......
......@@ -15,6 +15,7 @@
#include "lite/api/cxx_api.h"
#include <algorithm>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
......@@ -23,8 +24,16 @@
namespace paddle {
namespace lite {
// Names of the hidden record files that Predictor::SaveOpKernelInfo writes
// into the optimized model directory.
//
// Source-file names of the operators used by the model, one per line.
static const char TAILORD_OPS_SOURCE_LIST_FILENAME[] =
".tailored_ops_source_list";
// Operator types used by the model, one per line.
static const char TAILORD_OPS_LIST_NAME[] = ".tailored_ops_list";
// Source-file names of the kernels used by the model, one per line.
static const char TAILORD_KERNELS_SOURCE_LIST_FILENAME[] =
".tailored_kernels_source_list";
// Kernel descriptors ("op_type,target,precision,layout,alias"), one per line.
static const char TAILORD_KERNELS_LIST_NAME[] = ".tailored_kernels_list";
void Predictor::SaveModel(const std::string &dir,
lite_api::LiteModelType model_type) {
lite_api::LiteModelType model_type,
bool record_info) {
if (!program_) {
GenRuntimeProgram();
}
......@@ -40,6 +49,83 @@ void Predictor::SaveModel(const std::string &dir,
default:
LOG(FATAL) << "Unknown model type";
}
if (record_info) {
SaveOpKernelInfo(dir);
}
}
// Records which operators and kernels the current runtime program uses so a
// later build can be tailored to this model.
//
// Four text files are written into `model_dir`, one entry per line:
//   - TAILORD_OPS_LIST_NAME:                operator types
//   - TAILORD_OPS_SOURCE_LIST_FILENAME:     operator source-file names
//   - TAILORD_KERNELS_LIST_NAME:            "op_type,target,precision,layout,alias"
//   - TAILORD_KERNELS_SOURCE_LIST_FILENAME: kernel source-file names
// Entries are de-duplicated and sorted because they are gathered in std::set.
// An operator or kernel with no registered source path yields an empty line in
// the corresponding source list.
//
// Aborts via LOG(FATAL) if any of the four files cannot be created.
void Predictor::SaveOpKernelInfo(const std::string &model_dir) {
  std::set<std::string> ops_info;
  std::set<std::string> kernels_info;
  const auto &instructions = program_->instructions();
  for (auto &node : instructions) {
    // parse op type information
    auto op = node.op()->op_info();
    ops_info.insert(op->Type());
    // parse kernel type information
    std::string kernel_type_str =
        node.kernel()->op_type() + "," + TargetRepr(node.kernel()->target()) +
        "," + PrecisionRepr(node.kernel()->precision()) + "," +
        DataLayoutRepr(node.kernel()->layout()) + "," + node.kernel()->alias();
    kernels_info.insert(kernel_type_str);
  }

  // get source file name from op type and kernel type; bind by reference so
  // the global dictionaries are neither copied nor modified by the lookups
  const auto &op2pathmap = OpKernelInfoCollector::Global().GetOp2PathDict();
  const auto &kernel2pathmap =
      OpKernelInfoCollector::Global().GetKernel2PathDict();

  // write used op and kernel info into files
  std::string opf_path = model_dir + "/" + TAILORD_OPS_LIST_NAME;
  std::string opf_source_path =
      model_dir + "/" + TAILORD_OPS_SOURCE_LIST_FILENAME;
  std::string kpf_path = model_dir + "/" + TAILORD_KERNELS_LIST_NAME;
  std::string kpf_source_path =
      model_dir + "/" + TAILORD_KERNELS_SOURCE_LIST_FILENAME;

  std::FILE *opf = std::fopen(opf_path.c_str(), "w");
  std::FILE *opf_source = std::fopen(opf_source_path.c_str(), "w");
  std::FILE *kpf = std::fopen(kpf_path.c_str(), "w");
  std::FILE *kpf_source = std::fopen(kpf_source_path.c_str(), "w");
  // All four handles must be valid; the kernels list handle (kpf) is checked
  // too, otherwise a failed open would reach fputs() with a null FILE*.
  if (nullptr == opf || nullptr == opf_source || nullptr == kpf ||
      nullptr == kpf_source) {
    LOG(FATAL) << "failed to create info file into: " << model_dir;
  }

  // write op type and op source file into files
  for (const auto &op_type : ops_info) {
    fputs(op_type.c_str(), opf);
    fputc('\n', opf);
    const auto found = op2pathmap.find(op_type);
    const std::string op_path =
        (found == op2pathmap.end()) ? std::string() : found->second;
    fputs(op_path.c_str(), opf_source);
    fputc('\n', opf_source);
  }
  std::fclose(opf_source);
  std::fclose(opf);
  LOG(INFO) << "operators information of tailored model is stored into: "
            << opf_path;

  // write Kernel_type and Kernel_path into file
  for (const auto &kernel_type : kernels_info) {
    fputs(kernel_type.c_str(), kpf);
    fputc('\n', kpf);
    const auto found = kernel2pathmap.find(kernel_type);
    const std::string kernel_path =
        (found == kernel2pathmap.end()) ? std::string() : found->second;
    fputs(kernel_path.c_str(), kpf_source);
    fputc('\n', kpf_source);
    // conv_compute.cc always brings four extra conv sources along with it.
    // NOTE(review): presumably conv_compute.cc depends on these
    // implementations - confirm against the kernel sources.
    if (kernel_path == "conv_compute.cc") {
      fputs(
          "conv_depthwise.cc\nconv_direct.cc\nconv_gemmlike.cc\nconv_"
          "winograd.cc\n",
          kpf_source);
    }
  }
  std::fclose(kpf_source);
  std::fclose(kpf);
  LOG(INFO) << "kernels information of tailored model is stored into: "
            << kpf_path;
}
lite::Tensor *Predictor::GetInput(size_t offset) {
......@@ -61,7 +147,7 @@ void Predictor::PrepareFeedFetch() {
auto current_block = program_desc_.GetBlock<cpp::BlockDesc>(0);
std::vector<cpp::OpDesc *> feeds;
std::vector<cpp::OpDesc *> fetchs;
for (int i = 0; i < current_block->OpsSize(); i++) {
for (size_t i = 0; i < current_block->OpsSize(); i++) {
auto op = current_block->GetOp<cpp::OpDesc>(i);
if (op->Type() == "feed") {
feeds.push_back(op);
......@@ -71,11 +157,11 @@ void Predictor::PrepareFeedFetch() {
}
input_names_.resize(feeds.size());
output_names_.resize(fetchs.size());
for (int i = 0; i < feeds.size(); i++) {
for (size_t i = 0; i < feeds.size(); i++) {
input_names_[feeds[i]->GetAttr<int>("col")] =
feeds[i]->Output("Out").front();
}
for (int i = 0; i < fetchs.size(); i++) {
for (size_t i = 0; i < fetchs.size(); i++) {
output_names_[fetchs[i]->GetAttr<int>("col")] =
fetchs[i]->Input("X").front();
}
......@@ -191,7 +277,7 @@ lite::Tensor *Predictor::GetInputByName(const std::string &name) {
if (element == input_names_.end()) {
LOG(ERROR) << "Model do not have input named with: [" << name
<< "], model's inputs include:";
for (int i = 0; i < input_names_.size(); i++) {
for (size_t i = 0; i < input_names_.size(); i++) {
LOG(ERROR) << "[" << input_names_[i] << "]";
}
return nullptr;
......
......@@ -89,7 +89,9 @@ class LITE_API Predictor {
// This method is disabled in mobile, for unnecessary dependencies required.
void SaveModel(
const std::string& dir,
lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf);
lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
bool record_info = false);
void SaveOpKernelInfo(const std::string& model_dir);
#ifdef LITE_WITH_TRAIN
void Run(const std::vector<framework::Tensor>& tensors) {
......@@ -137,9 +139,10 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
std::unique_ptr<lite_api::Tensor> GetInputByName(
const std::string& name) override;
void SaveOptimizedModel(const std::string& model_dir,
lite_api::LiteModelType model_type =
lite_api::LiteModelType::kProtobuf) override;
void SaveOptimizedModel(
const std::string& model_dir,
lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
bool record_info = false) override;
private:
Predictor raw_predictor_;
......
......@@ -65,8 +65,9 @@ std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInputByName(
}
void CxxPaddleApiImpl::SaveOptimizedModel(const std::string &model_dir,
lite_api::LiteModelType model_type) {
raw_predictor_.SaveModel(model_dir, model_type);
lite_api::LiteModelType model_type,
bool record_info) {
raw_predictor_.SaveModel(model_dir, model_type, record_info);
}
} // namespace lite
......
......@@ -16,7 +16,10 @@
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest.h>
#endif
// "all_kernel_faked.cc" and "kernel_src_map.h" are created automatically during
// model_optimize_tool's compiling period
#include "all_kernel_faked.cc" // NOLINT
#include "kernel_src_map.h" // NOLINT
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
......@@ -35,6 +38,11 @@ DEFINE_string(
"protobuf",
"store type of the output optimized model. protobuf/naive_buffer");
DEFINE_bool(display_kernels, false, "Display kernel information");
DEFINE_bool(record_tailoring_info,
false,
"Record kernels and operators information of the optimized model "
"for tailoring compiling, information are stored into optimized "
"model path as hidden files");
DEFINE_string(optimize_out, "", "path of the output optimized model");
DEFINE_string(valid_targets,
"arm",
......@@ -104,8 +112,14 @@ void Main() {
} else {
LOG(FATAL) << "Unsupported Model type :" << FLAGS_optimize_out_type;
}
OpKernelInfoCollector::Global().SetKernel2path(kernel2path_map);
predictor->SaveOptimizedModel(FLAGS_optimize_out, model_type);
predictor->SaveOptimizedModel(
FLAGS_optimize_out, model_type, FLAGS_record_tailoring_info);
if (FLAGS_record_tailoring_info) {
LOG(INFO) << "Record the information of tailored model into :"
<< FLAGS_optimize_out;
}
}
} // namespace lite_api
......
......@@ -145,7 +145,8 @@ lod_t Tensor::lod() const { return ctensor(raw_tensor_)->lod(); }
void Tensor::SetLoD(const lod_t &lod) { tensor(raw_tensor_)->set_lod(lod); }
void PaddlePredictor::SaveOptimizedModel(const std::string &model_dir,
LiteModelType model_type) {
LiteModelType model_type,
bool record_info) {
LOG(FATAL)
<< "The SaveOptimizedModel API is only supported by CxxConfig predictor.";
}
......
......@@ -97,7 +97,8 @@ class LITE_API PaddlePredictor {
/// CxxConfig, and the persisted model can be reused for MobileConfig.
virtual void SaveOptimizedModel(
const std::string& model_dir,
LiteModelType model_type = LiteModelType::kProtobuf);
LiteModelType model_type = LiteModelType::kProtobuf,
bool record_info = false);
virtual ~PaddlePredictor() = default;
};
......
......@@ -64,8 +64,8 @@ TEST(CxxApi, run) {
EXPECT_NEAR(out[1], -28.8729, 1e-3);
predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2");
predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2.naive",
LiteModelType::kNaiveBuffer);
predictor->SaveOptimizedModel(
FLAGS_model_dir + ".opt2.naive", LiteModelType::kNaiveBuffer, true);
}
// Demo1 for Mobile Devices :Load model from file and run
......
......@@ -71,6 +71,8 @@ add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py
${kernels_src_list}
${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h
"${LITE_OPTMODEL_DIR}/.tailored_kernels_list"
LITE_BUILD_TAILOR
OUTPUT kernels.h # not a real path to the output to force it execute every time.
)
# A trick to generate the paddle_use_ops.h
......@@ -78,6 +80,8 @@ add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_op_registry.py
${ops_src_list}
${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h
"${LITE_OPTMODEL_DIR}/.tailored_ops_list"
LITE_BUILD_TAILOR
OUTPUT ops.h # not a real path to the output to force it execute every time.
)
# generate fake kernels for memory_optimize_tool
......@@ -85,6 +89,7 @@ add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/create_fake_kernel_registry.py
${kernels_src_list}
${CMAKE_BINARY_DIR}/all_kernel_faked.cc
${CMAKE_BINARY_DIR}/kernel_src_map.h
OUTPUT all_kernel_faked.cc # not a real path to the output to force it execute every time.
)
add_custom_target(op_list_h DEPENDS ops.h)
......
......@@ -32,6 +32,43 @@
using LiteType = paddle::lite::Type;
// Process-wide registry that remembers which source file registered each
// operator and each kernel. Only the file name (the part after the last path
// separator) is stored.
class OpKernelInfoCollector {
 public:
  // Returns the single shared collector. The instance is heap-allocated on
  // first use and never deleted.
  static OpKernelInfoCollector &Global() {
    static auto *x = new OpKernelInfoCollector;
    return *x;
  }

  // Associates `op_name` with the file name of `op_path`. An existing entry
  // for the same name is kept unchanged.
  void AddOp2path(const std::string &op_name, const std::string &op_path) {
    op2path_.emplace(op_name, BaseName(op_path));
  }

  // Associates `kernel_name` with the file name of `kernel_path`. An existing
  // entry for the same name is kept unchanged.
  void AddKernel2path(const std::string &kernel_name,
                      const std::string &kernel_path) {
    kernel2path_.emplace(kernel_name, BaseName(kernel_path));
  }

  // Replaces the whole kernel -> source-file dictionary.
  void SetKernel2path(
      const std::map<std::string, std::string> &kernel2path_map) {
    kernel2path_ = kernel2path_map;
  }

  // Read-only view of the operator -> source-file dictionary.
  const std::map<std::string, std::string> &GetOp2PathDict() const {
    return op2path_;
  }

  // Read-only view of the kernel -> source-file dictionary.
  const std::map<std::string, std::string> &GetKernel2PathDict() const {
    return kernel2path_;
  }

 private:
  // Strips any directory part from `path`. A path without a separator is
  // already a bare file name and is returned as is, so such registrations are
  // no longer dropped. Both '/' and '\\' are treated as separators.
  static std::string BaseName(const std::string &path) {
    const std::string::size_type index = path.find_last_of("/\\");
    return index == std::string::npos ? path : path.substr(index + 1);
  }

  std::map<std::string, std::string> op2path_;
  std::map<std::string, std::string> kernel2path_;
};
namespace paddle {
namespace lite {
......@@ -59,7 +96,6 @@ class OpLiteRegistor : public Registor<OpClass> {
});
}) {}
};
template <TargetType Target, PrecisionType Precision, DataLayoutType Layout>
using KernelRegistryForTarget =
Factory<KernelLite<Target, Precision, Layout>, std::unique_ptr<KernelBase>>;
......@@ -287,6 +323,7 @@ class KernelRegistor : public lite::Registor<KernelType> {
static paddle::lite::OpLiteRegistor<OpClass> LITE_OP_REGISTER_INSTANCE( \
op_type__)(#op_type__); \
int touch_op_##op_type__() { \
OpKernelInfoCollector::Global().AddOp2path(#op_type__, __FILE__); \
return LITE_OP_REGISTER_INSTANCE(op_type__).Touch(); \
}
......@@ -312,6 +349,9 @@ class KernelRegistor : public lite::Registor<KernelType> {
static KernelClass LITE_KERNEL_INSTANCE( \
op_type__, target__, precision__, layout__, alias__); \
int touch_##op_type__##target__##precision__##layout__##alias__() { \
OpKernelInfoCollector::Global().AddKernel2path( \
#op_type__ "," #target__ "," #precision__ "," #layout__ "," #alias__, \
__FILE__); \
LITE_KERNEL_INSTANCE(op_type__, target__, precision__, layout__, alias__) \
.Touch(); \
return 0; \
......
......@@ -17,6 +17,8 @@ BUILD_EXTRA=OFF
BUILD_JAVA=ON
BUILD_PYTHON=OFF
BUILD_DIR=$(pwd)
OPTMODEL_DIR=""
BUILD_TAILOR=OFF
readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
......@@ -94,6 +96,8 @@ function make_tiny_publish_so {
-DLITE_ON_TINY_PUBLISH=ON \
-DANDROID_STL_TYPE=$android_stl \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_BUILD_TAILOR=$BUILD_TAILOR \
-DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
-DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
make publish_inference -j$NUM_PROC
......@@ -133,6 +137,8 @@ function make_full_publish_so {
-DLITE_SHUTDOWN_LOG=ON \
-DANDROID_STL_TYPE=$android_stl \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_BUILD_TAILOR=$BUILD_TAILOR \
-DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
-DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
make publish_inference -j4
......@@ -317,6 +323,14 @@ function main {
BUILD_DIR="${i#*=}"
shift
;;
--opt_model_dir=*)
OPTMODEL_DIR="${i#*=}"
shift
;;
--build_tailor=*)
BUILD_TAILOR="${i#*=}"
shift
;;
tiny_publish)
make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL
shift
......
......@@ -20,6 +20,7 @@ from utils import *
ops_list_path = sys.argv[1]
dest_path = sys.argv[2]
kernelmap_path = sys.argv[3]
out_lines = [
'#pragma once',
......@@ -47,6 +48,31 @@ class %s : public KernelLite<TARGET(%s), PRECISION(%s), DATALAYOUT(%s)> {
} // namespace paddle
'''
# Preamble of the generated header that maps each kernel descriptor
# ("op_type,target,precision,layout,alias") to the source file defining it.
# The map entries and the closing brace are appended further down the script.
# <string> is included explicitly because the generated map uses std::string
# and must not rely on the including file to have pulled it in.
kernel_src_map_lines = [
'''
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include<map>
#include <string>
// ATTENTION This can only include in a .cc file.
const std::map<std::string, std::string> kernel2path_map{
'''
]
with open(ops_list_path) as f:
......@@ -99,7 +125,23 @@ with open(ops_list_path) as f:
out_lines.append("")
out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias))
index = path.rindex('/')
filename = path[index + 1:]
map_element = ' {"%s,%s,%s,%s,%s", "%s"},' % (
k.op_type,
k.target,
k.precision,
k.data_layout,
k.alias,
filename.strip()
)
kernel_src_map_lines.append(map_element)
# Write the generated fake-kernel source file.
with open(dest_path, 'w') as f:
    logging.info("write kernel list to %s" % dest_path)
    f.write('\n'.join(out_lines))

# Write the kernel -> source-file map header. Every map element collected so
# far ends with a comma, so a placeholder entry is appended before the closing
# brace to terminate the initializer list.
with open(kernelmap_path, 'w') as fd:
    # Log the path that is actually being written (was dest_path).
    logging.info("write kernel map to %s" % kernelmap_path)
    kernel_src_map_lines.append(' {" ", " "}')
    kernel_src_map_lines.append('};')
    fd.write('\n'.join(kernel_src_map_lines))
......@@ -18,14 +18,19 @@ from ast import RegisterLiteKernelParser
ops_list_path = sys.argv[1]
dest_path = sys.argv[2]
minkernels_list_path = sys.argv[3]
tailored = sys.argv[4]
out_lines = [
'#pragma once',
'#include "paddle_lite_factory_helper.h"',
'',
]
minlines = set()
if tailored == "ON":
with open(minkernels_list_path) as fd:
for line in fd:
minlines.add(line.strip())
with open(ops_list_path) as f:
paths = set([path for path in f])
for path in paths:
......@@ -35,6 +40,15 @@ with open(ops_list_path) as f:
kernel_parser.parse()
for k in kernel_parser.kernels:
kernel = "%s, %s, %s, %s, %s" % (
k.op_type,
k.target,
k.precision,
k.data_layout,
k.alias,
)
if tailored == "ON":
if kernel not in minlines: continue
key = "USE_LITE_KERNEL(%s, %s, %s, %s, %s);" % (
k.op_type,
k.target,
......
......@@ -19,7 +19,8 @@ from ast import RegisterLiteOpParser
ops_list_path = sys.argv[1]
dest_path = sys.argv[2]
minops_list_path = sys.argv[3]
tailored = sys.argv[4]
out_lines = [
'#pragma once',
'#include "paddle_lite_factory_helper.h"',
......@@ -30,6 +31,11 @@ paths = set()
for line in open(ops_list_path):
paths.add(line.strip())
if tailored == "ON":
minlines = set()
with open(minops_list_path) as fd:
for line in fd:
minlines.add(line.strip())
for path in paths:
str_info = open(path.strip()).read()
op_parser = RegisterLiteOpParser(str_info)
......@@ -37,6 +43,8 @@ for path in paths:
for op in ops:
if "_grad" in op:
continue
if tailored == "ON":
if op not in minlines: continue
out = "USE_LITE_OP(%s);" % op
out_lines.append(out)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册