Unverified commit 5f72a91b, authored by huzhiqiang, committed by GitHub

Tailoring cherrypick (#2263)

* cherry-pick model-tailoring into release/v2.1.0 test=develop
Parent commit: 71684f98
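The user-facing entry point of this change is the new record_info argument of PaddlePredictor::SaveOptimizedModel (see the paddle_api.h and cxx_api_test.cc hunks below). A minimal sketch of calling it through the full C++ API follows; the model directory, valid places, and output path are illustrative assumptions, not part of this commit:

#include <vector>

#include "lite/api/paddle_api.h"          // CxxConfig, CreatePaddlePredictor, Place
#include "lite/api/paddle_use_kernels.h"  // needed when linking the full static lib
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"

int main() {
  paddle::lite_api::CxxConfig config;
  config.set_model_dir("./mobilenet_v1");  // hypothetical model directory
  config.set_valid_places(
      {paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)}});

  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);

  // The third argument is the new record_info flag. When it is true, saving
  // also writes the hidden tailoring files (.tailored_ops_list,
  // .tailored_kernels_list and the matching *_source_list files) next to the
  // optimized model, which a LITE_BUILD_TAILOR=ON build later reads through
  // LITE_OPTMODEL_DIR.
  predictor->SaveOptimizedModel("./mobilenet_v1_opt",
                                paddle::lite_api::LiteModelType::kNaiveBuffer,
                                true);
  return 0;
}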
......@@ -70,6 +70,7 @@ lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF)
lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF)
# publish options
lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
lite_option(LITE_BUILD_TAILOR "Enable tailoring library according to model" OFF)
# TODO(Superjomn) Remove WITH_ANAKIN option if not needed later.
if(ANDROID OR IOS OR ARMLINUX)
......
......@@ -241,6 +241,10 @@ set(host_kernels CACHE INTERNAL "host kernels")
set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt")
file(WRITE ${kernels_src_list} "") # clean
if(LITE_BUILD_TAILOR)
set(tailored_kernels_list_path "${LITE_OPTMODEL_DIR}/.tailored_kernels_source_list")
file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list)
endif()
# add a kernel for some specific device
# device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA)
# level: one of (basic, extra)
......@@ -252,6 +256,15 @@ function(add_kernel TARGET device level)
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(LITE_BUILD_TAILOR)
foreach(src ${args_SRCS})
list (FIND tailored_kernels_list ${src} _index)
if (${_index} EQUAL -1)
return()
endif()
endforeach()
endif()
if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
return()
endif()
......@@ -338,6 +351,10 @@ endfunction()
set(ops CACHE INTERNAL "ops")
set(ops_src_list "${CMAKE_BINARY_DIR}/ops_src_list.txt")
file(WRITE ${ops_src_list} "") # clean
if(LITE_BUILD_TAILOR)
set(tailored_ops_list_path "${LITE_OPTMODEL_DIR}/.tailored_ops_source_list")
file(STRINGS ${tailored_ops_list_path} tailored_ops_list)
endif()
# add an operator
# level: one of (basic, extra)
function(add_operator TARGET level)
......@@ -348,16 +365,24 @@ function(add_operator TARGET level)
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
return()
endif()
set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
foreach(src ${args_SRCS})
if(LITE_BUILD_TAILOR)
list(FIND tailored_ops_list ${src} _index)
if (${_index} EQUAL -1)
return()
endif()
endif()
file(APPEND ${ops_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
lite_cc_library(${TARGET} SRCS ${args_SRCS}
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
......
......@@ -9,12 +9,17 @@ if (LITE_ON_TINY_PUBLISH)
set(CMAKE_C_FLAGS_RELEASE "-Os -DNDEBUG")
endif()
set(light_lib_DEPS light_api paddle_api paddle_api_light optimizer)
if ((NOT LITE_ON_TINY_PUBLISH) AND (ARM_TARGET_OS STREQUAL "android"))
if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_X86 OR ARM_TARGET_OS STREQUAL "android"))
#full api dynamic library
add_library(paddle_full_api_shared SHARED "")
target_sources(paddle_full_api_shared PUBLIC ${__lite_cc_files} paddle_api.cc light_api.cc cxx_api.cc cxx_api_impl.cc light_api_impl.cc)
add_dependencies(paddle_full_api_shared op_list_h kernel_list_h framework_proto)
target_link_libraries(paddle_full_api_shared framework_proto)
add_dependencies(lite_compile_deps paddle_full_api_shared)
if(LITE_WITH_X86)
add_dependencies(paddle_full_api_shared xxhash)
target_link_libraries(paddle_full_api_shared xxhash)
endif()
#light api dynamic library
lite_cc_library(paddle_light_api_shared MODULE
......
......@@ -15,6 +15,7 @@
#include "lite/api/cxx_api.h"
#include <algorithm>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
......@@ -23,8 +24,16 @@
namespace paddle {
namespace lite {
static const char TAILORD_OPS_SOURCE_LIST_FILENAME[] =
".tailored_ops_source_list";
static const char TAILORD_OPS_LIST_NAME[] = ".tailored_ops_list";
static const char TAILORD_KERNELS_SOURCE_LIST_FILENAME[] =
".tailored_kernels_source_list";
static const char TAILORD_KERNELS_LIST_NAME[] = ".tailored_kernels_list";
void Predictor::SaveModel(const std::string &dir,
lite_api::LiteModelType model_type) {
lite_api::LiteModelType model_type,
bool record_info) {
if (!program_) {
GenRuntimeProgram();
}
......@@ -40,6 +49,83 @@ void Predictor::SaveModel(const std::string &dir,
default:
LOG(FATAL) << "Unknown model type";
}
if (record_info) {
SaveOpKernelInfo(dir);
}
}
void Predictor::SaveOpKernelInfo(const std::string &model_dir) {
std::set<std::string> ops_info;
std::set<std::string> kernels_info;
const auto &instructions_ = program_->instructions();
for (auto &node : instructions_) {
// parse op type information
auto op = node.op()->op_info();
ops_info.insert(op->Type());
// parse kernel type information
std::string kernel_type_str =
node.kernel()->op_type() + "," + TargetRepr(node.kernel()->target()) +
"," + PrecisionRepr(node.kernel()->precision()) + "," +
DataLayoutRepr(node.kernel()->layout()) + "," + node.kernel()->alias();
kernels_info.insert(kernel_type_str);
}
// get source file name from op type and kernel type
auto op2pathmap = OpKernelInfoCollector::Global().GetOp2PathDict();
auto kernel2pathmap = OpKernelInfoCollector::Global().GetKernel2PathDict();
// write used op and kernel info into files
std::string opf_path = model_dir + "/" + TAILORD_OPS_LIST_NAME;
std::string opf_source_path =
model_dir + "/" + TAILORD_OPS_SOURCE_LIST_FILENAME;
std::string kpf_path = model_dir + "/" + TAILORD_KERNELS_LIST_NAME;
std::string kpf_source_path =
model_dir + "/" + TAILORD_KERNELS_SOURCE_LIST_FILENAME;
std::map<std::string, std::string> op2path;
std::FILE *opf = std::fopen(opf_path.c_str(), "w");
std::FILE *opf_source = std::fopen(opf_source_path.c_str(), "w");
std::FILE *kpf = std::fopen(kpf_path.c_str(), "w");
std::FILE *kpf_source = std::fopen(kpf_source_path.c_str(), "w");
std::vector<std::string> opcompile;
std::vector<std::string> kernelcompile;
if (nullptr == opf || nullptr == opf_source || nullptr == kpf ||
nullptr == kpf_source) {
LOG(FATAL) << "failed to create tailoring info files in: " << model_dir;
}
for (auto op_info = ops_info.begin(); op_info != ops_info.end(); op_info++) {
fputs(op_info->c_str(), opf);
fputc('\n', opf);
std::string op_path = op2pathmap[*op_info];
fputs(op_path.c_str(), opf_source);
fputc('\n', opf_source);
}
std::fclose(opf_source);
std::fclose(opf);
LOG(INFO) << "operators information of tailored model is stored into: "
<< opf_path;
// write Kernel_type and Kernel_path into file
for (auto kernel_info = kernels_info.begin();
kernel_info != kernels_info.end();
kernel_info++) {
fputs(kernel_info->c_str(), kpf);
fputc('\n', kpf);
std::string kernel_path = kernel2pathmap[*kernel_info];
fputs(kernel_path.c_str(), kpf_source);
fputc('\n', kpf_source);
if (kernel_path == "conv_compute.cc") {
fputs(
"conv_depthwise.cc\nconv_direct.cc\nconv_gemmlike.cc\nconv_"
"winograd.cc\n",
kpf_source);
}
}
std::fclose(kpf_source);
std::fclose(kpf);
LOG(INFO) << "kernels information of tailored model is stored into: "
<< kpf_path;
}
lite::Tensor *Predictor::GetInput(size_t offset) {
......@@ -61,7 +147,7 @@ void Predictor::PrepareFeedFetch() {
auto current_block = program_desc_.GetBlock<cpp::BlockDesc>(0);
std::vector<cpp::OpDesc *> feeds;
std::vector<cpp::OpDesc *> fetchs;
for (int i = 0; i < current_block->OpsSize(); i++) {
for (size_t i = 0; i < current_block->OpsSize(); i++) {
auto op = current_block->GetOp<cpp::OpDesc>(i);
if (op->Type() == "feed") {
feeds.push_back(op);
......@@ -71,11 +157,11 @@ void Predictor::PrepareFeedFetch() {
}
input_names_.resize(feeds.size());
output_names_.resize(fetchs.size());
for (int i = 0; i < feeds.size(); i++) {
for (size_t i = 0; i < feeds.size(); i++) {
input_names_[feeds[i]->GetAttr<int>("col")] =
feeds[i]->Output("Out").front();
}
for (int i = 0; i < fetchs.size(); i++) {
for (size_t i = 0; i < fetchs.size(); i++) {
output_names_[fetchs[i]->GetAttr<int>("col")] =
fetchs[i]->Input("X").front();
}
......@@ -191,7 +277,7 @@ lite::Tensor *Predictor::GetInputByName(const std::string &name) {
if (element == input_names_.end()) {
LOG(ERROR) << "Model do not have input named with: [" << name
<< "], model's inputs include:";
for (int i = 0; i < input_names_.size(); i++) {
for (size_t i = 0; i < input_names_.size(); i++) {
LOG(ERROR) << "[" << input_names_[i] << "]";
}
return nullptr;
......
......@@ -89,7 +89,9 @@ class LITE_API Predictor {
// This method is disabled on mobile builds to avoid unnecessary dependencies.
void SaveModel(
const std::string& dir,
lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf);
lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
bool record_info = false);
void SaveOpKernelInfo(const std::string& model_dir);
#ifdef LITE_WITH_TRAIN
void Run(const std::vector<framework::Tensor>& tensors) {
......@@ -137,9 +139,10 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
std::unique_ptr<lite_api::Tensor> GetInputByName(
const std::string& name) override;
void SaveOptimizedModel(const std::string& model_dir,
lite_api::LiteModelType model_type =
lite_api::LiteModelType::kProtobuf) override;
void SaveOptimizedModel(
const std::string& model_dir,
lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
bool record_info = false) override;
private:
Predictor raw_predictor_;
......
......@@ -65,8 +65,9 @@ std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInputByName(
}
void CxxPaddleApiImpl::SaveOptimizedModel(const std::string &model_dir,
lite_api::LiteModelType model_type) {
raw_predictor_.SaveModel(model_dir, model_type);
lite_api::LiteModelType model_type,
bool record_info) {
raw_predictor_.SaveModel(model_dir, model_type, record_info);
}
} // namespace lite
......
......@@ -16,7 +16,10 @@
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest.h>
#endif
// "all_kernel_faked.cc" and "kernel_src_map.h" are created automatically during
// model_optimize_tool's compiling period
#include "all_kernel_faked.cc" // NOLINT
#include "kernel_src_map.h" // NOLINT
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
......@@ -35,6 +38,11 @@ DEFINE_string(
"protobuf",
"store type of the output optimized model. protobuf/naive_buffer");
DEFINE_bool(display_kernels, false, "Display kernel information");
DEFINE_bool(record_tailoring_info,
            false,
            "Record kernel and operator information of the optimized model "
            "for tailored compilation; the information is stored in the "
            "optimized model directory as hidden files");
DEFINE_string(optimize_out, "", "path of the output optimized model");
DEFINE_string(valid_targets,
"arm",
......@@ -104,8 +112,14 @@ void Main() {
} else {
LOG(FATAL) << "Unsupported Model type :" << FLAGS_optimize_out_type;
}
OpKernelInfoCollector::Global().SetKernel2path(kernel2path_map);
predictor->SaveOptimizedModel(FLAGS_optimize_out, model_type);
predictor->SaveOptimizedModel(
FLAGS_optimize_out, model_type, FLAGS_record_tailoring_info);
if (FLAGS_record_tailoring_info) {
LOG(INFO) << "Record the information of tailored model into :"
<< FLAGS_optimize_out;
}
}
} // namespace lite_api
......
......@@ -145,7 +145,8 @@ lod_t Tensor::lod() const { return ctensor(raw_tensor_)->lod(); }
void Tensor::SetLoD(const lod_t &lod) { tensor(raw_tensor_)->set_lod(lod); }
void PaddlePredictor::SaveOptimizedModel(const std::string &model_dir,
LiteModelType model_type) {
LiteModelType model_type,
bool record_info) {
LOG(FATAL)
<< "The SaveOptimizedModel API is only supported by CxxConfig predictor.";
}
......
......@@ -97,7 +97,8 @@ class LITE_API PaddlePredictor {
/// CxxConfig, and the persisted model can be reused for MobileConfig.
virtual void SaveOptimizedModel(
const std::string& model_dir,
LiteModelType model_type = LiteModelType::kProtobuf);
LiteModelType model_type = LiteModelType::kProtobuf,
bool record_info = false);
virtual ~PaddlePredictor() = default;
};
......
......@@ -64,8 +64,8 @@ TEST(CxxApi, run) {
EXPECT_NEAR(out[1], -28.8729, 1e-3);
predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2");
predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2.naive",
LiteModelType::kNaiveBuffer);
predictor->SaveOptimizedModel(
FLAGS_model_dir + ".opt2.naive", LiteModelType::kNaiveBuffer, true);
}
// Demo1 for Mobile Devices :Load model from file and run
......
......@@ -71,6 +71,8 @@ add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py
${kernels_src_list}
${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h
"${LITE_OPTMODEL_DIR}/.tailored_kernels_list"
LITE_BUILD_TAILOR
OUTPUT kernels.h # not a real path to the output to force it execute every time.
)
# A trick to generate the paddle_use_ops.h
......@@ -78,6 +80,8 @@ add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_op_registry.py
${ops_src_list}
${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h
"${LITE_OPTMODEL_DIR}/.tailored_ops_list"
LITE_BUILD_TAILOR
OUTPUT ops.h # not a real path to the output to force it execute every time.
)
# generate fake kernels for memory_optimize_tool
......@@ -85,6 +89,7 @@ add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/create_fake_kernel_registry.py
${kernels_src_list}
${CMAKE_BINARY_DIR}/all_kernel_faked.cc
${CMAKE_BINARY_DIR}/kernel_src_map.h
OUTPUT all_kernel_faked.cc # not a real path to the output to force it execute every time.
)
add_custom_target(op_list_h DEPENDS ops.h)
......
......@@ -16,6 +16,7 @@
#include <memory>
#include <vector>
#include "lite/core/mir/fusion/conv_bn_fuser.h"
#include "lite/core/mir/graph_visualize_pass.h"
#include "lite/core/mir/pass_registry.h"
namespace paddle {
......@@ -23,11 +24,19 @@ namespace lite {
namespace mir {
void ConvBNFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
fusion::ConvBNFuser fuser("conv2d");
fuser(graph.get());
// initialize fuser params
std::vector<bool> conv_has_bias_cases{true, false};
std::vector<std::string> conv_type_cases{"conv2d", "depthwise_conv2d"};
fusion::ConvBNFuser fuser2("depthwise_conv2d");
fuser2(graph.get());
// start fusing with the params above
for (auto conv_has_bias : conv_has_bias_cases) {
for (auto conv_type : conv_type_cases) {
VLOG(4) << "conv_has_bias:" << conv_has_bias
<< " conv_type:" << conv_type;
fusion::ConvBNFuser fuser(conv_type, conv_has_bias);
fuser(graph.get());
}
}
}
} // namespace mir
......@@ -35,5 +44,4 @@ void ConvBNFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
} // namespace paddle
REGISTER_MIR_PASS(lite_conv_bn_fuse_pass, paddle::lite::mir::ConvBNFusePass)
.BindTargets({TARGET(kAny)})
.BindKernel("elementwise_add");
.BindTargets({TARGET(kAny)});
......@@ -14,6 +14,7 @@
#include "lite/core/mir/fusion/conv_bn_fuser.h"
#include <memory>
#include <unordered_set>
#include <vector>
namespace paddle {
......@@ -30,7 +31,8 @@ void ConvBNFuser::BuildPattern() {
auto* conv = OpNode("conv2d", conv_type_)->assert_is_op(conv_type_);
auto* conv_out = VarNode("conv_out")
->assert_is_op_output(conv_type_, "Output")
->assert_is_op_input("batch_norm", "X");
->assert_is_op_input("batch_norm", "X")
->AsIntermediate();
auto* bn_scale = VarNode("bn_scale")
->assert_is_op_input("batch_norm", "Scale")
......@@ -61,34 +63,30 @@ void ConvBNFuser::BuildPattern() {
->assert_is_op_output("batch_norm", "SavedVariance")
->AsIntermediate();
conv->LinksFrom({conv_input, conv_weight}).LinksTo({conv_out});
if (conv_has_bias_) {
auto* conv_bias = VarNode("conv_bias")
->assert_is_op_input(conv_type_, "Bias")
->AsInput()
->AsIntermediate();
conv->LinksFrom({conv_input, conv_weight, conv_bias}).LinksTo({conv_out});
} else {
conv->LinksFrom({conv_input, conv_weight}).LinksTo({conv_out});
}
bn->LinksFrom({conv_out, bn_scale, bn_bias, bn_mean, bn_var})
.LinksTo({bn_out, bn_mean_out, bn_saved_mean, bn_saved_var, bn_var_out});
}
void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
auto op_desc = GenOpDesc(matched);
auto eltwise_op = LiteOpRegistry::Global().Create("elementwise_add");
auto conv_instruct = matched.at("conv2d")->stmt();
auto conv_op_desc = conv_instruct->mutable_op_info();
auto conv = conv_instruct->op();
auto* scope = conv->scope();
auto& valid_places = conv->valid_places();
auto conv_weight_t = scope->FindVar(matched.at("conv_weight")->arg()->name)
->GetMutable<lite::Tensor>();
auto conv_weight_dims = conv_weight_t->dims();
size_t weight_num = conv_weight_t->data_size();
// bn
auto bn_scale_t = scope->FindVar(matched.at("bn_scale")->arg()->name)
->GetMutable<lite::Tensor>();
size_t bias_size = bn_scale_t->data_size();
auto bn_scale_d = bn_scale_t->mutable_data<float>();
CHECK_EQ(bias_size, static_cast<size_t>(conv_weight_dims[0]))
<< "The BN bias's size should be equal to the size of the first "
<< "dim size of the conv weights";
auto bn_mean_t = scope->FindVar(matched.at("bn_mean")->arg()->name)
->GetMutable<lite::Tensor>();
auto bn_mean_d = bn_mean_t->mutable_data<float>();
......@@ -102,59 +100,102 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
auto bn_bias_d = bn_bias_t->mutable_data<float>();
auto eps = matched.at("bn")->stmt()->op_info()->GetAttr<float>("epsilon");
auto conv_op_desc = conv_instruct->mutable_op_info();
// conv
auto conv_weight_t = scope->FindVar(matched.at("conv_weight")->arg()->name)
->GetMutable<lite::Tensor>();
CHECK_EQ(static_cast<size_t>(bn_scale_t->data_size()),
static_cast<size_t>(conv_weight_t->dims()[0]))
<< "The BN bias's size should be equal to the size of the first "
<< "dim size of the conv weights";
size_t weight_num = conv_weight_t->data_size();
bool enable_int8 = conv_op_desc->HasAttr("enable_int8") ? true : false;
// compute BN alpha and beta
Tensor alpha_tensor, beta_tensor;
alpha_tensor.CopyDataFrom(*bn_bias_t);
beta_tensor.CopyDataFrom(*bn_bias_t);
auto alpha_data = alpha_tensor.mutable_data<float>();
auto beta_data = beta_tensor.mutable_data<float>();
int h = bias_size;
int w = weight_num / bias_size;
int h =
bn_scale_t
->data_size(); // h == bias_size == out channel num of conv weight
int w = weight_num /
(bn_scale_t->data_size()); // w = `conv_weight_num` / bias_size = in
// channel num of conv weight
ComputeAlphaAndBeta(
bn_scale_d, bn_mean_d, bn_var_d, alpha_data, beta_data, eps, h, w);
///////////////////////////////////////////////////////////////////////////////
// Compute ConvBNFuser
// Before fusion
//
// conv(x) = kx + z = y
// bn(y) = ay + b
//
// Note: `alpha_data` is a, `beta_data` is b from `ComputeAlphaAndBeta`
//
// After fusion:
//
// bn(conv(x)) = a(kx + z) + b = akx + az + b
//
// Note: h == bias_size == out channel num of conv weight
// w = `conv_weight_num` / bias_size = in channel num of conv weight
// little difference for int8
///////////////////////////////////////////////////////////////////////////////
if (enable_int8) {
PADDLE_ENFORCE(conv_op_desc->HasAttr("weight_scale"),
"INT8 mode: Conv should has weight_scale attr");
auto conv_weight_d = conv_weight_t->mutable_data<int8_t>();
// compute new conv_weight for int8
auto weight_scale =
conv_op_desc->GetAttr<std::vector<float>>("weight_scale");
for (int i = 0; i < h; i++) {
weight_scale[i] *= alpha_data[i];
for (unsigned int i = 0; i < h; ++i) {
weight_scale[i] *= fabsf(alpha_data[i]);
if (alpha_data[i] < 0.f) {
auto ptr_row = conv_weight_d + i * w;
for (unsigned int j = 0; j < w; ++j) {
ptr_row[j] *= -1;
}
}
}
// Interface like this should be abandoned.
conv_op_desc->SetAttr("weight_scale", weight_scale);
auto update_conv_desc = *conv_instruct->mutable_op_info();
conv_instruct->ResetOp(update_conv_desc, graph->valid_places());
} else {
// compute new conv_weight
auto conv_weight_d = conv_weight_t->mutable_data<float>();
for (int i = 0; i < h; i++) {
for (int j = 0; j < w; j++) {
for (unsigned int i = 0; i < h; ++i) { // n: conv2d output channels
for (unsigned int j = 0; j < w; ++j) { // w: conv2d input channels
conv_weight_d[i * w + j] *= alpha_data[i];
}
}
}
for (int i = 0; i < bias_size; i++) {
// compute new conv_bias
if (conv_has_bias_) {
auto conv_bias_t = scope->FindVar(matched.at("conv_bias")->arg()->name)
->GetMutable<lite::Tensor>();
auto conv_bias_d = conv_bias_t->data<float>();
for (unsigned int i = 0; i < bn_bias_t->data_size();
++i) {  // bias_size == h == conv2d output channels
bn_bias_d[i] += alpha_data[i] * conv_bias_d[i];
}
}
for (unsigned int i = 0; i < bn_bias_t->data_size(); ++i) {
bn_bias_d[i] += beta_data[i];
}
eltwise_op->Attach(op_desc, scope);
auto* new_op_node = graph->GraphCreateInstructNode(eltwise_op, valid_places);
IR_NODE_LINK_TO(matched.at("conv_out"), new_op_node);
IR_NODE_LINK_TO(matched.at("bn_bias"), new_op_node);
IR_NODE_LINK_TO(new_op_node, matched.at("bn_out"));
}
cpp::OpDesc ConvBNFuser::GenOpDesc(const key2nodes_t& matched) {
cpp::OpDesc op_desc;
op_desc.SetType("elementwise_add");
op_desc.SetInput("X", {matched.at("conv_out")->arg()->name});
op_desc.SetInput("Y", {matched.at("bn_bias")->arg()->name});
op_desc.SetOutput("Out", {matched.at("bn_out")->arg()->name});
op_desc.SetAttr("axis", 1);
return op_desc;
conv_op_desc->SetType(conv_type_);
conv_op_desc->SetInput("Input", {matched.at("conv_input")->arg()->name});
conv_op_desc->SetInput("Filter", {matched.at("conv_weight")->arg()->name});
conv_op_desc->SetOutput("Output", {matched.at("bn_out")->arg()->name});
conv_op_desc->SetInput("Bias",
{matched.at("bn_bias")->arg()->name}); // conv_bias
auto update_conv_desc = *conv_instruct->mutable_op_info();
conv_instruct->ResetOp(update_conv_desc, graph->valid_places());
IR_NODE_LINK_TO(matched.at("bn_bias"), matched.at("conv2d"));
IR_OP_VAR_LINK(matched.at("conv2d"), matched.at("bn_out"));
}
} // namespace fusion
......
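A side note on the arithmetic behind the rewritten ConvBNFuser above: with batch norm folded into its per-channel inference-time affine form, the new conv weights and bias follow the algebra below (a sketch consistent with the in-code comment "bn(conv(x)) = a(kx + z) + b", not text taken from the source):

\[
y_i = \sum_j W_{ij}\,x_j + z_i \ \text{(conv)},\qquad
\hat y_i = a_i\,y_i + b_i \ \text{(bn)},\qquad
a_i = \frac{\gamma_i}{\sqrt{\sigma_i^2 + \varepsilon}},\quad
b_i = \beta_i - a_i\,\mu_i
\]
\[
\hat y_i = \sum_j \left(a_i W_{ij}\right) x_j + \left(a_i z_i + b_i\right)
\;\Longrightarrow\;
W'_{ij} = a_i\,W_{ij},\qquad z'_i = a_i z_i + b_i .
\]

Here i runs over the h output channels and j over the w weights belonging to each channel. In the int8 branch the weights stay int8, so |a_i| is folded into weight_scale instead and the i-th weight row is negated when a_i < 0, as the loop in the hunk above does.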
......@@ -27,12 +27,12 @@ namespace fusion {
class ConvBNFuser : public FuseBase {
public:
explicit ConvBNFuser(const std::string& conv_type) : conv_type_(conv_type) {}
explicit ConvBNFuser(const std::string& conv_type, const bool conv_has_bias)
: conv_type_(conv_type), conv_has_bias_(conv_has_bias) {}
void BuildPattern() override;
void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override;
private:
cpp::OpDesc GenOpDesc(const key2nodes_t& matched) override;
void ComputeAlphaAndBeta(float* scale_d,
float* mean_d,
float* var_d,
......@@ -51,6 +51,7 @@ class ConvBNFuser : public FuseBase {
private:
std::string conv_type_{"conv2d"};
bool conv_has_bias_{false};
};
} // namespace fusion
......
......@@ -23,14 +23,21 @@ namespace lite {
namespace mir {
void ConvElementwiseFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
fusion::ConvElementwiseFuser fuser("conv2d");
fuser(graph.get());
// initialize fuser params
// note: the conv_has_bias == true case must be matched as the first pattern
std::vector<bool> conv_has_bias_cases{true, false};
std::vector<std::string> conv_type_cases{
"conv2d", "depthwise_conv2d", "conv2d_transpose"};
fusion::ConvElementwiseFuser depthwise_fuser("depthwise_conv2d");
depthwise_fuser(graph.get());
fusion::ConvElementwiseFuser conv2d_transpose_fuser("conv2d_transpose");
conv2d_transpose_fuser(graph.get());
// start fusing with the params above
for (auto conv_has_bias : conv_has_bias_cases) {
for (auto conv_type : conv_type_cases) {
VLOG(4) << "conv_has_bias:" << conv_has_bias
<< " conv_type:" << conv_type;
fusion::ConvElementwiseFuser fuser(conv_type, conv_has_bias);
fuser(graph.get());
}
}
}
} // namespace mir
......
......@@ -33,8 +33,7 @@ void ConvElementwiseFuser::BuildPattern() {
->assert_is_persistable_var();
// create op nodes
auto* conv2d =
OpNode("conv2d", conv_type_)->assert_is_op(conv_type_)->AsIntermediate();
auto* conv2d = OpNode("conv2d", conv_type_)->assert_is_op(conv_type_);
auto* add = OpNode("add", "elementwise_add")
->assert_is_op("elementwise_add")
->AsIntermediate();
......@@ -51,6 +50,13 @@ void ConvElementwiseFuser::BuildPattern() {
// create topology.
std::vector<PMNode*> conv2d_inputs{filter, input};
// consider a special case: conv with bias
if (conv_has_bias_) {
PMNode* conv_bias = VarNode("conv_bias")
->assert_is_op_input(conv_type_, "Bias")
->AsIntermediate();
conv2d_inputs.emplace_back(conv_bias);
}
std::vector<PMNode*> add_inputs{conv2d_out, bias};
conv2d_inputs >> *conv2d >> *conv2d_out;
add_inputs >> *add >> *add_out;
......@@ -58,44 +64,49 @@ void ConvElementwiseFuser::BuildPattern() {
void ConvElementwiseFuser::InsertNewNode(SSAGraph* graph,
const key2nodes_t& matched) {
auto op_desc = GenOpDesc(matched);
auto conv_op = LiteOpRegistry::Global().Create(conv_type_);
auto conv_old = matched.at("conv2d")->stmt()->op();
auto* scope = conv_old->scope();
auto& valid_places = conv_old->valid_places();
conv_op->Attach(op_desc, scope);
auto* new_op_node = graph->GraphCreateInstructNode(conv_op, valid_places);
auto conv_instruct = matched.at("conv2d")->stmt();
auto conv_op_desc = conv_instruct->mutable_op_info();
auto* scope = conv_instruct->op()->scope();
IR_NODE_LINK_TO(matched.at("input"), new_op_node);
IR_NODE_LINK_TO(matched.at("filter"), new_op_node);
IR_NODE_LINK_TO(matched.at("bias"), new_op_node);
IR_NODE_LINK_TO(new_op_node, matched.at("output"));
}
/////////////////////////////////////////////////////////////////////////////////////
// ConvElementwiseFuser
// If `conv_bias` already exists, it is accumulated into the
// `elementwise_add` bias, and that combined tensor becomes the new conv bias.
// If `conv_bias` does not exist, the `elementwise_add` bias is used directly
// as the new conv bias.
/////////////////////////////////////////////////////////////////////////////////////
cpp::OpDesc ConvElementwiseFuser::GenOpDesc(const key2nodes_t& matched) {
auto* desc = matched.at("conv2d")->stmt()->op_info();
if (conv_has_bias_ == true && conv_op_desc->HasInput("Bias") &&
conv_op_desc->Input("Bias").size() > 0) {
auto conv_bias_var = scope->FindVar(conv_op_desc->Input("Bias").front());
if (conv_bias_var != nullptr) {
// conv bias
auto conv_bias_t = &(conv_bias_var->Get<lite::Tensor>());
auto conv_bias_d = conv_bias_t->data<float>();
cpp::OpDesc op_desc = *desc;
op_desc.SetType(conv_type_);
op_desc.SetInput("Input", {matched.at("input")->arg()->name});
op_desc.SetInput("Filter", {matched.at("filter")->arg()->name});
op_desc.SetInput("Bias", {matched.at("bias")->arg()->name});
op_desc.SetOutput("Output", {matched.at("output")->arg()->name});
// Other inputs. See operators/conv_op.h
std::vector<std::string> input_arg_names = desc->InputArgumentNames();
// elementwise_add bias
auto elementwise_add_bias_t =
scope->FindVar(matched.at("bias")->arg()->name)
->GetMutable<lite::Tensor>();
auto elementwise_add_bias_d =
elementwise_add_bias_t->mutable_data<float>();
if (std::find(input_arg_names.begin(),
input_arg_names.end(),
"ResidualData") != input_arg_names.end()) {
op_desc.SetInput("ResidualData", desc->Input("ResidualData"));
for (unsigned int i = 0; i < conv_bias_t->data_size(); ++i) {
elementwise_add_bias_d[i] += conv_bias_d[i];
}
}
}
// Only consider strides, padding, groups, dilations for now
op_desc.SetAttr("strides", desc->GetAttr<std::vector<int>>("strides"));
op_desc.SetAttr("paddings", desc->GetAttr<std::vector<int>>("paddings"));
op_desc.SetAttr("groups", desc->GetAttr<int>("groups"));
op_desc.SetAttr("dilations", desc->GetAttr<std::vector<int>>("dilations"));
return op_desc;
conv_op_desc->SetType(conv_type_);
conv_op_desc->SetInput("Input", {matched.at("input")->arg()->name});
conv_op_desc->SetInput("Filter", {matched.at("filter")->arg()->name});
conv_op_desc->SetOutput("Output", {matched.at("output")->arg()->name});
conv_op_desc->SetInput("Bias", {matched.at("bias")->arg()->name});
auto update_conv_desc = *conv_instruct->mutable_op_info();
conv_instruct->ResetOp(update_conv_desc, graph->valid_places());
IR_NODE_LINK_TO(matched.at("bias"), matched.at("conv2d"));
IR_OP_VAR_LINK(matched.at("conv2d"), matched.at("output"));
}
} // namespace fusion
......
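The bias handling described in the comment block above reduces to a per-channel sum; as a sketch (notation assumed, not from the source), with an existing conv bias z and the elementwise_add bias e the fused conv ends up with

\[
b'_i \;=\;
\begin{cases}
z_i + e_i, & \text{if the conv already had a Bias input},\\
e_i, & \text{otherwise},
\end{cases}
\]

which is why InsertNewNode accumulates the old conv bias into the elementwise_add bias tensor and then rebinds that tensor as the conv op's Bias input.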
......@@ -25,16 +25,18 @@ namespace fusion {
class ConvElementwiseFuser : public FuseBase {
public:
explicit ConvElementwiseFuser(const std::string& conv_type) {
explicit ConvElementwiseFuser(const std::string& conv_type,
const bool conv_has_bias) {
conv_type_ = conv_type;
conv_has_bias_ = conv_has_bias;
}
void BuildPattern() override;
void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override;
private:
cpp::OpDesc GenOpDesc(const key2nodes_t& matched) override;
std::string conv_type_;
std::string conv_type_{"conv2d"};
bool conv_has_bias_{false};
};
} // namespace fusion
......
......@@ -32,6 +32,43 @@
using LiteType = paddle::lite::Type;
class OpKernelInfoCollector {
public:
static OpKernelInfoCollector &Global() {
static auto *x = new OpKernelInfoCollector;
return *x;
}
void AddOp2path(const std::string &op_name, const std::string &op_path) {
size_t index = op_path.find_last_of('/');
if (index != std::string::npos) {
op2path_.insert(std::pair<std::string, std::string>(
op_name, op_path.substr(index + 1)));
}
}
void AddKernel2path(const std::string &kernel_name,
const std::string &kernel_path) {
size_t index = kernel_path.find_last_of('/');
if (index != std::string::npos) {
kernel2path_.insert(std::pair<std::string, std::string>(
kernel_name, kernel_path.substr(index + 1)));
}
}
void SetKernel2path(
const std::map<std::string, std::string> &kernel2path_map) {
kernel2path_ = kernel2path_map;
}
const std::map<std::string, std::string> &GetOp2PathDict() {
return op2path_;
}
const std::map<std::string, std::string> &GetKernel2PathDict() {
return kernel2path_;
}
private:
std::map<std::string, std::string> op2path_;
std::map<std::string, std::string> kernel2path_;
};
namespace paddle {
namespace lite {
......@@ -59,7 +96,6 @@ class OpLiteRegistor : public Registor<OpClass> {
});
}) {}
};
template <TargetType Target, PrecisionType Precision, DataLayoutType Layout>
using KernelRegistryForTarget =
Factory<KernelLite<Target, Precision, Layout>, std::unique_ptr<KernelBase>>;
......@@ -287,6 +323,7 @@ class KernelRegistor : public lite::Registor<KernelType> {
static paddle::lite::OpLiteRegistor<OpClass> LITE_OP_REGISTER_INSTANCE( \
op_type__)(#op_type__); \
int touch_op_##op_type__() { \
OpKernelInfoCollector::Global().AddOp2path(#op_type__, __FILE__); \
return LITE_OP_REGISTER_INSTANCE(op_type__).Touch(); \
}
......@@ -312,6 +349,9 @@ class KernelRegistor : public lite::Registor<KernelType> {
static KernelClass LITE_KERNEL_INSTANCE( \
op_type__, target__, precision__, layout__, alias__); \
int touch_##op_type__##target__##precision__##layout__##alias__() { \
OpKernelInfoCollector::Global().AddKernel2path( \
#op_type__ "," #target__ "," #precision__ "," #layout__ "," #alias__, \
__FILE__); \
LITE_KERNEL_INSTANCE(op_type__, target__, precision__, layout__, alias__) \
.Touch(); \
return 0; \
......
......@@ -21,7 +21,7 @@ namespace lite {
namespace fluid {
using LoD = std::vector<std::vector<size_t>>;
LoD ToAbsOffset(const LoD &in) {
static LoD ToAbsOffset(const LoD &in) {
// the lowest level stores relative offsets
if (in.empty() || in.size() == 1) return in;
LoD result = in;
......
......@@ -17,6 +17,8 @@ BUILD_EXTRA=OFF
BUILD_JAVA=ON
BUILD_PYTHON=OFF
BUILD_DIR=$(pwd)
OPTMODEL_DIR=""
BUILD_TAILOR=OFF
readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
......@@ -94,6 +96,8 @@ function make_tiny_publish_so {
-DLITE_ON_TINY_PUBLISH=ON \
-DANDROID_STL_TYPE=$android_stl \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_BUILD_TAILOR=$BUILD_TAILOR \
-DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
-DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
make publish_inference -j$NUM_PROC
......@@ -133,6 +137,8 @@ function make_full_publish_so {
-DLITE_SHUTDOWN_LOG=ON \
-DANDROID_STL_TYPE=$android_stl \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_BUILD_TAILOR=$BUILD_TAILOR \
-DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
-DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
make publish_inference -j4
......@@ -317,6 +323,14 @@ function main {
BUILD_DIR="${i#*=}"
shift
;;
--opt_model_dir=*)
OPTMODEL_DIR="${i#*=}"
shift
;;
--build_tailor=*)
BUILD_TAILOR="${i#*=}"
shift
;;
tiny_publish)
make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL
shift
......
......@@ -20,6 +20,7 @@ from utils import *
ops_list_path = sys.argv[1]
dest_path = sys.argv[2]
kernelmap_path = sys.argv[3]
out_lines = [
'#pragma once',
......@@ -47,6 +48,31 @@ class %s : public KernelLite<TARGET(%s), PRECISION(%s), DATALAYOUT(%s)> {
} // namespace paddle
'''
# create a .h file to store the kernel-to-source-file mapping
kernel_src_map_lines = [
'''
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
// ATTENTION: this header can only be included in a .cc file.
const std::map<std::string, std::string> kernel2path_map{
'''
]
with open(ops_list_path) as f:
......@@ -99,7 +125,23 @@ with open(ops_list_path) as f:
out_lines.append("")
out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias))
index = path.rindex('/')
filename = path[index + 1:]
map_element = ' {"%s,%s,%s,%s,%s", "%s"},' % (
k.op_type,
k.target,
k.precision,
k.data_layout,
k.alias,
filename.strip()
)
kernel_src_map_lines.append(map_element)
with open(dest_path, 'w') as f:
logging.info("write kernel list to %s" % dest_path)
f.write('\n'.join(out_lines))
with open(kernelmap_path, 'w') as fd:
logging.info("write kernel map to %s" % dest_path)
kernel_src_map_lines.append(' {" ", " "}')
kernel_src_map_lines.append('};')
fd.write('\n'.join(kernel_src_map_lines))
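For reference, the kernel_src_map.h generated by this script maps each comma-joined kernel signature to the source file that registers it. An assumed example of the generated contents (the kernels and file names shown are illustrative only):

// kernel_src_map.h (generated; contents shown here are illustrative)
#pragma once
#include <map>
#include <string>
// ATTENTION: this header can only be included in a .cc file.
const std::map<std::string, std::string> kernel2path_map{
    {"conv2d,kARM,kFloat,kNCHW,def", "conv_compute.cc"},
    {"fc,kARM,kFloat,kNCHW,def", "fc_compute.cc"},
    {" ", " "}  // dummy trailing entry appended by the generator
};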
......@@ -18,14 +18,19 @@ from ast import RegisterLiteKernelParser
ops_list_path = sys.argv[1]
dest_path = sys.argv[2]
minkernels_list_path = sys.argv[3]
tailored = sys.argv[4]
out_lines = [
'#pragma once',
'#include "paddle_lite_factory_helper.h"',
'',
]
minlines = set()
if tailored == "ON":
with open(minkernels_list_path) as fd:
for line in fd:
minlines.add(line.strip())
with open(ops_list_path) as f:
paths = set([path for path in f])
for path in paths:
......@@ -35,6 +40,15 @@ with open(ops_list_path) as f:
kernel_parser.parse()
for k in kernel_parser.kernels:
kernel = "%s, %s, %s, %s, %s" % (
k.op_type,
k.target,
k.precision,
k.data_layout,
k.alias,
)
if tailored == "ON":
if kernel not in minlines: continue
key = "USE_LITE_KERNEL(%s, %s, %s, %s, %s);" % (
k.op_type,
k.target,
......
......@@ -19,7 +19,8 @@ from ast import RegisterLiteOpParser
ops_list_path = sys.argv[1]
dest_path = sys.argv[2]
minops_list_path = sys.argv[3]
tailored = sys.argv[4]
out_lines = [
'#pragma once',
'#include "paddle_lite_factory_helper.h"',
......@@ -30,6 +31,11 @@ paths = set()
for line in open(ops_list_path):
paths.add(line.strip())
if tailored == "ON":
minlines = set()
with open(minops_list_path) as fd:
for line in fd:
minlines.add(line.strip())
for path in paths:
str_info = open(path.strip()).read()
op_parser = RegisterLiteOpParser(str_info)
......@@ -37,6 +43,8 @@ for path in paths:
for op in ops:
if "_grad" in op:
continue
if tailored == "ON":
if op not in minlines: continue
out = "USE_LITE_OP(%s);" % op
out_lines.append(out)
......