diff --git a/cmake/bm.cmake b/cmake/bm.cmake
index f812cf948169d68258a0ad7237a34b00534709c8..3a3abb5966172ba00227e9fac7fabfe55bac7737 100644
--- a/cmake/bm.cmake
+++ b/cmake/bm.cmake
@@ -34,6 +34,7 @@ include_directories("${BM_SDK_ROOT}/include/bmruntime")
 include_directories("${BM_SDK_ROOT}/include/bmlib")
 include_directories("${BM_SDK_ROOT}/include/bmcompiler")
 include_directories("${BM_SDK_ROOT}/include/bmcpu")
+include_directories("${BM_SDK_ROOT}/include/bmlog")
 
 find_library(BM_SDK_RT_LIB NAMES bmrt
              PATHS ${BM_SDK_ROOT}/lib/bmnn/pcie)
diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt
index e660bbcdd606133db4e7891b6973f26983b4dd79..991308b43401512e6d2327a4f657110966169a08 100644
--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -86,9 +86,9 @@ if (NOT LITE_ON_TINY_PUBLISH)
       ARM_DEPS ${arm_kernels}
       NPU_DEPS ${npu_kernels} ${npu_bridges} npu_pass
       XPU_DEPS ${xpu_kernels} ${xpu_bridges} xpu_pass
+      BM_DEPS ${bm_kernels} ${bm_bridges} bm_pass
       CL_DEPS ${opencl_kernels}
       FPGA_DEPS ${fpga_kernels})
-      BM_DEPS ${bm_kernels})
 endif()
 
 # for light api
@@ -107,7 +107,7 @@ lite_cc_library(light_api SRCS light_api.cc
       NPU_DEPS ${npu_kernels}
       XPU_DEPS ${xpu_kernels}
       CL_DEPS ${opencl_kernels}
-      FPGA_DEPS ${fpga_kernels})
+      FPGA_DEPS ${fpga_kernels} BM_DEPS ${bm_kernels})
 
 include(ExternalProject)
@@ -162,7 +162,7 @@ if(WITH_TESTING)
     add_dependencies(test_step_rnn_lite_x86 extern_lite_download_step_rnn_tar_gz)
     lite_cc_test(test_resnet50_lite_bm SRCS test_resnet50_lite_bm.cc
        DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
-       ${ops} ${host_kernels} ${bm_kernels}
+       ${ops} ${host_kernels} ${bm_kernels} ${bm_bridges}
        ARGS --model_dir=${LITE_MODEL_DIR}/resnet50)
   endif()
 endif()
diff --git a/lite/api/cxx_api.cc b/lite/api/cxx_api.cc
index 4647f20bbe476d8763f94f707f3d88da7c7544df..690c64045379b3a45dc93006f28b9d94ba5408ef 100644
--- a/lite/api/cxx_api.cc
+++ b/lite/api/cxx_api.cc
@@ -141,7 +141,7 @@ std::vector<std::string> Predictor::GetOutputNames() { return output_names_; }
 void Predictor::PrepareFeedFetch() {
   std::vector<const cpp::OpDesc*> feeds;
   std::vector<const cpp::OpDesc*> fetchs;
-#if defined(LITE_WITH_NPU) || defined(LITE_WITH_XPU)
+#if defined(LITE_WITH_NPU) || defined(LITE_WITH_XPU) || defined(LITE_WITH_BM)
-  // The shape of input tensors must be determined before generating NPU and XPU
-  // program.
+  // The shape of input tensors must be determined before generating the NPU,
+  // XPU, or BM program.
   auto current_block = program_desc_.GetBlock<cpp::BlockDesc>(0);
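With LITE_WITH_BM added to this guard, BM builds take the same early feed/fetch discovery path as NPU and XPU, because input shapes must be fixed before the BM subgraph pass compiles the graph. A minimal sketch of the traversal the guarded block performs, assuming the usual cpp::BlockDesc accessors (loop body illustrative):

    // Walk block 0 and record feed/fetch op descs so that input shapes are
    // known before generate_bm_program_pass runs.
    for (size_t i = 0; i < current_block->OpsSize(); ++i) {
      auto* op = current_block->GetOp<cpp::OpDesc>(i);
      if (op->Type() == "feed") feeds.push_back(op);
      if (op->Type() == "fetch") fetchs.push_back(op);
    }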
diff --git a/lite/api/paddle_place.cc b/lite/api/paddle_place.cc
index 894d839185ea9e1b6b47b87c398f249f044c2b51..74dcc1060263626637af0ba6b02e4120472e995c 100644
--- a/lite/api/paddle_place.cc
+++ b/lite/api/paddle_place.cc
@@ -55,7 +55,8 @@ const std::string& TargetToStr(TargetType target) {
                                                  "any",
                                                  "fpga",
                                                  "npu",
-                                                 "xpu"};
+                                                 "xpu",
+                                                 "bm"};
   auto x = static_cast<int>(target);
   CHECK_LT(x, static_cast<int>(TARGET(NUM)));
   return target2string[x];
@@ -93,7 +94,8 @@ const std::string& TargetRepr(TargetType target) {
                                                  "kAny",
                                                  "kFPGA",
                                                  "kNPU",
-                                                 "kXPU"};
+                                                 "kXPU",
+                                                 "kBM"};
   auto x = static_cast<int>(target);
   CHECK_LT(x, static_cast<int>(TARGET(NUM)));
   return target2string[x];
@@ -129,6 +131,7 @@ std::set<TargetType> ExpandValidTargets(TargetType target) {
                                            TARGET(kOpenCL),
                                            TARGET(kNPU),
                                            TARGET(kXPU),
+                                           TARGET(kBM),
                                            TARGET(kFPGA)});
   if (target == TARGET(kAny)) {
     return valid_set;
diff --git a/lite/api/paddle_place.h b/lite/api/paddle_place.h
index 07284be095c05e5dfa069b0973d5982cf1f07c8a..a13abb699cea36ba53e430668e8dcd6d19d46d9e 100644
--- a/lite/api/paddle_place.h
+++ b/lite/api/paddle_place.h
@@ -52,8 +52,9 @@ enum class TargetType : int {
   kFPGA = 7,
   kNPU = 8,
   kXPU = 9,
+  kBM = 10,
   kAny = 6,  // any target
-  NUM = 10,  // number of fields.
+  NUM = 11,  // number of fields.
 };
 
 enum class PrecisionType : int {
   kUnk = 0,
diff --git a/lite/api/paddle_use_passes.h b/lite/api/paddle_use_passes.h
index 9d56d262abf549584819ab893144e41fc399439f..72ef189c1c8f650e3deb385b894d787ebf4292be 100644
--- a/lite/api/paddle_use_passes.h
+++ b/lite/api/paddle_use_passes.h
@@ -26,6 +26,9 @@ USE_MIR_PASS(generate_npu_program_pass);
 #ifdef LITE_WITH_XPU
 USE_MIR_PASS(generate_xpu_program_pass);
 #endif
+#ifdef LITE_WITH_BM
+USE_MIR_PASS(generate_bm_program_pass);
+#endif
 
 USE_MIR_PASS(io_copy_kernel_pick_pass);
 USE_MIR_PASS(argument_type_display_pass);
diff --git a/lite/backends/bm/CMakeLists.txt b/lite/backends/bm/CMakeLists.txt
index fc0dd3acb3f35c3731a880ccfb79a28adc1b4d70..132e521c8d38ccea3496e3877feecdf4a79cf44a 100644
--- a/lite/backends/bm/CMakeLists.txt
+++ b/lite/backends/bm/CMakeLists.txt
@@ -3,3 +3,4 @@ if (NOT LITE_WITH_BM)
 endif()
 
 lite_cc_library(target_wrapper_bm SRCS target_wrapper.cc bm_context.cc DEPS ${bm_runtime_libs})
+lite_cc_library(bm_builder SRCS builder.cc DEPS ${bm_builder_libs})
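With kBM registered as a first-class TargetType, a client can request the BM target in the usual way; a sketch assuming the standard CxxConfig API (model path illustrative):

    paddle::lite_api::CxxConfig config;
    config.set_model_dir("./resnet50");
    config.set_valid_places({
        paddle::lite_api::Place{TARGET(kBM), PRECISION(kFloat)},
        paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)},  // fallback
    });
    auto predictor = paddle::lite_api::CreatePaddlePredictor(config);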
diff --git a/lite/backends/bm/builder.cc b/lite/backends/bm/builder.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c44cd481dd57d3768abd1db7a57448945b2b970c
--- /dev/null
+++ b/lite/backends/bm/builder.cc
@@ -0,0 +1,58 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/backends/bm/builder.h"
+#include <algorithm>
+#include <mutex>
+#include <unordered_map>
+
+namespace paddle {
+namespace lite {
+namespace bm {
+
+std::string UniqueName(const std::string& prefix) {
+  static std::mutex counter_mtx;
+  static std::unordered_map<std::string, int> counter_map;
+  std::unique_lock<std::mutex> counter_lck(counter_mtx);
+  int counter = 1;
+  auto it = counter_map.find(prefix);
+  if (it == counter_map.end()) {
+    counter_map[prefix] = counter;
+  } else {
+    counter = ++(it->second);
+  }
+  return prefix + "_" + std::to_string(counter);
+}
+
+bool HasInputArg(const OpInfo* op_info,
+                 const Scope* scope,
+                 const std::string& argname) {
+  auto iarg_names = op_info->input_argnames();
+  if (std::find(iarg_names.begin(), iarg_names.end(), argname) !=
+      iarg_names.end()) {
+    auto inputs = op_info->Input(argname);
+    if (inputs.empty()) {
+      return false;
+    }
+    auto var_name = inputs.front();
+    auto var = scope->FindVar(var_name);
+    return var != nullptr;
+  } else {
+    return false;
+  }
+}
+
+}  // namespace bm
+}  // namespace lite
+}  // namespace paddle
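The two helpers above are shared by every bridge converter below; a minimal usage sketch (names illustrative):

    // Per-prefix counter, so repeated ops get distinct bmcompiler layer
    // names: "conv2d_1", "conv2d_2", ...
    auto unique_op_name = lite::bm::UniqueName("conv2d");

    // Guard optional inputs such as a conv bias: the argument must be
    // declared by the op, be non-empty, and resolve to a variable in the
    // scope.
    if (lite::bm::HasInputArg(op_info, scope, "Bias")) {
      auto bias_var_name = op_info->Input("Bias").front();
      // ...
    }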
diff --git a/lite/backends/bm/builder.h b/lite/backends/bm/builder.h
new file mode 100644
index 0000000000000000000000000000000000000000..840d28e2c675148f2c249ad6330719e7e7bde747
--- /dev/null
+++ b/lite/backends/bm/builder.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "lite/core/op_lite.h"
+#include "lite/core/target_wrapper.h"
+#include "lite/core/tensor.h"
+
+namespace paddle {
+namespace lite {
+namespace bm {
+
+// Returns prefix + "_<n>" with a thread-safe, per-prefix counter.
+std::string UniqueName(const std::string& prefix);
+// True iff the op declares the argument, it is non-empty, and the first
+// variable it names exists in the scope.
+bool HasInputArg(const OpInfo* op_info,
+                 const Scope* scope,
+                 const std::string& argname);
+
+}  // namespace bm
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/core/mir/fusion/conv_activation_fuse_pass.cc b/lite/core/mir/fusion/conv_activation_fuse_pass.cc
index 0d11b47db6a7f767f8cd032877d8647b0872b8d4..675e8c9ca43f408b97b07a18aba0e9917ecc95c8 100644
--- a/lite/core/mir/fusion/conv_activation_fuse_pass.cc
+++ b/lite/core/mir/fusion/conv_activation_fuse_pass.cc
@@ -47,5 +47,5 @@ void ConvActivationFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
 REGISTER_MIR_PASS(lite_conv_activation_fuse_pass,
                   paddle::lite::mir::ConvActivationFusePass)
     .BindTargets({TARGET(kAny)})
-    .ExcludeTargets({TARGET(kXPU)})
+    .ExcludeTargets({TARGET(kXPU), TARGET(kBM)})
     .BindKernel("conv2d");
diff --git a/lite/core/mir/fusion/conv_bn_fuse_pass.cc b/lite/core/mir/fusion/conv_bn_fuse_pass.cc
index 5ab5f8c0a4797e51cce656de43883a68d4931e9b..f93c39b2327a12101aae5595c2a0733b19778e82 100644
--- a/lite/core/mir/fusion/conv_bn_fuse_pass.cc
+++ b/lite/core/mir/fusion/conv_bn_fuse_pass.cc
@@ -45,4 +45,4 @@ void ConvBNFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
 REGISTER_MIR_PASS(lite_conv_bn_fuse_pass, paddle::lite::mir::ConvBNFusePass)
     .BindTargets({TARGET(kAny)})
-    .ExcludeTargets({TARGET(kX86), TARGET(kXPU)});
+    .ExcludeTargets({TARGET(kX86), TARGET(kXPU), TARGET(kBM)});
diff --git a/lite/core/mir/fusion/conv_elementwise_fuse_pass.cc b/lite/core/mir/fusion/conv_elementwise_fuse_pass.cc
index b1b492ce030c7a46d8b23936c1661f3d743eb9cb..2021bdd3482663b823dd6c1dabdb11be5b5617e2 100644
--- a/lite/core/mir/fusion/conv_elementwise_fuse_pass.cc
+++ b/lite/core/mir/fusion/conv_elementwise_fuse_pass.cc
@@ -47,4 +47,4 @@ void ConvElementwiseFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
 REGISTER_MIR_PASS(lite_conv_elementwise_fuse_pass,
                   paddle::lite::mir::ConvElementwiseFusePass)
     .BindTargets({TARGET(kAny)})
-    .ExcludeTargets({TARGET(kXPU)});
+    .ExcludeTargets({TARGET(kXPU), TARGET(kBM)});
diff --git a/lite/core/mir/fusion/elementwise_add_activation_fuse_pass.cc b/lite/core/mir/fusion/elementwise_add_activation_fuse_pass.cc
index e4391cd24287cafe457074733ba73208288c3375..97f6a2657f0f7ed8963529cdbec5aad00e763807 100644
--- a/lite/core/mir/fusion/elementwise_add_activation_fuse_pass.cc
+++ b/lite/core/mir/fusion/elementwise_add_activation_fuse_pass.cc
@@ -35,5 +35,5 @@ void ElementwiseAddActivationFusePass::Apply(
 REGISTER_MIR_PASS(lite_elementwise_add_activation_fuse_pass,
                   paddle::lite::mir::ElementwiseAddActivationFusePass)
     .BindTargets({TARGET(kAny)})
-    .ExcludeTargets({TARGET(kXPU)})
+    .ExcludeTargets({TARGET(kXPU), TARGET(kBM)})
     .BindKernel("fusion_elementwise_add_activation");
diff --git a/lite/core/mir/fusion/fc_fuse_pass.cc b/lite/core/mir/fusion/fc_fuse_pass.cc
index 7fc449219251bbd7e639e8092099f43fe8eca626..5b8e8563ba2e44c1c855cd3d4c6a9a08c06c826f 100644
--- a/lite/core/mir/fusion/fc_fuse_pass.cc
+++ b/lite/core/mir/fusion/fc_fuse_pass.cc
@@ -33,5 +33,5 @@ void FcFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
 REGISTER_MIR_PASS(lite_fc_fuse_pass, paddle::lite::mir::FcFusePass)
     .BindTargets({TARGET(kAny)})
-    .ExcludeTargets({TARGET(kXPU)})
+    .ExcludeTargets({TARGET(kXPU), TARGET(kBM)})
     .BindKernel("fc");
diff --git a/lite/core/mir/memory_optimize_pass.cc b/lite/core/mir/memory_optimize_pass.cc
index 4f41ba4a601ae763e6fa48c0a98de238252ea7c2..de404130b55c8fbef8f5599bec521fd8a32d96f6 100644
--- a/lite/core/mir/memory_optimize_pass.cc
+++ b/lite/core/mir/memory_optimize_pass.cc
@@ -256,4 +256,4 @@ void MemoryOptimizePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
 REGISTER_MIR_PASS(memory_optimize_pass, paddle::lite::mir::MemoryOptimizePass)
     .BindTargets({TARGET(kARM)})
-    .ExcludeTargets({TARGET(kOpenCL), TARGET(kNPU), TARGET(kXPU)});
+    .ExcludeTargets({TARGET(kOpenCL), TARGET(kNPU), TARGET(kXPU), TARGET(kBM)});
diff --git a/lite/core/mir/static_kernel_pick_pass.cc b/lite/core/mir/static_kernel_pick_pass.cc
index 3384a15de91b097e070e67f4b178a4c8e113a922..c49e4497099c5f04a39bf91e70ca8f48900e7ba7 100644
--- a/lite/core/mir/static_kernel_pick_pass.cc
+++ b/lite/core/mir/static_kernel_pick_pass.cc
@@ -33,7 +33,6 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
   kernel_pick_factors_.ConsiderTarget();
   kernel_pick_factors_.ConsiderPrecision();
   kernel_pick_factors_.ConsiderDataLayout();
-
   CHECK(kernel_pick_factors_.any_factor_considered())
       << "kernel_pick_factors should be specified first";
   CHECK(graph) << "graph not valid";
@@ -50,7 +49,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
           << instruct.op_type();
     VLOG(4) << "instruct.kernels().size():" << instruct.kernels().size();
     for (auto&& kernel : instruct.kernels()) {
-      float score = KernelGrade(*kernel, graph->valid_places());
+      float score = KernelGrade(instruct, *kernel, graph->valid_places());
       VLOG(4) << "kernel->summary():" << kernel->summary()
               << " score:" << score;
       scored.emplace_back(score, std::move(kernel));
@@ -100,7 +99,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
         instruct.ResetOp(update_desc, graph->valid_places());
         scored.clear();
         for (auto&& kernel : instruct.kernels()) {
-          float score = KernelGrade(*kernel, graph->valid_places());
+          float score = KernelGrade(instruct, *kernel, graph->valid_places());
           scored.emplace_back(score, std::move(kernel));
         }
         std::sort(scored.begin(), scored.end(), KernelScoreCmp);
@@ -115,6 +114,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
         bool all_output_type_match = true;
         auto expect_output_type =
            out_type_int8 ? PRECISION(kInt8) : PRECISION(kFloat);
+
         for (auto& arg_name : output_arguments) {
           const Type* out_arg_ty =
               candidate.second->GetOutputDeclType(arg_name);
diff --git a/lite/core/mir/subgraph/CMakeLists.txt b/lite/core/mir/subgraph/CMakeLists.txt
index 95b5fe5ae13e03940bda8d83fcfc252b4ca490ab..e237b80356a4a115582686250fdcca9a337d63c6 100644
--- a/lite/core/mir/subgraph/CMakeLists.txt
+++ b/lite/core/mir/subgraph/CMakeLists.txt
@@ -46,5 +46,11 @@ if(LITE_WITH_XPU)
   endif()
 endif()
 
+if(LITE_WITH_BM)
+  lite_cc_library(bm_pass SRCS generate_bm_program_pass.cc
+      DEPS mir_pass types context ${mir_fusers} ${bm_bridges} ${bm_builder_libs} graph_op subgraph_pass)
+  list(APPEND subgraph_passes bm_pass)
+endif()
+
 set(subgraph_passes ${subgraph_passes} CACHE INTERNAL "subgraph_passes")
 message(STATUS "----> subgraph_passes: ${subgraph_passes}")
diff --git a/lite/core/mir/subgraph/generate_bm_program_pass.cc b/lite/core/mir/subgraph/generate_bm_program_pass.cc
index b5cdc749d34b9b6827e5bd411fef4215e450381e..af689030cc48d6a72e92d3d9fcef1fadc8e65ae2 100644
--- a/lite/core/mir/subgraph/generate_bm_program_pass.cc
+++ b/lite/core/mir/subgraph/generate_bm_program_pass.cc
@@ -22,120 +22,66 @@
 #include "lite/core/mir/pass_registry.h"
 #include "lite/core/mir/pattern_matcher.h"
 
+#include "lite/kernels/bm/bridges/paddle_use_bm_bridges.h"
+#include "lite/kernels/bm/bridges/registry.h"
+#include "bmcompiler_if.h"
+#include "bmlog.hpp"
+
 namespace paddle {
 namespace lite {
 namespace mir {
 namespace subgraph {
 
-std::shared_ptr<ge::Operator> GenerateBMProgramPass::CvtVarNode(
+std::shared_ptr<std::string> GenerateBMProgramPass::CvtVarNode(
     lite::mir::Node* var_node, const Scope* scope) {
-  CHECK(var_node->IsArg());
-  const auto& arg = var_node->AsArg();
-  VLOG(4) << "Convert var node " << arg.name;
-
-  auto* var = scope->FindVar(arg.name);
-  CHECK(var);
-  auto* tensor = var->GetMutable<lite::Tensor>();
-  CHECK(tensor);
-  auto dims = tensor->dims();
-  if (arg.is_weight) {
-    auto wgt = std::make_shared<ge::op::Const>(arg.name);
-    LOG(INFO) << " Convert const var node " << arg.name;
-    VLOG(4) << dims;
-    wgt->set_attr_value(lite::npu::CvtTensor(tensor));
-    return wgt;
-  } else {
-    CHECK_EQ(dims.size(), 4);
-    LOG(INFO) << "[NPU] Convert data var node " << arg.name;
-    LOG(INFO) << dims;
-    // TODO(xxx): support more types and dims size
-    ge::TensorDesc desc(ge::Shape(dims.Vectorize()),
-                        ge::Format::FORMAT_NCHW,
-                        ge::DataType::DT_FLOAT);
-
-    // auto size = desc.GetShape().GetShapeSize();
-    // ge::TensorUtils::SetSize(desc, size*sizeof(float));
-    // ge::TensorUtils::SetRealDimCnt(desc, 4);
-    auto data = std::make_shared<ge::op::Data>(arg.name);
-    data->update_input_desc_x(desc);
-    return data;
-  }
   return nullptr;
 }
 
-void GenerateNPUProgramPass::CvtAllOpNodes(
-    const std::vector<Node*>& nodes2cvt,
-    lite::kernels::npu::bridges::node_map_type* converted_vars) {
-  const auto& bridges = lite::kernels::npu::bridges::Factory::Instance();
-  const auto& cvtfunc_map = bridges.AllFunctions();
-  // return record all converted vars
-  // op node's inputs must be found in converted_vars
-  for (auto& node : nodes2cvt) {
-    lite::kernels::npu::bridges::node_map_type node_inputs;
-    auto& stmt = node->AsStmt();
-    for (auto& var_node : node->inlinks) {
-      auto& arg = var_node->AsArg();
-      // weight should be handled in the converter, so skip here
-      if (arg.is_weight) {
-        continue;
-      }
-      auto var_name = arg.name;
-      if (!converted_vars->count(var_name)) {
-        converted_vars->insert(
-            std::make_pair(var_name, CvtVarNode(var_node, stmt.op()->scope())));
-      }
-      node_inputs.insert(*converted_vars->find(var_name));
-    }
-    auto node_outputs = cvtfunc_map.at(stmt.op_type())(stmt.op(), node_inputs);
-    converted_vars->insert(node_outputs.begin(), node_outputs.end());
-  }
-}
+void GenerateBMProgramPass::CvtAllOpNodes(
+    const std::vector<Node*>& nodes2cvt,
+    lite::kernels::bm::bridges::node_map_type* converted_vars) {
+  const auto& bridges = lite::kernels::bm::bridges::Factory::Instance();
+  const auto& cvtfunc_map = bridges.AllFunctions();
+
+  lite::kernels::bm::bridges::graph_ctx_type ctx;
+  ctx.bm_compiler_handle = create_bmcompiler("BM1684");
+  CHECK(ctx.bm_compiler_handle != nullptr);
+
+  // bmlog::init("paddle_bitmain");
+  // bmlog::set_v(3);
+
+  for (auto& node : nodes2cvt) {
+    lite::kernels::bm::bridges::node_map_type node_inputs;
+    auto& stmt = node->AsStmt();
+
+    for (auto& var_node : node->inlinks) {
+      auto& arg = var_node->AsArg();
+      // weight should be handled in the converter, so skip here
+      if (arg.is_weight) {
+        continue;
+      }
+      auto var_name = arg.name;
+      // BM nodes are identified by variable name, so a var maps to itself
+      if (!converted_vars->count(var_name)) {
+        converted_vars->insert(std::make_pair(var_name, var_name));
+      }
+      node_inputs.insert(*converted_vars->find(var_name));
+    }
+
+    auto node_outputs =
+        cvtfunc_map.at(stmt.op_type())(stmt.op(), &ctx, node_inputs);
+    converted_vars->insert(node_outputs.begin(), node_outputs.end());
+  }
+
+  std::string net_name = "paddle_bitmain";
+  __bmcompile_opt(
+      ctx.bm_compiler_handle, const_cast<char*>(net_name.c_str()), 2);
+  finish_bmcompiler(ctx.bm_compiler_handle);
+}
 
-std::string GenerateNPUProgramPass::BuildNPUGraph(
-    const std::unordered_set<Node*>& op_nodes,
-    const std::unordered_set<Node*>& in_data_vars,
-    const std::unordered_set<Node*>& out_data_vars,
-    int sub_id) {
-  auto ordered_nodes = GetTopologicalOrder(op_nodes);
-  lite::kernels::npu::bridges::node_map_type converted_vars;
-  CvtAllOpNodes(ordered_nodes, &converted_vars);
-
-  std::vector<std::string> in_var_names;
-  std::vector<std::string> out_var_names;
-  std::vector<ge::Operator> inputs;
-  std::vector<ge::Operator> outputs;
-  for (auto i : in_data_vars) {
-    auto argname = i->AsArg().name;
-    in_var_names.push_back(argname);
-    inputs.push_back(*converted_vars.at(argname));
-  }
-  for (auto i : out_data_vars) {
-    auto argname = i->AsArg().name;
-    out_var_names.push_back(argname);
-    outputs.push_back(*converted_vars.at(argname));
-  }
-
-  std::string weight_var_name = "graph" + std::to_string(sub_id) + "_weights";
-  auto any_op = (*op_nodes.begin())->AsStmt().op();
-  auto weight = any_op->scope()->Var(weight_var_name)->GetMutable<lite::Tensor>();
-  weight->set_persistable(true);
-  weight->set_precision(PRECISION(kInt8));
-  // Compile the IR graph to an NPU model and store the model data in the
-  // weight tensor with persistable=true, so that the model parser can
-  // recognize it and save it to the param files
-  if (!lite::npu::BuildModel(inputs, outputs, weight)) {
-    LOG(WARNING) << "[NPU] Build NPU graph failed (subgraph=" << sub_id << ")";
-    throw std::runtime_error("Build NPU graph failed.");
-  }
-  LOG(INFO) << "[NPU] Build NPU graph success (subgraph=" << sub_id << ")";
-  return weight_var_name;
-}
 
 void GenerateBMProgramPass::GenSubgraph(
     const std::unique_ptr<SSAGraph>& graph,
     const std::unordered_set<Node*>& op_nodes,
     int sub_id) {
-#if 0
+
   std::unordered_set<Node*> in_data_vars;
   std::unordered_set<Node*> in_wgt_vars;
   std::unordered_set<Node*> out_data_vars;
   std::unordered_set<Node*> out_unused_vars;
@@ -143,27 +89,31 @@ void GenerateBMProgramPass::GenSubgraph(
   FindInputOutputVars(
       op_nodes, &in_data_vars, &in_wgt_vars, &out_data_vars, &out_unused_vars);
 
-  auto weight_var_name =
-      BuildNPUGraph(op_nodes, in_data_vars, out_data_vars, sub_id);
-
-  auto any_op = (*op_nodes.begin())->AsStmt().op();
-  InsertNewNode(graph,
-                weight_var_name,
-                any_op->scope(),
-                any_op->valid_places(),
-                in_data_vars,
-                in_wgt_vars,
-                out_data_vars,
-                out_unused_vars);
-
-  auto nodes2rm = GetNode2rm(
-      op_nodes, {in_data_vars, in_wgt_vars, out_data_vars, out_unused_vars});
-
-  GraphSafeRemoveNodes(graph.get(), nodes2rm);
-#endif
+  auto ordered_nodes = GetTopologicalOrder(op_nodes);
+  lite::kernels::bm::bridges::node_map_type converted_vars;
+  CvtAllOpNodes(ordered_nodes, &converted_vars);
 }
 
 void GenerateBMProgramPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
+  const auto& bridges = lite::kernels::bm::bridges::Factory::Instance();
+  const auto& op_map = bridges.AllFunctions();
+  std::vector<std::string> supported_op_types;
+  for (auto& i : op_map) {
+    VLOG(4) << "[BM] Supported type: " << i.first;
+    supported_op_types.push_back(i.first);
+  }
+
+  int num_subgraph = FuseSubgraph(graph, supported_op_types);
+  InferOnce(graph);
+  auto op_nodes_all = ClassifySubgraph(graph);
+  CHECK_EQ(op_nodes_all.size(), num_subgraph);
+
+  int id = 1;
+  for (auto& op_nodes : op_nodes_all) {
+    VLOG(4) << "[BM] Converting subgraph " << id;
+    GenSubgraph(graph, op_nodes.second, id);
+    id++;
+  }
 }
diff --git a/lite/core/mir/subgraph/generate_bm_program_pass.h b/lite/core/mir/subgraph/generate_bm_program_pass.h
index 36f12000f64534afc340589d7acc19410f4fcfd4..47077f85146d318da641e101233321d43dbcf41f 100644
--- a/lite/core/mir/subgraph/generate_bm_program_pass.h
+++ b/lite/core/mir/subgraph/generate_bm_program_pass.h
@@ -24,6 +24,8 @@
 #include "lite/core/mir/pass.h"
 #include "lite/core/mir/subgraph/subgraph_program_pass.h"
 
+#include "lite/kernels/bm/bridges/registry.h"
+
 namespace paddle {
 namespace lite {
 namespace mir {
@@ -40,9 +42,9 @@ class GenerateBMProgramPass : public SubgraphProgramPass {
   // nodes2cvt: op nodes to convert
   // return cvted_vars: converted var nodes
   void CvtAllOpNodes(const std::vector<Node*>& nodes2cvt,
-                     lite::kernels::npu::bridges::node_map_type* cvted_vars);
+                     lite::kernels::bm::bridges::node_map_type* cvted_vars);
 
-  std::shared_ptr<ge::Operator> CvtVarNode(lite::mir::Node* var_node,
+  std::shared_ptr<std::string> CvtVarNode(lite::mir::Node* var_node,
                                            const Scope* scope);
 
   std::string BuildGraph(const std::unordered_set<Node*>& op_nodes,
@@ -50,6 +52,9 @@ class GenerateBMProgramPass : public SubgraphProgramPass {
                          const std::unordered_set<Node*>& in_data_vars,
                          const std::unordered_set<Node*>& out_data_vars,
                          int sub_id);
 
+  void GenSubgraph(const std::unique_ptr<SSAGraph>& graph,
+                   const std::unordered_set<Node*>& op_nodes,
+                   int sub_id);
+
  private:
  std::vector<Instruction> insts_;
 };
diff --git a/lite/core/optimizer.h b/lite/core/optimizer.h
index 38c9d0e29d5766dec21de76b740c1032ad44da7e..5737e64a269a749db9d4a2ba4c68f36ca0862a41 100644
--- a/lite/core/optimizer.h
+++ b/lite/core/optimizer.h
@@ -33,6 +33,9 @@
 #ifdef LITE_WITH_XPU
 #include "lite/core/mir/subgraph/generate_xpu_program_pass.h"
 #endif
+#ifdef LITE_WITH_BM
+#include "lite/core/mir/subgraph/generate_bm_program_pass.h"
+#endif
 
 namespace paddle {
 namespace lite {
@@ -59,7 +62,8 @@ class Optimizer {
     SpecifyKernelPickTactic(kernel_pick_factor);
     InitTargetTypeTransformPass();
 
-    if (passes.empty()) {
+    // NOTE: the passes.empty() check is disabled here so that the default
+    // pass list below always runs; restore it once the BM program pass no
+    // longer needs the full pipeline unconditionally.
+    // if (passes.empty()) {
+    if (0) {
       std::vector<std::string> passes_local{
           {"lite_quant_dequant_fuse_pass",     //
            "lite_conv_elementwise_fuse_pass",  // conv-elemwise-bn
@@ -125,7 +129,9 @@ class Optimizer {
     // of input tensors. so GenRuntimeProgram() must be called after the shapes
     // of input tensors are determined.
     std::vector<std::string> subgraph_passes{"generate_npu_program_pass",
-                                             "generate_xpu_program_pass"};
+                                             "generate_xpu_program_pass",
+                                             "generate_bm_program_pass"};
+
     RunPasses(subgraph_passes);
 
     auto pass = mir::PassManager::Global().LookUp(
diff --git a/lite/kernels/bm/bridges/CMakeLists.txt b/lite/kernels/bm/bridges/CMakeLists.txt
index 690b38d2e1d1b93ed9f636ea5d93839eec2a9d45..1a09f9bfe7b95bfa289a442cd36c31aa247c3a56 100644
--- a/lite/kernels/bm/bridges/CMakeLists.txt
+++ b/lite/kernels/bm/bridges/CMakeLists.txt
@@ -1,6 +1,6 @@
 lite_cc_library(bm_bridge_registry SRCS registry.cc)
 
-set(bm_bridge_deps bm_bridge_registry op)
+set(bm_bridge_deps bm_bridge_registry bm_builder op)
 
 lite_cc_library(bm_bridge_act_op SRCS act_op.cc DEPS ${bm_bridge_deps})
 lite_cc_library(bm_bridge_conv_op SRCS conv_op.cc DEPS ${bm_bridge_deps})
@@ -9,6 +9,7 @@ lite_cc_library(bm_bridge_pool_op SRCS pool_op.cc DEPS ${bm_bridge_deps})
 lite_cc_library(bm_bridge_softmax_op SRCS softmax_op.cc DEPS ${bm_bridge_deps})
 lite_cc_library(bm_bridge_mul_op SRCS mul_op.cc DEPS ${bm_bridge_deps})
 lite_cc_library(bm_bridge_batch_norm_op SRCS batch_norm_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_scale_op SRCS scale_op.cc DEPS ${bm_bridge_deps})
 
 set(bm_bridges
     bm_bridge_registry
@@ -19,5 +20,6 @@ set(bm_bridges
     bm_bridge_softmax_op
     bm_bridge_mul_op
     bm_bridge_batch_norm_op
+    bm_bridge_scale_op
     CACHE INTERNAL "bm_bridges")
diff --git a/lite/kernels/bm/bridges/act_op.cc b/lite/kernels/bm/bridges/act_op.cc
index 5779c39d7188c86ebbf983f5f81d36545727e026..3416fc0ae890649737336a8efe5f2d5b763d0d28 100644
--- a/lite/kernels/bm/bridges/act_op.cc
+++ b/lite/kernels/bm/bridges/act_op.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "lite/kernels/bm/bridges/registry.h"
+#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
@@ -20,11 +21,50 @@ namespace kernels {
 namespace bm {
 namespace bridges {
 
-node_map_type ActConverter(const std::shared_ptr<lite::OpLite> op,
+node_map_type ActConverter(const std::shared_ptr<lite::OpLite> act_op,
+                           graph_ctx_type* graph_ctx,
                            const node_map_type& input_nodes) {
   // output converted nodes
   node_map_type output_nodes;
-  return output_nodes;
+
+  auto scope = act_op->scope();
+  auto op_info = act_op->op_info();
+  auto op_type = op_info->Type();
+
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  auto output_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+
+  const long int* x_shape_data =
+      const_cast<const long int*>(&x_dims.data()[0]);
+  const long int* output_shape_data =
+      const_cast<const long int*>(&output_dims.data()[0]);
+
+  int i_x_shape_data[x_dims.size()];
+  int i_output_shape_data[output_dims.size()];
+
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
+  }
+
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
+  }
+
+  CHECK(op_type == "relu");
+  add_relu_layer(graph_ctx->bm_compiler_handle,
+                 const_cast<const int*>(i_x_shape_data),
+                 x_dims.size(),
+                 static_cast<const char*>(x_var_name.c_str()),
+                 const_cast<const int*>(i_output_shape_data),
+                 output_dims.size(),
+                 static_cast<const char*>(output_var_name.c_str()),
+                 0.f,
+                 -1.f);
+
+  output_nodes[output_var_name] = output_var_name;
+  return output_nodes;
 }
 
 }  // namespace bridges
diff --git a/lite/kernels/bm/bridges/batch_norm_op.cc b/lite/kernels/bm/bridges/batch_norm_op.cc
index 015b5fb153bc20220688367841f4521acd38df05..6d8f254290e9d4b289635e31995fcbb0dfc8d406 100644
--- a/lite/kernels/bm/bridges/batch_norm_op.cc
+++ b/lite/kernels/bm/bridges/batch_norm_op.cc
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #include "lite/kernels/bm/bridges/registry.h"
+#include "lite/backends/bm/builder.h"
+#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
@@ -20,11 +22,98 @@ namespace kernels {
 namespace bm {
 namespace bridges {
 
-node_map_type BatchNormConverter(const std::shared_ptr<lite::OpLite> op,
+node_map_type BatchNormConverter(const std::shared_ptr<lite::OpLite> bn_op,
+                                 graph_ctx_type* graph_ctx,
                                  const node_map_type& input_nodes) {
   // output converted nodes
   node_map_type output_nodes;
-  return output_nodes;
+
+  auto scope = bn_op->scope();
+  auto op_info = bn_op->op_info();
+  auto op_type = op_info->Type();
+  auto unique_op_name = lite::bm::UniqueName(op_type);
+
+  // input
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  const long int* x_shape_data =
+      const_cast<const long int*>(&x_dims.data()[0]);
+  int i_x_shape_data[x_dims.size()];
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
+  }
+
+  auto scale_var_name = op_info->Input("Scale").front();
+  auto scale = scope->FindVar(scale_var_name)->GetMutable<lite::Tensor>();
+
+  auto bias_var_name = op_info->Input("Bias").front();
+  auto bias = scope->FindVar(bias_var_name)->GetMutable<lite::Tensor>();
+
+  auto mean_var_name = op_info->Input("Mean").front();
+  auto mean = scope->FindVar(mean_var_name)->GetMutable<lite::Tensor>();
+
+  auto variance_var_name = op_info->Input("Variance").front();
+  auto variance =
+      scope->FindVar(variance_var_name)->GetMutable<lite::Tensor>();
+
+  // output
+  auto output_var_name = op_info->Output("Y").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  const long int* output_shape_data =
+      const_cast<const long int*>(&output_dims.data()[0]);
+  int i_output_shape_data[output_dims.size()];
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
+  }
+
+  auto epsilon = op_info->GetAttr<float>("epsilon");
+  auto unique_bn_out_name = lite::bm::UniqueName("batch_norm_out");
+
+  // normalize with mean/variance first (scale pinned to 1) ...
+  add_batchnorm_layer(graph_ctx->bm_compiler_handle,
+                      const_cast<const int*>(i_x_shape_data),
+                      x_dims.size(),
+                      static_cast<const char*>(x_var_name.c_str()),
+                      const_cast<const int*>(i_output_shape_data),
+                      output_dims.size(),
+                      static_cast<const char*>(unique_bn_out_name.c_str()),
+                      static_cast<const char*>(unique_op_name.c_str()),
+                      static_cast<const float*>(mean->mutable_data<float>()),
+                      static_cast<const float*>(variance->mutable_data<float>()),
+                      1.f,
+                      epsilon,
+                      0,
+                      1);
+
+  const int input_num = 1;
+  int** shape = new int*[input_num];
+  int* dim = new int[input_num];
+  const char** name = new const char*[input_num];
+
+  name[0] = static_cast<const char*>(unique_bn_out_name.c_str());
+  dim[0] = output_dims.size();
+  shape[0] = i_output_shape_data;
+
+  // ... then apply gamma/beta with a scale layer
+  auto unique_scale_name = lite::bm::UniqueName("scale");
+  add_scale_layer(graph_ctx->bm_compiler_handle,
+                  input_num,
+                  shape,
+                  dim,
+                  name,
+                  const_cast<const int*>(i_output_shape_data),
+                  output_dims.size(),
+                  static_cast<const char*>(output_var_name.c_str()),
+                  static_cast<const char*>(unique_scale_name.c_str()),
+                  static_cast<const float*>(scale->mutable_data<float>()),
+                  static_cast<const float*>(bias->mutable_data<float>()),
+                  1,
+                  1,
+                  0);
+
+  delete[] shape;
+  delete[] name;
+  delete[] dim;
+
+  output_nodes[output_var_name] = output_var_name;
+  return output_nodes;
 }
 
 }  // namespace bridges
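For reference, the two emitted layers compose to the standard batch-norm algebra: add_batchnorm_layer (scale pinned to 1) produces x_hat = (x - mean) / sqrt(variance + epsilon), and add_scale_layer then applies y = gamma * x_hat + beta, which together give y = gamma * (x - mean) / sqrt(variance + epsilon) + beta.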
diff --git a/lite/kernels/bm/bridges/conv_op.cc b/lite/kernels/bm/bridges/conv_op.cc
index c996293923bb486b80ef808663b5d6c42c3496c9..59213b3ba3af6655df566c173895492410473a0a 100644
--- a/lite/kernels/bm/bridges/conv_op.cc
+++ b/lite/kernels/bm/bridges/conv_op.cc
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #include "lite/kernels/bm/bridges/registry.h"
+#include "lite/backends/bm/builder.h"
+#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
@@ -20,10 +22,84 @@ namespace kernels {
 namespace bm {
 namespace bridges {
 
-node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> op,
+node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> conv_op,
+                            graph_ctx_type* graph_ctx,
                             const node_map_type& input_nodes) {
   // output converted nodes
   node_map_type output_nodes;
+
+  auto scope = conv_op->scope();
+  auto op_info = conv_op->op_info();
+  auto op_type = op_info->Type();
+  auto unique_op_name = lite::bm::UniqueName(op_type);
+
+  auto input_var_name = op_info->Input("Input").front();
+  auto input = scope->FindVar(input_var_name)->GetMutable<lite::Tensor>();
+  auto input_dims = input->dims();
+  auto output_var_name = op_info->Output("Output").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  auto filter_var_name = op_info->Input("Filter").front();
+  auto filter = scope->FindVar(filter_var_name)->GetMutable<lite::Tensor>();
+  auto filter_dims = filter->dims();
+
+  CHECK(input_dims.size() == 4);
+  CHECK(output_dims.size() == 4);
+  CHECK(filter_dims.size() == 4);
+
+  bool has_bias = lite::bm::HasInputArg(op_info, scope, "Bias");
+  float* bias_data = nullptr;
+  if (has_bias) {
+    auto bias_var_name = op_info->Input("Bias").front();
+    auto* bias = scope->FindVar(bias_var_name)->GetMutable<lite::Tensor>();
+    bias_data = static_cast<float*>(bias->mutable_data<float>());
+  }
+
+  const long int* input_shape_data =
+      const_cast<const long int*>(&input_dims.data()[0]);
+  const long int* output_shape_data =
+      const_cast<const long int*>(&output_dims.data()[0]);
+
+  int i_input_shape_data[input_dims.size()];
+  int i_output_shape_data[output_dims.size()];
+
+  for (size_t i = 0; i < input_dims.size(); i++) {
+    i_input_shape_data[i] = static_cast<int>(input_shape_data[i]);
+  }
+
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
+  }
+
+  const float* filter_data =
+      const_cast<const float*>(filter->mutable_data<float>());
+
+  auto groups = op_info->GetAttr<int>("groups");
+  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  auto strides = op_info->GetAttr<std::vector<int>>("strides");
+  auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
+
+  // filter layout is {oc, ic/groups, kh, kw}; paddings/strides/dilations
+  // carry {h, w} and each padding value is applied symmetrically
+  add_conv_layer(graph_ctx->bm_compiler_handle,
+                 const_cast<const int*>(i_input_shape_data),
+                 input_dims.size(),
+                 static_cast<const char*>(input_var_name.c_str()),
+                 const_cast<const int*>(i_output_shape_data),
+                 output_dims.size(),
+                 static_cast<const char*>(output_var_name.c_str()),
+                 static_cast<const char*>(unique_op_name.c_str()),
+                 filter_data,
+                 bias_data,
+                 filter_dims.data()[2],
+                 filter_dims.data()[3],
+                 groups,
+                 paddings[0],
+                 paddings[0],
+                 paddings[1],
+                 paddings[1],
+                 strides[0],
+                 strides[1],
+                 dilations[0],
+                 dilations[1],
+                 static_cast<int>(has_bias));
+
+  output_nodes[output_var_name] = output_var_name;
   return output_nodes;
 }
diff --git a/lite/kernels/bm/bridges/elementwise_ops.cc b/lite/kernels/bm/bridges/elementwise_ops.cc
index 31f0c73f2026eb5f583f94b17d144f1713a30175..c38e94f7d922dd60631fc7ec7b85bd6c0dd11937 100644
--- a/lite/kernels/bm/bridges/elementwise_ops.cc
+++ b/lite/kernels/bm/bridges/elementwise_ops.cc
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #include "lite/kernels/bm/bridges/registry.h"
+#include "bmcompiler_if.h"
+#include "bmcompiler_if_lite.h"
 
 namespace paddle {
 namespace lite {
@@ -20,11 +22,118 @@ namespace kernels {
 namespace bm {
 namespace bridges {
 
-node_map_type ElementwiseConverter(const std::shared_ptr<lite::OpLite> op,
+node_map_type ElementwiseConverter(
+    const std::shared_ptr<lite::OpLite> elementwise_op,
+    graph_ctx_type* graph_ctx,
     const node_map_type& input_nodes) {
   // output converted nodes
   node_map_type output_nodes;
-  return output_nodes;
+  auto scope = elementwise_op->scope();
+  auto op_info = elementwise_op->op_info();
+  auto op_type = op_info->Type();
+
+  // input
+  const int input_num = 2;
+  int** shape = new int*[input_num];
+  int* dim = new int[input_num];
+  const char** name = new const char*[input_num];
+
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  name[0] = static_cast<const char*>(x_var_name.c_str());
+  dim[0] = x_dims.size();
+  const long int* x_shape_data =
+      const_cast<const long int*>(&x_dims.data()[0]);
+  int i_x_shape_data[x_dims.size()];
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
+  }
+  shape[0] = i_x_shape_data;
+
+  auto y_var_name = op_info->Input("Y").front();
+  auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
+  auto y_dims = y->dims();
+  name[1] = static_cast<const char*>(y_var_name.c_str());
+  dim[1] = y_dims.size();
+  const long int* y_shape_data =
+      const_cast<const long int*>(&y_dims.data()[0]);
+  int i_y_shape_data[y_dims.size()];
+  for (size_t i = 0; i < y_dims.size(); i++) {
+    i_y_shape_data[i] = static_cast<int>(y_shape_data[i]);
+  }
+  shape[1] = i_y_shape_data;
+
+  // Y is const when it does not come from a previously converted node
+  bool y_is_const = input_nodes.find(y_var_name) == input_nodes.end();
+
+  // output
+  auto output_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  const long int* output_shape_data =
+      const_cast<const long int*>(&output_dims.data()[0]);
+  int i_output_shape_data[output_dims.size()];
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
+  }
+
+  if (y_is_const) {
+    CHECK(op_type == "elementwise_add");
+  }
+
+  int op_code{-1};
+  float coeff[2] = {1.f, 1.f};
+
+  if (op_type == "elementwise_mul") {
+    op_code = 0;
+  } else if (op_type == "elementwise_add") {
+    op_code = 1;
+  } else if (op_type == "elementwise_sub") {
+    op_code = 1;
+    coeff[1] = -1.f;
+  } else {
+    LOG(FATAL) << "unsupported elementwise operation: " << op_type;
+  }
+
+  if (!y_is_const) {
+    add_eltwise_layer(graph_ctx->bm_compiler_handle,
+                      input_num,
+                      shape,
+                      dim,
+                      name,
+                      const_cast<const int*>(i_output_shape_data),
+                      output_dims.size(),
+                      static_cast<const char*>(output_var_name.c_str()),
+                      op_code,
+                      coeff);
+  } else {
+    const float* y_data =
+        const_cast<const float*>(y->mutable_data<float>());
+    // register the const Y operand under its own name, then emit a binary
+    // add of X and Y
+    bm_add_const_tensor(graph_ctx->bm_compiler_handle,
+                        name[1],
+                        shape[1],
+                        dim[1],
+                        static_cast<bm_data_type_t>(0),
+                        static_cast<const void*>(y_data));
+
+    add_binary_layer_v2(graph_ctx->bm_compiler_handle,
+                        name[0],
+                        shape[0],
+                        dim[0],
+                        0,
+                        nullptr,
+                        name[1],
+                        shape[1],
+                        dim[1],
+                        0,
+                        nullptr,
+                        static_cast<const char*>(output_var_name.c_str()),
+                        0);
+  }
+
+  delete[] shape;
+  delete[] name;
+  delete[] dim;
+
+  output_nodes[output_var_name] = output_var_name;
+  return output_nodes;
 }
 
 }  // namespace bridges
@@ -33,4 +142,4 @@ node_map_type ElementwiseConverter(const std::shared_ptr<lite::OpLite> op,
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_BM_BRIDGE(elementwise,
-                   paddle::lite::kernels::bm::bridges::ElementwiseConverter);
+REGISTER_BM_BRIDGE(elementwise_add,
+                   paddle::lite::kernels::bm::bridges::ElementwiseConverter);
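Because converters are looked up by op type, registering ElementwiseConverter only under elementwise_add means the elementwise_sub and elementwise_mul branches above stay unreachable until those types are registered as well. A sketch of how the pass side resolves a converter (using the registry accessors shown in registry.h below):

    const auto& bridges = lite::kernels::bm::bridges::Factory::Instance();
    const auto& cvtfunc_map = bridges.AllFunctions();
    if (cvtfunc_map.count("elementwise_add")) {
      auto outputs = cvtfunc_map.at("elementwise_add")(op, &ctx, node_inputs);
    }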
diff --git a/lite/kernels/bm/bridges/mul_op.cc b/lite/kernels/bm/bridges/mul_op.cc
index 3b85731640c8ee9c367a475ae5376aecb5476573..600ee9f1f7e33afc15d5bcfaaab91a43eb835a73 100644
--- a/lite/kernels/bm/bridges/mul_op.cc
+++ b/lite/kernels/bm/bridges/mul_op.cc
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #include "lite/kernels/bm/bridges/registry.h"
+#include "lite/backends/bm/builder.h"
+#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
@@ -20,11 +22,77 @@ namespace kernels {
 namespace bm {
 namespace bridges {
 
-node_map_type MulConverter(const std::shared_ptr<lite::OpLite> op,
+node_map_type MulConverter(const std::shared_ptr<lite::OpLite> mul_op,
+                           graph_ctx_type* graph_ctx,
                            const node_map_type& input_nodes) {
   // output converted nodes
   node_map_type output_nodes;
-  return output_nodes;
+
+  auto scope = mul_op->scope();
+  auto op_info = mul_op->op_info();
+  auto op_type = op_info->Type();
+  auto unique_op_name = lite::bm::UniqueName(op_type);
+
+  // only the case where Y is a constant weight is supported
+
+  // input
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+
+  const long int* x_shape_data =
+      const_cast<const long int*>(&x_dims.data()[0]);
+  int i_x_shape_data[x_dims.size()];
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
+  }
+
+  // flatten X to 2-D {batch, -1}; the inner dimension must cover all
+  // trailing dims of X, not just x_dims[1]
+  int i_x_reshape_shape_data[2];
+  i_x_reshape_shape_data[0] = static_cast<int>(x_shape_data[0]);
+  i_x_reshape_shape_data[1] =
+      static_cast<int>(x_dims.production() / x_dims[0]);
+  int reshape_param[] = {0, -1};
+  auto unique_op_reshape_name = lite::bm::UniqueName(op_type + "_reshape");
+  add_reshape_layer(graph_ctx->bm_compiler_handle,
+                    const_cast<const int*>(i_x_shape_data),
+                    x_dims.size(),
+                    static_cast<const char*>(x_var_name.c_str()),
+                    const_cast<const int*>(i_x_reshape_shape_data),
+                    2,
+                    static_cast<const char*>(unique_op_reshape_name.c_str()),
+                    const_cast<const int*>(reshape_param));
+
+  auto y_var_name = op_info->Input("Y").front();
+  auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
+  auto y_dims = y->dims();
+
+  // output
+  auto output_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  const long int* output_shape_data =
+      const_cast<const long int*>(&output_dims.data()[0]);
+  int i_output_shape_data[output_dims.size()];
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
+  }
+
+  add_fc_layer(graph_ctx->bm_compiler_handle,
+               const_cast<const int*>(i_x_reshape_shape_data),
+               2,
+               static_cast<const char*>(unique_op_reshape_name.c_str()),
+               const_cast<const int*>(i_output_shape_data),
+               output_dims.size(),
+               static_cast<const char*>(output_var_name.c_str()),
+               static_cast<const char*>(unique_op_name.c_str()),
+               i_x_reshape_shape_data[1],
+               i_output_shape_data[1],
+               static_cast<const float*>(y->mutable_data<float>()),
+               nullptr,
+               0,
+               0);
+
+  output_nodes[output_var_name] = output_var_name;
+  return output_nodes;
 }
 
 }  // namespace bridges
diff --git a/lite/kernels/bm/bridges/paddle_use_bm_bridges.h b/lite/kernels/bm/bridges/paddle_use_bm_bridges.h
new file mode 100644
index 0000000000000000000000000000000000000000..dce674a94cf77a060bb829ec537c56679e191d4d
--- /dev/null
+++ b/lite/kernels/bm/bridges/paddle_use_bm_bridges.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+USE_BM_BRIDGE(relu);
+USE_BM_BRIDGE(conv2d);
+USE_BM_BRIDGE(elementwise_add);
+USE_BM_BRIDGE(pool2d);
+USE_BM_BRIDGE(softmax);
+USE_BM_BRIDGE(mul);
+USE_BM_BRIDGE(batch_norm);
+USE_BM_BRIDGE(scale);
diff --git a/lite/kernels/bm/bridges/pool_op.cc b/lite/kernels/bm/bridges/pool_op.cc
index 81b45832926c53d1eaee9fcc6af9499e9dcaa870..7db6d9871a64270f2cdc4988e7289acedb8d4539 100644
--- a/lite/kernels/bm/bridges/pool_op.cc
+++ b/lite/kernels/bm/bridges/pool_op.cc
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #include "lite/kernels/bm/bridges/registry.h"
+#include "lite/backends/bm/builder.h"
+#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
@@ -20,11 +22,81 @@ namespace kernels {
 namespace bm {
 namespace bridges {
 
-node_map_type PoolConverter(const std::shared_ptr<lite::OpLite> op,
+node_map_type PoolConverter(const std::shared_ptr<lite::OpLite> pool_op,
+                            graph_ctx_type* graph_ctx,
                             const node_map_type& input_nodes) {
   // output converted nodes
   node_map_type output_nodes;
-  return output_nodes;
+  auto scope = pool_op->scope();
+  auto op_info = pool_op->op_info();
+  auto op_type = op_info->Type();
+  auto unique_op_name = lite::bm::UniqueName(op_type);
+
+  // input
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  const long int* x_shape_data =
+      const_cast<const long int*>(&x_dims.data()[0]);
+  int i_x_shape_data[x_dims.size()];
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
+  }
+
+  // output
+  int* shape[1];
+  int dim[1];
+  const char* name[1];
+  auto output_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  const long int* output_shape_data =
+      const_cast<const long int*>(&output_dims.data()[0]);
+  int i_output_shape_data[output_dims.size()];
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
+  }
+  shape[0] = i_output_shape_data;
+  name[0] = static_cast<const char*>(output_var_name.c_str());
+  dim[0] = output_dims.size();
+
+  auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
+  CHECK(pooling_type == "max" || pooling_type == "avg");
+
+  auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
+  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  auto strides = op_info->GetAttr<std::vector<int>>("strides");
+  auto global_pooling = op_info->GetAttr<bool>("global_pooling");
+  auto ceil_mode = op_info->GetAttr<bool>("ceil_mode");
+
+  bool average_exclusive = false;
+  if (pooling_type == "avg") {
+    average_exclusive = op_info->GetAttr<bool>("exclusive");
+  }
+
+  add_pooling_layer(graph_ctx->bm_compiler_handle,
+                    const_cast<const int*>(i_x_shape_data),
+                    x_dims.size(),
+                    static_cast<const char*>(x_var_name.c_str()),
+                    1,
+                    shape,
+                    dim,
+                    name,
+                    ksize[0],
+                    ksize[1],
+                    paddings[0],
+                    paddings[0],
+                    paddings[1],
+                    paddings[1],
+                    strides[0],
+                    strides[1],
+                    // 0 = max pooling, 1 = average; a 1x1 "max" window is
+                    // routed to the average path, which is equivalent over
+                    // a single element
+                    (ksize[0] > 1 && ksize[1] > 1) && pooling_type == "max" ? 0 : 1,
+                    static_cast<int>(average_exclusive),
+                    static_cast<int>(global_pooling),
+                    static_cast<int>(ceil_mode),
+                    static_cast<const char*>(unique_op_name.c_str()),
+                    nullptr);
+
+  output_nodes[output_var_name] = output_var_name;
+  return output_nodes;
 }
 
 }  // namespace bridges
diff --git a/lite/kernels/bm/bridges/registry.h b/lite/kernels/bm/bridges/registry.h
index 73101aebb2aa2d9209324236083ef3b0adf0a0f4..cf12c58e16fc80b14c4f748d1e21c495715a19e4 100644
--- a/lite/kernels/bm/bridges/registry.h
+++ b/lite/kernels/bm/bridges/registry.h
@@ -28,11 +28,17 @@ namespace kernels {
 namespace bm {
 namespace bridges {
 
+// Shared per-subgraph state threaded through every converter; for now it
+// only carries the bmcompiler handle created by the BM program pass.
+class graph_ctx_type {
+ public:
+  void* bm_compiler_handle{nullptr};
+};
+
-// var_name, bm node point
+// var_name -> bm node name
 using node_map_type =
-    std::unordered_map<std::string, std::shared_ptr<ge::Operator>>;
+    std::unordered_map<std::string, std::string>;
 
 using func_type = std::function<node_map_type(const std::shared_ptr<lite::OpLite>,
+                                              graph_ctx_type*,
                                               const node_map_type&)>;
 using cvt_map_type = std::unordered_map<std::string, func_type>;
 
 class Factory {
diff --git a/lite/kernels/bm/bridges/scale_op.cc b/lite/kernels/bm/bridges/scale_op.cc
index 39c6d602183b13eef963433ec07bacac51fc96ee..1a4f4ecad4217a14c17579c1d52db8d4d1f5b242 100644
--- a/lite/kernels/bm/bridges/scale_op.cc
+++ b/lite/kernels/bm/bridges/scale_op.cc
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #include "lite/kernels/bm/bridges/registry.h"
+#include "lite/backends/bm/builder.h"
+#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
@@ -20,11 +22,73 @@ namespace kernels {
 namespace bm {
 namespace bridges {
 
-node_map_type ScaleConverter(const std::shared_ptr<lite::OpLite> op,
+node_map_type ScaleConverter(const std::shared_ptr<lite::OpLite> scale_op,
+                             graph_ctx_type* graph_ctx,
                              const node_map_type& input_nodes) {
   // output converted nodes
   node_map_type output_nodes;
-  return output_nodes;
+
+  auto scope = scale_op->scope();
+  auto op_info = scale_op->op_info();
+  auto op_type = op_info->Type();
+  auto unique_op_name = lite::bm::UniqueName(op_type);
+
+  // input
+  const int input_num = 1;
+  int** shape = new int*[input_num];
+  int* dim = new int[input_num];
+  const char** name = new const char*[input_num];
+
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  name[0] = static_cast<const char*>(x_var_name.c_str());
+  dim[0] = x_dims.size();
+  const long int* x_shape_data =
+      const_cast<const long int*>(&x_dims.data()[0]);
+  int i_x_shape_data[x_dims.size()];
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
+  }
+  shape[0] = i_x_shape_data;
+
+  // output
+  auto output_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  const long int* output_shape_data =
+      const_cast<const long int*>(&output_dims.data()[0]);
+  int i_output_shape_data[output_dims.size()];
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
+  }
+
+  auto scale = op_info->GetAttr<float>("scale");
+  auto bias = op_info->GetAttr<float>("bias");
+  auto bias_after_scale = op_info->GetAttr<bool>("bias_after_scale");
+  // scale op semantics: y = scale * x + bias when bias_after_scale is true,
+  // y = scale * (x + bias) otherwise, so fold the bias in the latter case
+  if (!bias_after_scale) {
+    bias *= scale;
+  }
+
+  add_scale_layer(graph_ctx->bm_compiler_handle,
+                  input_num,
+                  shape,
+                  dim,
+                  name,
+                  const_cast<const int*>(i_output_shape_data),
+                  output_dims.size(),
+                  static_cast<const char*>(output_var_name.c_str()),
+                  static_cast<const char*>(unique_op_name.c_str()),
+                  &scale,
+                  &bias,
+                  1,
+                  1,
+                  0);
+
+  delete[] shape;
+  delete[] dim;
+  delete[] name;
+
+  output_nodes[output_var_name] = output_var_name;
+  return output_nodes;
 }
 
 }  // namespace bridges
diff --git a/lite/kernels/bm/bridges/softmax_op.cc b/lite/kernels/bm/bridges/softmax_op.cc
index e1f99b02ab3621c687f2b771bed541fc7f4239c6..f7693ede91c715e16111b1c9ad19edfc6a9176eb 100644
--- a/lite/kernels/bm/bridges/softmax_op.cc
+++ b/lite/kernels/bm/bridges/softmax_op.cc
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #include "lite/kernels/bm/bridges/registry.h"
+#include "lite/backends/bm/builder.h"
+#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
@@ -20,11 +22,54 @@ namespace kernels {
 namespace bm {
 namespace bridges {
 
-node_map_type SoftmaxConverter(const std::shared_ptr<lite::OpLite> op,
+node_map_type SoftmaxConverter(const std::shared_ptr<lite::OpLite> softmax_op,
+                               graph_ctx_type* graph_ctx,
                                const node_map_type& input_nodes) {
   // output converted nodes
   node_map_type output_nodes;
-  return output_nodes;
+  auto scope = softmax_op->scope();
+  auto op_info = softmax_op->op_info();
+
+  // input
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  const long int* x_shape_data =
+      const_cast<const long int*>(&x_dims.data()[0]);
+  int i_x_shape_data[x_dims.size()];
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
+  }
+
+  // output
+  auto output_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  const long int* output_shape_data =
+      const_cast<const long int*>(&output_dims.data()[0]);
+  int i_output_shape_data[output_dims.size()];
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
+  }
+
+  auto axis = op_info->GetAttr<int>("axis");
+  if (axis < 0) {
+    axis += x_dims.size();
+  }
+
+  // split the shape into {outer, axis dim, inner} around the softmax axis
+  int outer_num = x_dims.Slice(0, axis).production();
+  int inner_num = x_dims.Slice(axis + 1, x_dims.size()).production();
+
+  add_softmax_layer(graph_ctx->bm_compiler_handle,
+                    const_cast<const int*>(i_x_shape_data),
+                    x_dims.size(),
+                    static_cast<const char*>(x_var_name.c_str()),
+                    const_cast<const int*>(i_output_shape_data),
+                    output_dims.size(),
+                    static_cast<const char*>(output_var_name.c_str()),
+                    inner_num,
+                    outer_num,
+                    x_dims[axis]);
+
+  output_nodes[output_var_name] = output_var_name;
+  return output_nodes;
 }
 
 }  // namespace bridges