Commit 662e4d7c authored by cen.li

* Change to the bridge-based approach

* Fix code style (test=develop)
Parent 87271863
@@ -35,7 +35,7 @@ void TestModel(const std::vector<Place>& valid_places) {
  // DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
  lite::Predictor predictor;
  predictor.Build(FLAGS_model_dir, "", "", valid_places);
#if 0
  auto* input_tensor = predictor.GetInput(0);
  input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
  auto* data = input_tensor->mutable_data<float>();
@@ -53,7 +53,6 @@ void TestModel(const std::vector<Place>& valid_places) {
      fs >> data[i];
    }
  }
  for (int i = 0; i < FLAGS_warmup; ++i) {
    predictor.Run();
  }
@@ -102,7 +101,6 @@ void TestModel(const std::vector<Place>& valid_places) {
    }
  }
  LOG(INFO) << "max val:" << max_val << ", max_val_arg:" << max_val_arg;
#endif
}
TEST(ResNet50, test_bm) {
......
@@ -2,4 +2,4 @@ if (NOT LITE_WITH_BM)
  return()
endif()
lite_cc_library(target_wrapper_bm SRCS target_wrapper.cc bm_context.cc DEPS ${bm_runtime_libs})
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/core/context.h"
#include "bmcompiler_if.h"
namespace paddle {
namespace lite {
static const char* CHIP_NAME = "BM1684";
void BMContext::InitOnce() {
compiler_handle_ = create_bmcompiler(CHIP_NAME);
CHECK(NULL != compiler_handle_);
}
} // namespace lite
} // namespace paddle
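A minimal sketch (not part of this commit) of how a caller holding a KernelContext might reach the compiler handle created above. The As<BMContext>() accessor is an assumption made by analogy with the As<HostContext>() call visible later in this diff; only compiler_handle() itself is introduced by this commit.
// Hypothetical usage sketch; BMContext is the kBM specialization of Context.
#include "lite/core/context.h"
void UseBMCompiler(paddle::lite::KernelContext* kctx) {
  auto& bm_ctx = kctx->As<paddle::lite::BMContext>();  // assumed accessor, mirrors HostContext usage
  void* handle = bm_ctx.compiler_handle();             // accessor added by this commit
  CHECK(handle != nullptr);                            // valid only after InitOnce() has run
  // hand `handle` to the bmcompiler_if.h graph-building APIs here
}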
@@ -14,6 +14,7 @@
#include <map>
#include "lite/backends/bm/target_wrapper.h"
#include "bmlib_runtime.h"
#include "bmcompiler_if.h"
namespace paddle {
namespace lite {
@@ -32,7 +33,8 @@ void TargetWrapperBM::SetDevice(int id) {
  if (g_bm_handles.find(id) == g_bm_handles.end()) {
    bm_handle_t bm_handle;
    bm_status_t ret = bm_dev_request(&bm_handle, id);
    CHECK_EQ(ret, BM_SUCCESS) << "Failed with error code: " << (int)ret;
    g_bm_handles.insert(std::pair<int, bm_handle_t>(id, bm_handle));
  }
  return;
@@ -41,6 +43,10 @@ void TargetWrapperBM::SetDevice(int id) {
void* TargetWrapperBM::Malloc(size_t size) {
  void* ptr{};
  if (g_bm_handles.find(g_current_device_id) == g_bm_handles.end()) {
    SetDevice(g_current_device_id);
  }
  bm_handle_t bm_handle = g_bm_handles.at(g_current_device_id);
  bm_device_mem_t* p_mem = (bm_device_mem_t*)malloc(sizeof(bm_device_mem_t));
  bm_malloc_device_byte(bm_handle, p_mem, size);
......
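The lazy SetDevice() call added to Malloc() above means a BM handle is requested on first allocation. A rough usage sketch of the wrapper, assuming device 0 is present (the TargetW alias matches the one used in io_copy_compute.cc below):
// Hypothetical sketch of direct allocation through the BM target wrapper.
#include "lite/backends/bm/target_wrapper.h"
using TargetW = paddle::lite::TargetWrapper<TARGET(kBM)>;
void ExampleAllocOnBM() {
  TargetW::SetDevice(0);                  // requests a bm_handle_t for device 0 and caches it
  void* dev_buf = TargetW::Malloc(4096);  // would call SetDevice() itself if no handle were cached
  // TargetW::MemcpySync(dev_buf, host_src, 4096, IoDirection::HtoD);  // host-to-device copy
  TargetW::Free(dev_buf);
}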
@@ -96,11 +96,17 @@ class Context<TargetType::kBM> {
  Context() {}
  explicit Context(const BMContext& ctx);
  // NOTE: InitOnce should only be used by ContextScheduler
  void InitOnce();
  void CopySharedTo(BMContext* ctx) {}
  std::string name() const { return "BMContext"; }
  void* compiler_handle() {
    return compiler_handle_;
  }

 private:
  void* compiler_handle_{nullptr};
};
#endif
#ifdef LITE_WITH_XPU
@@ -340,7 +346,6 @@ class ContextScheduler {
  std::unique_ptr<KernelContext> NewContext(TargetType target) {
    std::unique_ptr<KernelContext> ctx(new KernelContext);
    switch (target) {
      case TARGET(kHost):
        kernel_contexts_[TargetType::kHost].As<HostContext>().CopySharedTo(
......
@@ -40,6 +40,11 @@ void* TargetMalloc(TargetType target, size_t size) {
      data = TargetWrapper<TARGET(kFPGA)>::Malloc(size);
      break;
#endif  // LITE_WITH_OPENCL
#ifdef LITE_WITH_BM
    case TargetType::kBM:
      data = TargetWrapper<TARGET(kBM)>::Malloc(size);
      break;
#endif
    default:
      LOG(FATAL) << "Unknown supported target " << TargetToStr(target);
  }
@@ -69,6 +74,11 @@ void TargetFree(TargetType target, void* data) {
      TargetWrapper<TARGET(kFPGA)>::Free(data);
      break;
#endif  // LITE_WITH_CUDA
#ifdef LITE_WITH_BM
    case TargetType::kBM:
      TargetWrapper<TARGET(kBM)>::Free(data);
      break;
#endif
    default:
      LOG(FATAL) << "Unknown type";
  }
@@ -95,6 +105,12 @@ void TargetCopy(TargetType target, void* dst, const void* src, size_t size) {
          dst, src, size, IoDirection::DtoD);
      break;
#endif
#ifdef LITE_WITH_BM
    case TargetType::kBM:
      TargetWrapper<TARGET(kBM)>::MemcpySync(
          dst, src, size, IoDirection::DtoD);
      break;
#endif
#ifdef LITE_WITH_OPENCL
    case TargetType::kOpenCL:
      TargetWrapperCL::MemcpySync(dst, src, size, IoDirection::DtoD);
......
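With the kBM cases added above, the generic memory helpers now dispatch to the BM target wrapper. A brief sketch of that call path (a sketch only; the header path for these helpers is assumed):
// Hypothetical sketch of the generic allocation path ending at TargetWrapperBM.
#include "lite/core/memory.h"  // assumed location of TargetMalloc/TargetCopy/TargetFree
void ExampleGenericBMAlloc() {
  using paddle::lite::TargetType;
  const size_t bytes = 1 * 3 * 224 * 224 * sizeof(float);
  void* src = paddle::lite::TargetMalloc(TargetType::kBM, bytes);  // -> TargetWrapper<TARGET(kBM)>::Malloc
  void* dst = paddle::lite::TargetMalloc(TargetType::kBM, bytes);
  paddle::lite::TargetCopy(TargetType::kBM, dst, src, bytes);      // device-to-device copy on BM
  paddle::lite::TargetFree(TargetType::kBM, src);                  // -> TargetWrapper<TARGET(kBM)>::Free
  paddle::lite::TargetFree(TargetType::kBM, dst);
}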
@@ -25,6 +25,10 @@
#include "lite/backends/cuda/target_wrapper.h"
#endif  // LITE_WITH_CUDA
#ifdef LITE_WITH_BM
#include "lite/backends/bm/target_wrapper.h"
#endif  // LITE_WITH_BM
namespace paddle {
namespace lite {
@@ -71,6 +75,11 @@ void CopySync(void* dst, const void* src, size_t size, IoDirection dir) {
    case TARGET(kFPGA):
      TargetWrapper<TARGET(kFPGA)>::MemcpySync(dst, src, size, dir);
      break;
#endif
#ifdef LITE_WITH_BM
    case TARGET(kBM):
      TargetWrapper<TARGET(kBM)>::MemcpySync(dst, src, size, dir);
      break;
#endif
  }
}
......
@@ -33,6 +33,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
  kernel_pick_factors_.ConsiderTarget();
  kernel_pick_factors_.ConsiderPrecision();
  kernel_pick_factors_.ConsiderDataLayout();
  CHECK(kernel_pick_factors_.any_factor_considered())
      << "kernel_pick_factors should be specified first";
  CHECK(graph) << "graph not valid";
@@ -114,7 +115,6 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
        bool all_output_type_match = true;
        auto expect_output_type =
            out_type_int8 ? PRECISION(kInt8) : PRECISION(kFloat);
        for (auto& arg_name : output_arguments) {
          const Type* out_arg_ty =
              candidate.second->GetOutputDeclType(arg_name);
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/core/mir/subgraph/generate_bm_program_pass.h"
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "lite/core/mir/graph_visualize_pass.h"
#include "lite/core/mir/pass_registry.h"
#include "lite/core/mir/pattern_matcher.h"
namespace paddle {
namespace lite {
namespace mir {
namespace subgraph {
std::shared_ptr<ge::Operator> GenerateBMProgramPass::CvtVarNode(
lite::mir::Node* var_node, const Scope* scope) {
CHECK(var_node->IsArg());
const auto& arg = var_node->AsArg();
VLOG(4) << "Convert var node " << arg.name;
auto* var = scope->FindVar(arg.name);
CHECK(var);
auto* tensor = var->GetMutable<lite::Tensor>();
CHECK(tensor);
auto dims = tensor->dims();
if (arg.is_weight) {
auto wgt = std::make_shared<ge::op::Const>(arg.name);
LOG(INFO) << " Convert const var node " << arg.name;
VLOG(4) << dims;
wgt->set_attr_value(lite::npu::CvtTensor(tensor));
return wgt;
} else {
CHECK_EQ(dims.size(), 4);
LOG(INFO) << "[NPU] Convert data var node " << arg.name;
LOG(INFO) << dims;
// TODO(xxx): support more types and dims size
ge::TensorDesc desc(ge::Shape(dims.Vectorize()),
ge::Format::FORMAT_NCHW,
ge::DataType::DT_FLOAT);
// auto size = desc.GetShape().GetShapeSize();
// ge::TensorUtils::SetSize(desc, size*sizeof(float));
// ge::TensorUtils::SetRealDimCnt(desc, 4);
auto data = std::make_shared<ge::op::Data>(arg.name);
data->update_input_desc_x(desc);
return data;
}
return nullptr;
}
void GenerateBMProgramPass::CvtAllOpNodes(
const std::vector<Node*>& nodes2cvt,
lite::kernels::npu::bridges::node_map_type* converted_vars) {
const auto& bridges = lite::kernels::npu::bridges::Factory::Instance();
const auto& cvtfunc_map = bridges.AllFunctions();
// return record all converted vars
// op node's inputs must be found in converted_vars
for (auto& node : nodes2cvt) {
lite::kernels::npu::bridges::node_map_type node_inputs;
auto& stmt = node->AsStmt();
for (auto& var_node : node->inlinks) {
auto& arg = var_node->AsArg();
// weight should be handled in the converter, so skip here
if (arg.is_weight) {
continue;
}
auto var_name = arg.name;
if (!converted_vars->count(var_name)) {
converted_vars->insert(
std::make_pair(var_name, CvtVarNode(var_node, stmt.op()->scope())));
}
node_inputs.insert(*converted_vars->find(var_name));
}
auto node_outputs = cvtfunc_map.at(stmt.op_type())(stmt.op(), node_inputs);
converted_vars->insert(node_outputs.begin(), node_outputs.end());
}
}
std::string GenerateBMProgramPass::BuildGraph(
const std::unordered_set<Node*>& op_nodes,
const std::unordered_set<Node*>& in_data_vars,
const std::unordered_set<Node*>& out_data_vars,
int sub_id) {
auto ordered_nodes = GetTopologicalOrder(op_nodes);
lite::kernels::npu::bridges::node_map_type converted_vars;
CvtAllOpNodes(ordered_nodes, &converted_vars);
std::vector<std::string> in_var_names;
std::vector<std::string> out_var_names;
std::vector<ge::Operator> inputs;
std::vector<ge::Operator> outputs;
for (auto i : in_data_vars) {
auto argname = i->AsArg().name;
in_var_names.push_back(argname);
inputs.push_back(*converted_vars.at(argname));
}
for (auto i : out_data_vars) {
auto argname = i->AsArg().name;
out_var_names.push_back(argname);
outputs.push_back(*converted_vars.at(argname));
}
std::string weight_var_name = "graph" + std::to_string(sub_id) + "_weights";
auto any_op = (*op_nodes.begin())->AsStmt().op();
auto weight = any_op->scope()->Var(weight_var_name)->GetMutable<Tensor>();
weight->set_persistable(true);
weight->set_precision(PRECISION(kInt8));
// Compile the IR graph into an NPU model and store the model data in a weight
// tensor with persistable=true, so that the model parser can recognize it and
// save it to the param files.
if (!lite::npu::BuildModel(inputs, outputs, weight)) {
LOG(WARNING) << "[NPU] Build NPU graph failed (subgraph=" << sub_id << ")";
throw std::runtime_error("Build NPU graph failed.");
}
LOG(INFO) << "[NPU] Build NPU graph success (subgraph=" << sub_id << ")";
return weight_var_name;
}
void GenerateBMProgramPass::GenSubgraph(
const std::unique_ptr<SSAGraph>& graph,
const std::unordered_set<Node*>& op_nodes,
int sub_id) {
#if 0
std::unordered_set<Node*> in_data_vars;
std::unordered_set<Node*> in_wgt_vars;
std::unordered_set<Node*> out_data_vars;
std::unordered_set<Node*> out_unused_vars;
FindInputOutputVars(
op_nodes, &in_data_vars, &in_wgt_vars, &out_data_vars, &out_unused_vars);
auto weight_var_name =
      BuildGraph(op_nodes, in_data_vars, out_data_vars, sub_id);
auto any_op = (*op_nodes.begin())->AsStmt().op();
InsertNewNode(graph,
weight_var_name,
any_op->scope(),
any_op->valid_places(),
in_data_vars,
in_wgt_vars,
out_data_vars,
out_unused_vars);
auto nodes2rm = GetNode2rm(
op_nodes, {in_data_vars, in_wgt_vars, out_data_vars, out_unused_vars});
GraphSafeRemoveNodes(graph.get(), nodes2rm);
#endif
}
void GenerateBMProgramPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
}
std::unique_ptr<RuntimeProgram> GenerateBMProgramPass::GenProgram() {
std::unique_ptr<RuntimeProgram> program(
new RuntimeProgram(std::move(insts_)));
return program;
}
} // namespace subgraph
} // namespace mir
} // namespace lite
} // namespace paddle
REGISTER_MIR_PASS(generate_bm_program_pass,
paddle::lite::mir::subgraph::GenerateBMProgramPass)
.BindTargets({TARGET(kBM)});
@@ -12,48 +12,49 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "lite/core/context.h"
#include "lite/core/mir/pass.h"
#include "lite/core/mir/subgraph/subgraph_program_pass.h"
namespace paddle {
namespace lite {
namespace mir {
namespace subgraph {
class GenerateBMProgramPass : public SubgraphProgramPass {
 public:
  using key2nodes_t = std::map<std::string, Node*>;
  void Apply(const std::unique_ptr<SSAGraph>& graph) override;
  std::unique_ptr<RuntimeProgram> GenProgram();

 protected:
  // nodes2cvt: op nodes to convert
  // return cvted_vars: converted var nodes
  void CvtAllOpNodes(const std::vector<Node*>& nodes2cvt,
                     lite::kernels::npu::bridges::node_map_type* cvted_vars);
  std::shared_ptr<ge::Operator> CvtVarNode(lite::mir::Node* var_node,
                                           const Scope* scope);
  std::string BuildGraph(const std::unordered_set<Node*>& op_nodes,
                         const std::unordered_set<Node*>& in_data_vars,
                         const std::unordered_set<Node*>& out_data_vars,
                         int sub_id);

 private:
  std::vector<Instruction> insts_;
};
} // namespace subgraph
} // namespace mir
} // namespace lite
} // namespace paddle
@@ -2,16 +2,6 @@ if(NOT LITE_WITH_BM)
  return ()
endif()
add_kernel(graph_compute_bm BM basic SRCS graph_compute.cc DEPS ${lite_kernel_deps})
message(STATUS "compile with lite BM kernels")
add_subdirectory(bridges)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/batch_norm_compute.h"
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
void BatchNormCompute::PrepareForRun() {
return;
}
void BatchNormCompute::Run() {
return;
}
template <PrecisionType Ptype_out>
void BatchNormComputeInt8<Ptype_out>::PrepareForRun() {
return;
}
template <PrecisionType Ptype_out>
void BatchNormComputeInt8<Ptype_out>::Run() {
return;
}
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
batch_norm, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::BatchNormCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Scale", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Bias", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Mean", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Variance", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
REGISTER_LITE_KERNEL(
batch_norm, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::BatchNormComputeInt8<PRECISION(kInt8)>, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Scale", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Bias", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Mean", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Variance", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
lite_cc_library(bm_bridge_registry SRCS registry.cc)
set(bm_bridge_deps bm_bridge_registry op)
lite_cc_library(bm_bridge_act_op SRCS act_op.cc DEPS ${bm_bridge_deps})
lite_cc_library(bm_bridge_conv_op SRCS conv_op.cc DEPS ${bm_bridge_deps})
lite_cc_library(bm_bridge_elementwise_ops SRCS elementwise_ops.cc DEPS ${bm_bridge_deps})
lite_cc_library(bm_bridge_pool_op SRCS pool_op.cc DEPS ${bm_bridge_deps})
lite_cc_library(bm_bridge_softmax_op SRCS softmax_op.cc DEPS ${bm_bridge_deps})
lite_cc_library(bm_bridge_mul_op SRCS mul_op.cc DEPS ${bm_bridge_deps})
lite_cc_library(bm_bridge_batch_norm_op SRCS batch_norm_op.cc DEPS ${bm_bridge_deps})
set(bm_bridges
bm_bridge_registry
bm_bridge_act_op
bm_bridge_conv_op
bm_bridge_elementwise_ops
bm_bridge_pool_op
bm_bridge_softmax_op
bm_bridge_mul_op
bm_bridge_batch_norm_op
CACHE INTERNAL "bm_bridges")
@@ -12,35 +12,25 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/bridges/registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {
node_map_type ActConverter(const std::shared_ptr<lite::OpLite> op,
                           const node_map_type& input_nodes) {
  // output converted nodes
  node_map_type output_nodes;
  return output_nodes;
}
} // namespace bridges
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_BM_BRIDGE(relu, paddle::lite::kernels::bm::bridges::ActConverter);
@@ -12,39 +12,25 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/bridges/registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {
node_map_type BatchNormConverter(const std::shared_ptr<lite::OpLite> op,
                                 const node_map_type& input_nodes) {
  // output converted nodes
  node_map_type output_nodes;
  return output_nodes;
}
} // namespace bridges
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_BM_BRIDGE(batch_norm, paddle::lite::kernels::bm::bridges::BatchNormConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/bridges/registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {
node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> op,
const node_map_type& input_nodes) {
// output converted nodes
node_map_type output_nodes;
return output_nodes;
}
} // namespace bridges
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_BM_BRIDGE(conv2d, paddle::lite::kernels::bm::bridges::ConvConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/bridges/registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {
node_map_type ElementwiseConverter(const std::shared_ptr<lite::OpLite> op,
const node_map_type& input_nodes) {
// output converted nodes
node_map_type output_nodes;
return output_nodes;
}
} // namespace bridges
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_BM_BRIDGE(elementwise, paddle::lite::kernels::bm::bridges::ElementwiseConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/bridges/registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {
node_map_type MulConverter(const std::shared_ptr<lite::OpLite> op,
const node_map_type& input_nodes) {
// output converted nodes
node_map_type output_nodes;
return output_nodes;
}
} // namespace bridges
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_BM_BRIDGE(mul, paddle::lite::kernels::bm::bridges::MulConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/bridges/registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {
node_map_type PoolConverter(const std::shared_ptr<lite::OpLite> op,
const node_map_type& input_nodes) {
// output converted nodes
node_map_type output_nodes;
return output_nodes;
}
} // namespace bridges
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_BM_BRIDGE(pool2d, paddle::lite::kernels::bm::bridges::PoolConverter);
@@ -12,39 +12,29 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/bridges/registry.h"
#include <utility>
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {
Factory& Factory::Instance() {
  static Factory g_bm_bridge;
  return g_bm_bridge;
}
bool Factory::HasType(const std::string& op_type) const {
  return map_.count(op_type);
}
void Factory::Insert(const std::string& op_type, const func_type& func_name) {
  map_.insert(std::make_pair(op_type, func_name));
}
} // namespace bridges
} // namespace bm
} // namespace kernels
} // namespace lite
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "lite/core/op_lite.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {
// var_name, bm node point
using node_map_type =
std::unordered_map<std::string, std::shared_ptr<void*>>;
using func_type = std::function<node_map_type(const std::shared_ptr<OpLite>,
const node_map_type&)>;
using cvt_map_type = std::unordered_map<std::string, func_type>;
class Factory {
public:
static Factory& Instance();
const cvt_map_type& AllFunctions() const { return map_; }
bool HasType(const std::string& op_type) const;
void Insert(const std::string& op_type, const func_type& func_name);
Factory() = default;
private:
cvt_map_type map_;
DISALLOW_COPY_AND_ASSIGN(Factory);
};
} // namespace bridges
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
// some platform-independent definitions
#if defined(_WIN32)
#define UNUSED
#define __builtin_expect(EXP, C) (EXP)
#else
#define UNUSED __attribute__((unused))
#endif
#define STATIC_ASSERT_JITKERNEL_GLOBAL_NAMESPACE(uniq_name, msg) \
struct __test_global_namespace_##uniq_name##__ {}; \
static_assert(std::is_same<::__test_global_namespace_##uniq_name##__, \
__test_global_namespace_##uniq_name##__>::value, \
msg)
#define REGISTER_BM_BRIDGE(op_type, cvt_func_name) \
STATIC_ASSERT_JITKERNEL_GLOBAL_NAMESPACE( \
__reg_bm_bridge_##op_type##__, \
"REGISTER_BM_BRIDGE must be called in global namespace only once!"); \
int __reg_bm_bridge_##op_type##_Insert() { \
paddle::lite::kernels::bm::bridges::Factory::Instance().Insert( \
#op_type, cvt_func_name); \
return 0; \
}
#define USE_BM_BRIDGE(op_type) \
extern int __reg_bm_bridge_##op_type##_Insert(); \
static int __reg_bm_bridge_##op_type##_Insert_return UNUSED = \
__reg_bm_bridge_##op_type##_Insert();
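A short sketch of how a caller is expected to consume this registry, mirroring the way CvtAllOpNodes looks converters up by op type earlier in this diff; the ConvertOneOp wrapper and its arguments are illustrative assumptions, not part of this commit.
// Hypothetical lookup sketch; `op` and `inputs` would come from the graph node being converted.
#include "lite/kernels/bm/bridges/registry.h"
namespace bm_bridges = paddle::lite::kernels::bm::bridges;
bm_bridges::node_map_type ConvertOneOp(const std::string& op_type,
                                       const std::shared_ptr<paddle::lite::OpLite>& op,
                                       const bm_bridges::node_map_type& inputs) {
  const auto& factory = bm_bridges::Factory::Instance();
  CHECK(factory.HasType(op_type)) << "no BM bridge registered for " << op_type;
  return factory.AllFunctions().at(op_type)(op, inputs);  // e.g. invokes ActConverter for "relu"
}
// USE_BM_BRIDGE(relu);  // pulls in the registration from act_op.cc at link time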
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/bridges/registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {
node_map_type ScaleConverter(const std::shared_ptr<lite::OpLite> op,
const node_map_type& input_nodes) {
// output converted nodes
node_map_type output_nodes;
return output_nodes;
}
} // namespace bridges
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_BM_BRIDGE(scale, paddle::lite::kernels::bm::bridges::ScaleConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/bridges/registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {
node_map_type SoftmaxConverter(const std::shared_ptr<lite::OpLite> op,
const node_map_type& input_nodes) {
// output converted nodes
node_map_type output_nodes;
return output_nodes;
}
} // namespace bridges
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_BM_BRIDGE(softmax, paddle::lite::kernels::bm::bridges::SoftmaxConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/calib_compute.h"
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
void CalibComputeFp32ToInt8::Run() {
}
void CalibComputeInt8ToFp32::Run() {
return;
}
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(calib,
kBM,
kInt8,
kNCHW,
paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
fp32_to_int8)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
.Finalize();
REGISTER_LITE_KERNEL(calib,
kBM,
kInt8,
kNCHW,
paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
int8_to_fp32)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.Finalize();
REGISTER_LITE_KERNEL(calib_once,
kBM,
kInt8,
kNCHW,
paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
fp32_to_int8)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
.Finalize();
REGISTER_LITE_KERNEL(calib_once,
kBM,
kInt8,
kNCHW,
paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
int8_to_fp32)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/conv_compute.h"
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
template class ConvComputeInt8<PRECISION(kInt8)>;
template class ConvComputeInt8<PRECISION(kFloat)>;
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
conv2d, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::ConvCompute, def)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kFloat),
DATALAYOUT(kNCHW))})
.BindInput("Bias",
{LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.BindInput("Filter",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kFloat),
DATALAYOUT(kNCHW))})
.BindOutput("Output",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kFloat),
DATALAYOUT(kNCHW))})
.Finalize();
REGISTER_LITE_KERNEL(
conv2d,
kBM,
kInt8,
kNCHW,
paddle::lite::kernels::bm::ConvComputeInt8<PRECISION(kInt8)>,
int8_out)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kInt8),
DATALAYOUT(kNCHW))})
.BindInput("Bias",
{LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.BindInput("Filter",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kInt8),
DATALAYOUT(kNCHW))})
.BindOutput("Output",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kFloat),
DATALAYOUT(kNCHW))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/elementwise_compute.h"
#include <string>
#include <vector>
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
void ElementwiseAddCompute::Run() {
}
template <PrecisionType Ptype_out>
void ElementwiseAddComputeInt8<Ptype_out>::Run() {
}
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(elementwise_add,
kBM,
kFloat,
kNCHW,
paddle::lite::kernels::bm::ElementwiseAddCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
REGISTER_LITE_KERNEL(elementwise_add,
kBM,
kInt8,
kNCHW,
paddle::lite::kernels::bm::ElementwiseAddComputeInt8<PRECISION(kInt8)>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
@@ -12,39 +12,37 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/graph_compute.h"
#include <sys/time.h>
#include <time.h>
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
void GraphCompute::PrepareForRun() {
}
void GraphCompute::Run() {
}
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(graph_op,
                     kBM,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::bm::GraphCompute,
                     def)
    .BindInput("Inputs", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindInput("Weight", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("Outputs", {LiteType::GetTensorTy(TARGET(kHost))})
    .Finalize();
@@ -13,29 +13,28 @@
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/types.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
class GraphCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
 public:
  using param_t = operators::GraphParam;
  void PrepareForRun() override;
  void Run() override;
  virtual ~GraphCompute() = default;
};
} // namespace bm
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/bm/target_wrapper.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
using TargetW = TargetWrapper<TARGET(kBM)>;
// Host to BM memory.
void CopyFromHostSync(void* target, const void* source, size_t size) {
TargetW::MemcpySync(target, source, size, IoDirection::HtoD);
}
void CopyFromHostAsync(void* target,
const void* source,
size_t size,
TargetW::stream_t stream) {
TargetW::MemcpyAsync(target, source, size, IoDirection::HtoD, stream);
}
// Host to Host memory.
void CopyToHostSync(void* target, const void* source, size_t size) {
TargetW::MemcpySync(target, source, size, IoDirection::DtoH);
}
/*
* This kernel copies a tensor from host to BM space.
*/
class IoCopyHostToBMCompute
: public KernelLite<TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
void Run() override {
auto& param = Param<operators::IoCopyParam>();
CHECK(param.x->target() == TARGET(kHost) ||
param.x->target() == TARGET(kX86));
auto mem_size = param.x->memory_size();
VLOG(4) << "copy size " << mem_size;
auto* data = param.y->mutable_data(TARGET(kBM), mem_size);
CopyFromHostSync(data, param.x->raw_data(), mem_size);
}
std::unique_ptr<type_infer_handler_t> GetTypeInferHandler() override {
std::unique_ptr<type_infer_handler_t> res(new type_infer_handler_t);
*res = [](const std::map<std::string, const Type*>& inputs,
const std::string& out) -> const Type* {
CHECK(!inputs.empty());
auto* type = inputs.at("Input");
CHECK(type->target() == TARGET(kHost));
auto out_place = type->place();
out_place.target = TARGET(kBM);
auto* out_type = Type::Get(type->id(),
out_place.target,
out_place.precision,
out_place.layout,
out_place.device);
return out_type;
};
return res;
}
std::string doc() const override { return "Copy IO from HOST to BM"; }
};
/*
* This kernel copies a tensor from BM to host space.
*/
class IoCopyBMToHostCompute
: public KernelLite<TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
void Run() override {
auto& param = Param<operators::IoCopyParam>();
CHECK(param.x->target() == TARGET(kBM));
auto mem_size = param.x->memory_size();
VLOG(4) << "io copy bm to host " << mem_size;
auto* data = param.y->mutable_data(TARGET(kHost), mem_size);
CopyToHostSync(data, param.x->raw_data(), mem_size);
}
std::string doc() const override { return "Copy IO from BM to HOST"; }
};
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(io_copy,
kBM,
kAny,
kAny,
paddle::lite::kernels::bm::IoCopyHostToBMCompute,
host_to_device)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kAny),
DATALAYOUT(kAny))})
.Finalize();
REGISTER_LITE_KERNEL(io_copy,
kBM,
kAny,
kAny,
paddle::lite::kernels::bm::IoCopyBMToHostCompute,
device_to_host)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.Finalize();
REGISTER_LITE_KERNEL(io_copy_once,
kBM,
kAny,
kAny,
paddle::lite::kernels::bm::IoCopyHostToBMCompute,
host_to_device)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kAny),
DATALAYOUT(kAny))})
.Finalize();
REGISTER_LITE_KERNEL(io_copy_once,
kBM,
kAny,
kAny,
paddle::lite::kernels::bm::IoCopyBMToHostCompute,
device_to_host)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/mul_compute.h"
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
void MulCompute::PrepareForRun() {
return;
}
void MulCompute::Run() {
return;
}
template <PrecisionType Ptype_out>
void MulComputeInt8<Ptype_out>::PrepareForRun() {
return;
}
template <PrecisionType Ptype_out>
void MulComputeInt8<Ptype_out>::Run() {
return;
}
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
mul, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::MulCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
REGISTER_LITE_KERNEL(
mul, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::MulComputeInt8<PRECISION(kInt8)>, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/pool_compute.h"
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
void PoolCompute::PrepareForRun() {
return;
}
void PoolCompute::Run() {
return;
}
template <PrecisionType Ptype_out>
void PoolComputeInt8<Ptype_out>::PrepareForRun() {
return;
}
template <PrecisionType Ptype_out>
void PoolComputeInt8<Ptype_out>::Run() {
return;
}
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
pool2d, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::PoolCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
REGISTER_LITE_KERNEL(
pool2d, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::PoolComputeInt8<PRECISION(kInt8)>, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/pool_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
class PoolCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
public:
using param_t = operators::PoolParam;
void PrepareForRun() override;
void Run() override;
virtual ~PoolCompute() = default;
};
template <PrecisionType Ptype_out>
class PoolComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
public:
using param_t = operators::PoolParam;
void PrepareForRun() override;
void Run() override;
virtual ~PoolComputeInt8() = default;
};
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/relu_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
class ReluCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationParam;
void PrepareForRun() override;
void Run() override;
virtual ~ReluCompute() = default;
};
template <PrecisionType Ptype_out>
class ReluComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
public:
using param_t = operators::ActivationParam;
void PrepareForRun() override;
void Run() override;
virtual ~ReluComputeInt8() = default;
};
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/scale_compute.h"
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
void ScaleCompute::PrepareForRun() {
return;
}
void ScaleCompute::Run() {
return;
}
template <PrecisionType Ptype_out>
void ScaleComputeInt8<Ptype_out>::PrepareForRun() {
return;
}
template <PrecisionType Ptype_out>
void ScaleComputeInt8<Ptype_out>::Run() {
return;
}
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
scale, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::ScaleCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
REGISTER_LITE_KERNEL(
scale, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::ScaleComputeInt8<PRECISION(kInt8)>, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/scale_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
class ScaleCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
public:
using param_t = operators::ScaleParam;
void PrepareForRun() override;
void Run() override;
virtual ~ScaleCompute() = default;
};
template <PrecisionType Ptype_out>
class ScaleComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
public:
using param_t = operators::ScaleParam;
void PrepareForRun() override;
void Run() override;
virtual ~ScaleComputeInt8() = default;
};
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/softmax_compute.h"
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
void SoftmaxCompute::PrepareForRun() {
return;
}
void SoftmaxCompute::Run() {
return;
}
template <PrecisionType Ptype_out>
void SoftmaxComputeInt8<Ptype_out>::PrepareForRun() {
return;
}
template <PrecisionType Ptype_out>
void SoftmaxComputeInt8<Ptype_out>::Run() {
return;
}
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
softmax, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::SoftmaxCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
REGISTER_LITE_KERNEL(
softmax, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::SoftmaxComputeInt8<PRECISION(kInt8)>, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/softmax_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
class SoftmaxCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
public:
using param_t = operators::SoftmaxParam;
void PrepareForRun() override;
void Run() override;
virtual ~SoftmaxCompute() = default;
};
template <PrecisionType Ptype_out>
class SoftmaxComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
public:
using param_t = operators::SoftmaxParam;
void PrepareForRun() override;
void Run() override;
virtual ~SoftmaxComputeInt8() = default;
};
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle