From 10db87ed8f47b314c7c6509f60d9541376b49eb3 Mon Sep 17 00:00:00 2001
From: "baolei.an"
Date: Sat, 11 Jan 2020 09:49:03 +0800
Subject: [PATCH] pass code style check

---
 lite/core/arena/CMakeLists.txt               |   2 +-
 lite/kernels/bm/bridges/act_op.cc            |  44 ++++----
 lite/kernels/bm/bridges/batch_norm_op.cc     |  34 ++----
 lite/kernels/bm/bridges/conv_op.cc           |  38 +++----
 lite/kernels/bm/bridges/elementwise_ops.cc   | 105 +++++++++--------
 lite/kernels/bm/bridges/mul_op.cc            |  56 +++++-----
 lite/kernels/bm/bridges/paddle_use_bridges.h |  16 +--
 lite/kernels/bm/bridges/pool_op.cc           |  71 ++++++------
 lite/kernels/bm/bridges/scale_op.cc          |  51 ++++-----
 lite/kernels/bm/bridges/softmax_op.cc        |  32 +++---
 lite/kernels/bm/subgraph_compute.cc          |  52 +++++----
 lite/kernels/npu/bridges/CMakeLists.txt      |   2 +-
 lite/tests/kernels/CMakeLists.txt            |  64 +++++------
 lite/tools/build_bm.sh                       | 112 +++++++++++++++++++
 14 files changed, 379 insertions(+), 300 deletions(-)
 create mode 100755 lite/tools/build_bm.sh

diff --git a/lite/core/arena/CMakeLists.txt b/lite/core/arena/CMakeLists.txt
index 1c85353d53..0f3f36768b 100644
--- a/lite/core/arena/CMakeLists.txt
+++ b/lite/core/arena/CMakeLists.txt
@@ -6,5 +6,5 @@ endif()
 lite_cc_library(arena_framework SRCS framework.cc DEPS program gtest)
 if((NOT LITE_WITH_OPENCL) AND (LITE_WITH_X86 OR LITE_WITH_ARM))
-    lite_cc_test(test_arena_framework SRCS framework_test.cc DEPS arena_framework ${npu_kernels} ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${fpga_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_arena_framework SRCS framework_test.cc DEPS arena_framework ${bm_kernels} ${npu_kernels} ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${fpga_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 endif()

diff --git a/lite/kernels/bm/bridges/act_op.cc b/lite/kernels/bm/bridges/act_op.cc
index 2daba24948..92317f5b05 100644
--- a/lite/kernels/bm/bridges/act_op.cc
+++ b/lite/kernels/bm/bridges/act_op.cc
@@ -12,54 +12,51 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <bmcompiler_if.h>
 #include "lite/kernels/npu/bridges/registry.h"
 #include "lite/kernels/bm/bridges/graph.h"
-#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
 namespace subgraph {
 namespace bm {
 
-int ActConverter(void* ctx, OpLite* op, KernelBase* kernel){
+int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(ctx != nullptr);
   CHECK(op != nullptr);
   auto graph = static_cast<Graph*>(ctx);
   auto scope = op->scope();
   auto op_info = op->op_info();
   auto op_type = op_info->Type();
-
   auto x_var_name = op_info->Input("X").front();
   auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
   auto x_dims = x->dims();
   auto output_var_name = op_info->Output("Out").front();
-  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output =
+      scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
   auto output_dims = output->dims();
-
-  const long int* x_shape_data = const_cast<const long int*>(&x_dims.data()[0]);
-  const long int* output_shape_data = const_cast<const long int*>(&output_dims.data()[0]);
-
-  int i_x_shape_data[x_dims.size()];
-  int i_output_shape_data[output_dims.size()];
-
+  const int64_t* x_shape_data =
+      const_cast<const int64_t*>(&x_dims.data()[0]);
+  const int64_t* output_shape_data =
+      const_cast<const int64_t*>(&output_dims.data()[0]);
+  std::vector<int32_t> i_x_shape_data(x_dims.size());
+  std::vector<int32_t> i_output_shape_data(output_dims.size());
   for (size_t i = 0; i < x_dims.size(); i++) {
     i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
   }
-
   for (size_t i = 0; i < output_dims.size(); i++) {
     i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
   }
-
-  CHECK(op_type == "relu");
+  CHECK_EQ(op_type, "relu");
   add_relu_layer(graph->GetCompilerHandle(),
-                 const_cast<const int*>(i_x_shape_data),
-                 x_dims.size(),
-                 static_cast<const char*>(x_var_name.c_str()),
-                 const_cast<const int*>(i_output_shape_data),
-                 output_dims.size(),
-                 static_cast<const char*>(output_var_name.c_str()),
-                 0.f,
-                 -1.f);
+                 const_cast<const int*>(&i_x_shape_data[0]),
+                 x_dims.size(),
+                 static_cast<const char*>(x_var_name.c_str()),
+                 const_cast<const int*>(&i_output_shape_data[0]),
+                 output_dims.size(),
+                 static_cast<const char*>(output_var_name.c_str()),
+                 0.f,
+                 -1.f);
   graph->AddNode(output_var_name);
   return SUCCESS;
 }
@@ -69,4 +66,5 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel){
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_SUBGRAPH_BRIDGE(BM, relu, paddle::lite::subgraph::bm::ActConverter);
+REGISTER_SUBGRAPH_BRIDGE(relu, kBM,
+                         paddle::lite::subgraph::bm::ActConverter);
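Every bridge in this patch repeats the same conversion: the framework's DDim stores int64_t extents, while the BM compiler API takes int32_t shape arrays, so each converter copies the dims element-wise. A minimal sketch of that pattern as a standalone helper (the function name and free-standing form are illustrative, not part of the patch):

    #include <cstdint>
    #include <vector>

    // Narrow an int64_t shape to the int32_t layout the BM compiler expects.
    // Assumes every extent fits in 32 bits, which holds for realistic tensors.
    std::vector<int32_t> ToInt32Shape(const int64_t* dims, size_t rank) {
      std::vector<int32_t> shape(rank);
      for (size_t i = 0; i < rank; i++) {
        shape[i] = static_cast<int32_t>(dims[i]);
      }
      return shape;
    }

Replacing the variable-length arrays (int i_x_shape_data[x_dims.size()]) with std::vector is also a correctness fix, not just style: VLAs are a GCC extension, not standard C++.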
diff --git a/lite/kernels/bm/bridges/batch_norm_op.cc b/lite/kernels/bm/bridges/batch_norm_op.cc
index 62dfad7ba3..2df0b44dea 100644
--- a/lite/kernels/bm/bridges/batch_norm_op.cc
+++ b/lite/kernels/bm/bridges/batch_norm_op.cc
@@ -12,10 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <bmcompiler_if.h>
 #include "lite/kernels/npu/bridges/registry.h"
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/bm/bridges/utility.h"
-#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
@@ -30,49 +30,41 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto op_info = op->op_info();
   auto op_type = op_info->Type();
   auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);
-
   // input
   auto x_var_name = op_info->Input("X").front();
   auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
   auto x_dims = x->dims();
-  const long int* x_shape_data = const_cast<const long int*>(&x_dims.data()[0]);
-  int i_x_shape_data[x_dims.size()];
+  const int64_t* x_shape_data = const_cast<const int64_t*>(&x_dims.data()[0]);
+  std::vector<int32_t> i_x_shape_data(x_dims.size());
   for (size_t i = 0; i < x_dims.size(); i++) {
     i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
   }
-
   int channel_size = x_dims[1];
-
   auto scale_var_name = op_info->Input("Scale").front();
   auto scale = scope->FindVar(scale_var_name)->GetMutable<lite::Tensor>();
-
   auto bias_var_name = op_info->Input("Bias").front();
   auto bias = scope->FindVar(bias_var_name)->GetMutable<lite::Tensor>();
-
   auto mean_var_name = op_info->Input("Mean").front();
   auto mean = scope->FindVar(mean_var_name)->GetMutable<lite::Tensor>();
-
   auto variance_var_name = op_info->Input("Variance").front();
-  auto variance = scope->FindVar(variance_var_name)->GetMutable<lite::Tensor>();
-
+  auto variance =
+      scope->FindVar(variance_var_name)->GetMutable<lite::Tensor>();
   // output
   auto output_var_name = op_info->Output("Y").front();
   auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
   auto output_dims = output->dims();
-  const long int* output_shape_data = const_cast<const long int*>(&output_dims.data()[0]);
-  int i_output_shape_data[output_dims.size()];
+  const int64_t* output_shape_data =
+      const_cast<const int64_t*>(&output_dims.data()[0]);
+  std::vector<int32_t> i_output_shape_data(output_dims.size());
   for (size_t i = 0; i < output_dims.size(); i++) {
     i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
   }
-
   auto epsilon = op_info->GetAttr<float>("epsilon");
   auto unique_bn_out_name = lite::subgraph::bm::UniqueName("batch_norm_out");
-
   auto* scale_data = scale->mutable_data<float>();
   auto* bias_data = bias->mutable_data<float>();
   auto* mean_data = mean->mutable_data<float>();
   auto* variance_data = variance->mutable_data<float>();
-
   for (int c = 0; c < channel_size; c++) {
     float inv_scale = 1.f / (std::sqrt(variance_data[c] + epsilon));
     bias_data[c] = bias_data[c] - inv_scale * scale_data[c] * mean_data[c];
@@ -83,17 +75,15 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   int **shape = new int *[input_num];
   int *dim = new int[input_num];
   const char **name = new const char *[input_num];
-
   name[0] = static_cast<const char*>(x_var_name.c_str());
   dim[0] = x_dims.size();
-  shape[0] = i_x_shape_data;
-
+  shape[0] = &i_x_shape_data[0];
   add_scale_layer(graph->GetCompilerHandle(),
                   input_num,
                   shape,
                   dim,
                   name,
-                  const_cast<const int*>(i_output_shape_data),
+                  const_cast<const int*>(&i_output_shape_data[0]),
                   output_dims.size(),
                   static_cast<const char*>(output_var_name.c_str()),
                   static_cast<const char*>(unique_op_name.c_str()),
@@ -102,7 +92,6 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                   1,
                   1,
                   1);
-
   delete [] shape;
   delete [] name;
   delete [] dim;
@@ -116,4 +105,5 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_SUBGRAPH_BRIDGE(BM, batch_norm, paddle::lite::subgraph::bm::BatchNormConverter);
+REGISTER_SUBGRAPH_BRIDGE(batch_norm, kBM,
+                         paddle::lite::subgraph::bm::BatchNormConverter);
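The converter above folds batch norm into the affine form y = a*x + b consumed by add_scale_layer, with a = scale / sqrt(variance + epsilon) and b = bias - a * mean. A self-contained sketch of that arithmetic, consistent with the bias update visible in the diff (the matching scale update sits in elided hunk context; names here are illustrative):

    #include <cmath>
    #include <vector>

    // Fold BN statistics into per-channel scale/bias so that afterwards
    // y = scale[c] * x + bias[c]. Bias must be updated before scale,
    // since it reads the unmodified scale value.
    void FoldBatchNorm(std::vector<float>* scale, std::vector<float>* bias,
                       const std::vector<float>& mean,
                       const std::vector<float>& variance, float epsilon) {
      for (size_t c = 0; c < scale->size(); c++) {
        float inv_std = 1.f / std::sqrt(variance[c] + epsilon);
        (*bias)[c] -= inv_std * (*scale)[c] * mean[c];
        (*scale)[c] *= inv_std;
      }
    }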
diff --git a/lite/kernels/bm/bridges/conv_op.cc b/lite/kernels/bm/bridges/conv_op.cc
index e33585ec01..7f5517bc92 100644
--- a/lite/kernels/bm/bridges/conv_op.cc
+++ b/lite/kernels/bm/bridges/conv_op.cc
@@ -12,11 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <bmcompiler_if.h>
 #include "lite/operators/conv_op.h"
 #include "lite/kernels/npu/bridges/registry.h"
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/bm/bridges/utility.h"
-#include "bmcompiler_if.h"
+
 
 namespace paddle {
 namespace lite {
@@ -26,13 +27,11 @@ namespace bm {
 int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(ctx != nullptr);
   CHECK(op != nullptr);
-
   auto graph = static_cast<Graph*>(ctx);
   auto scope = op->scope();
   auto op_info = op->op_info();
   auto op_type = op_info->Type();
   auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);
-
   auto input_var_name = op_info->Input("Input").front();
   auto input = scope->FindVar(input_var_name)->GetMutable<lite::Tensor>();
   auto input_dims = input->dims();
@@ -42,11 +41,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto filter_var_name = op_info->Input("Filter").front();
   auto filter = scope->FindVar(filter_var_name)->GetMutable<lite::Tensor>();
   auto filter_dims = filter->dims();
-
-  CHECK(input_dims.size() == 4);
-  CHECK(output_dims.size() == 4);
-  CHECK(filter_dims.size() == 4);
-
+  CHECK_EQ(input_dims.size(), 4);
+  CHECK_EQ(output_dims.size(), 4);
+  CHECK_EQ(filter_dims.size(), 4);
   bool has_bias = lite::subgraph::bm::HasInputArg(op_info, scope, "Bias");
   float* bias_data = nullptr;
   if (has_bias) {
@@ -54,33 +51,31 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     auto* bias = scope->FindVar(bias_var_name)->GetMutable<lite::Tensor>();
     bias_data = static_cast<float*>(bias->mutable_data<float>());
   }
-
-  const long int* input_shape_data = const_cast<const long int*>(&input_dims.data()[0]);
-  const long int* output_shape_data = const_cast<const long int*>(&output_dims.data()[0]);
-
-  int i_input_shape_data[input_dims.size()];
-  int i_output_shape_data[output_dims.size()];
+  const int64_t* input_shape_data =
+      const_cast<const int64_t*>(&input_dims.data()[0]);
+  const int64_t* output_shape_data =
+      const_cast<const int64_t*>(&output_dims.data()[0]);
+  std::vector<int32_t> i_input_shape_data(input_dims.size());
+  std::vector<int32_t> i_output_shape_data(output_dims.size());
   for (size_t i = 0; i < input_dims.size(); i++) {
     i_input_shape_data[i] = static_cast<int>(input_shape_data[i]);
   }
-
   for (size_t i = 0; i < output_dims.size(); i++) {
     i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
   }
-
-  const float* filter_data = const_cast<const float*>(filter->mutable_data<float>());
-
+  const float* filter_data =
+      const_cast<const float*>(filter->mutable_data<float>());
   auto groups = op_info->GetAttr<int>("groups");
   auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
   auto strides = op_info->GetAttr<std::vector<int>>("strides");
   auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
   add_conv_layer(graph->GetCompilerHandle(),
-                 const_cast<const int*>(i_input_shape_data),
+                 const_cast<const int*>(&i_input_shape_data[0]),
                  input_dims.size(),
                  static_cast<const char*>(input_var_name.c_str()),
-                 const_cast<const int*>(i_output_shape_data),
+                 const_cast<const int*>(&i_output_shape_data[0]),
                  output_dims.size(),
                  static_cast<const char*>(output_var_name.c_str()),
                  static_cast<const char*>(unique_op_name.c_str()),
@@ -107,4 +102,5 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_SUBGRAPH_BRIDGE(BM, conv2d, paddle::lite::subgraph::bm::ConvConverter);
+REGISTER_SUBGRAPH_BRIDGE(conv2d, kBM,
+                         paddle::lite::subgraph::bm::ConvConverter);
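For reference, the 4-D output shape that ConvConverter checks must satisfy the usual convolution relation between the groups/paddings/strides/dilations attributes it reads. A one-function sketch of that formula (not patch code):

    #include <cstdint>

    // Spatial output extent of a convolution for one axis, given kernel
    // size k and the symmetric padding used by this bridge.
    int64_t ConvOutSize(int64_t in, int k, int pad, int stride, int dilation) {
      return (in + 2 * pad - dilation * (k - 1) - 1) / stride + 1;
    }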
diff --git a/lite/kernels/bm/bridges/elementwise_ops.cc b/lite/kernels/bm/bridges/elementwise_ops.cc
index 28c35a587e..fa11f8e376 100644
--- a/lite/kernels/bm/bridges/elementwise_ops.cc
+++ b/lite/kernels/bm/bridges/elementwise_ops.cc
@@ -11,13 +11,12 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
+#include <bmcompiler_defs.h>
+#include <bmcompiler_if.h>
+#include <bmcompiler_if_lite.h>
 #include "lite/kernels/npu/bridges/registry.h"
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/bm/bridges/utility.h"
-#include "bmcompiler_if.h"
-#include "bmcompiler_if_lite.h"
-#include "bmcompiler_defs.h"
 
 namespace paddle {
 namespace lite {
@@ -27,111 +26,106 @@ namespace bm {
 int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(ctx != nullptr);
   CHECK(op != nullptr);
-
   auto graph = static_cast<Graph*>(ctx);
   auto scope = op->scope();
   auto op_info = op->op_info();
   auto op_type = op_info->Type();
-
   // input
   const int input_num = 2;
   int **shape = new int *[input_num];
   int *dim = new int[input_num];
   const char **name = new const char *[input_num];
-
   auto x_var_name = op_info->Input("X").front();
   auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
   auto x_dims = x->dims();
   name[0] = static_cast<const char*>(x_var_name.c_str());
   dim[0] = x_dims.size();
-  const long int* x_shape_data = const_cast<const long int*>(&x_dims.data()[0]);
-  int i_x_shape_data[x_dims.size()];
+  const int64_t* x_shape_data =
+      const_cast<const int64_t*>(&x_dims.data()[0]);
+  std::vector<int32_t> i_x_shape_data(x_dims.size());
   for (size_t i = 0; i < x_dims.size(); i++) {
     i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
   }
-  shape[0] = i_x_shape_data;
-
+  shape[0] = &i_x_shape_data[0];
   auto y_var_name = op_info->Input("Y").front();
   auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
   auto y_dims = y->dims();
   name[1] = static_cast<const char*>(y_var_name.c_str());
   dim[1] = y_dims.size();
-  const long int* y_shape_data = const_cast<const long int*>(&y_dims.data()[0]);
-  int i_y_shape_data[y_dims.size()];
+  const int64_t* y_shape_data =
+      const_cast<const int64_t*>(&y_dims.data()[0]);
+  std::vector<int32_t> i_y_shape_data(y_dims.size());
   for (size_t i = 0; i < y_dims.size(); i++) {
     i_y_shape_data[i] = static_cast<int>(y_shape_data[i]);
   }
-  shape[1] = i_y_shape_data;
+  shape[1] = &i_y_shape_data[0];
   bool y_is_const = !graph->HasNode(y_var_name);
-
   // output
   auto output_var_name = op_info->Output("Out").front();
-  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output =
+      scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
   auto output_dims = output->dims();
-  const long int* output_shape_data = const_cast<const long int*>(&output_dims.data()[0]);
-  int i_output_shape_data[output_dims.size()];
+  const int64_t* output_shape_data =
+      const_cast<const int64_t*>(&output_dims.data()[0]);
+  std::vector<int32_t> i_output_shape_data(output_dims.size());
   for (size_t i = 0; i < output_dims.size(); i++) {
     i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
   }
-
   if (y_is_const) {
-    CHECK(op_type == "elementwise_add");
+    CHECK_EQ(op_type, "elementwise_add");
   }
-
   int op_code{-1};
   float coeff[2] = {1.f, 1.f};
-
   if (op_type == "elementwise_mul") {
     op_code = 0;
   } else if (op_type == "elementwise_add") {
     op_code = 1;
-  } else if(op_type == "elementwise_sub") {
+  } else if (op_type == "elementwise_sub") {
     op_code = 1;
     coeff[1] = -1.f;
   } else {
     LOG(FATAL) << "UNSUPPORTED ELTWISE OPERATION: " << op_type;
   }
-
   if (!y_is_const) {
     add_eltwise_layer(graph->GetCompilerHandle(),
-                    input_num,
-                    shape,
-                    dim,
-                    name,
-                    const_cast<const int*>(i_output_shape_data),
-                    output_dims.size(),
-                    static_cast<const char*>(output_var_name.c_str()),
-                    op_code,
-                    coeff);
+                      input_num,
+                      shape,
+                      dim,
+                      name,
+                      const_cast<const int*>(&i_output_shape_data[0]),
+                      output_dims.size(),
+                      static_cast<const char*>(output_var_name.c_str()),
+                      op_code,
+                      coeff);
   } else {
-    const float* y_data = const_cast<const float*>(y->mutable_data<float>());
-    const float* x_data = const_cast<const float*>(x->mutable_data<float>());
+    const float* y_data =
+        const_cast<const float*>(y->mutable_data<float>());
+    const float* x_data =
+        const_cast<const float*>(x->mutable_data<float>());
     bm_add_const_tensor(graph->GetCompilerHandle(),
-                       name[1],
-                       shape[0],
-                       dim[0],
-                       static_cast<bm_data_type_t>(DTYPE_FP32),
-                       static_cast<const void*>(y_data));
+                        name[1],
+                        shape[0],
+                        dim[0],
+                        static_cast<bm_data_type_t>(DTYPE_FP32),
+                        static_cast<const void*>(y_data));
 
     add_binary_layer_v2(graph->GetCompilerHandle(),
-                       name[0],
-                       shape[0],
-                       dim[0],
-                       0,
-                       static_cast<const float*>(x_data),
-                       name[1],
-                       shape[0],
-                       dim[0],
-                       0,
-                       static_cast<const float*>(y_data),
-                       static_cast<const char*>(output_var_name.c_str()),
-                       0);
+                        name[0],
+                        shape[0],
+                        dim[0],
+                        0,
+                        static_cast<const float*>(x_data),
+                        name[1],
+                        shape[0],
+                        dim[0],
+                        0,
+                        static_cast<const float*>(y_data),
+                        static_cast<const char*>(output_var_name.c_str()),
+                        0);
   }
-
   delete [] shape;
   delete [] name;
   delete [] dim;
-
   graph->AddNode(output_var_name);
   return SUCCESS;
 }
@@ -141,4 +135,5 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_SUBGRAPH_BRIDGE(BM, elementwise_add, paddle::lite::subgraph::bm::ElementwiseConverter);
+REGISTER_SUBGRAPH_BRIDGE(elementwise_add, kBM,
+                         paddle::lite::subgraph::bm::ElementwiseConverter);
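The op_code/coeff encoding above maps subtraction onto the same weighted-sum primitive as addition by negating the second coefficient. A scalar model of those semantics (illustrative only; the real layer operates element-wise over tensors):

    // Scalar model of the BM eltwise encoding used in ElementwiseConverter:
    // op_code 0 multiplies, op_code 1 forms coeff[0]*x + coeff[1]*y, so
    // elementwise_sub is op_code 1 with coeff = {1.f, -1.f}.
    float EltwiseScalar(int op_code, const float coeff[2], float x, float y) {
      return op_code == 0 ? x * y : coeff[0] * x + coeff[1] * y;
    }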
diff --git a/lite/kernels/bm/bridges/mul_op.cc b/lite/kernels/bm/bridges/mul_op.cc
index fc6a9b66f4..099a9550a6 100644
--- a/lite/kernels/bm/bridges/mul_op.cc
+++ b/lite/kernels/bm/bridges/mul_op.cc
@@ -11,11 +11,10 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
+#include <bmcompiler_if.h>
 #include "lite/kernels/npu/bridges/registry.h"
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/bm/bridges/utility.h"
-#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
@@ -30,32 +29,30 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto op_info = op->op_info();
   auto op_type = op_info->Type();
   auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);
-
   // only support y is const
-
   // input
   auto x_var_name = op_info->Input("X").front();
   auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
   auto x_dims = x->dims();
-
-  const long int* x_shape_data = const_cast<const long int*>(&x_dims.data()[0]);
-  int i_x_shape_data[x_dims.size()];
+  const int64_t* x_shape_data =
+      const_cast<const int64_t*>(&x_dims.data()[0]);
+  std::vector<int32_t> i_x_shape_data(x_dims.size());
   for (size_t i = 0; i < x_dims.size(); i++) {
     i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
   }
-
   // add reshape layer
   int i_x_reshape_shape_data[2];
   for (size_t i = 0; i < 2; i++) {
     i_x_reshape_shape_data[i] = static_cast<int>(x_shape_data[i]);
   }
   int reshape_param[] = {0, -1};
-  auto unique_op_reshape_name = lite::subgraph::bm::UniqueName(op_type + "_reshape");
+  auto unique_op_reshape_name =
+      lite::subgraph::bm::UniqueName(op_type + "_reshape");
   add_reshape_layer(graph->GetCompilerHandle(),
-                    const_cast<const int*>(i_x_shape_data),
+                    const_cast<const int*>(&i_x_shape_data[0]),
                     x_dims.size(),
                     static_cast<const char*>(x_var_name.c_str()),
-                    const_cast<const int*>(i_x_reshape_shape_data),
+                    const_cast<const int*>(&i_x_reshape_shape_data[0]),
                     2,
                     static_cast<const char*>(unique_op_reshape_name.c_str()),
                     const_cast<const int*>(reshape_param));
@@ -63,32 +60,30 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto y_var_name = op_info->Input("Y").front();
   auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
   auto y_dims = y->dims();
-
   // output
   auto output_var_name = op_info->Output("Out").front();
   auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
   auto output_dims = output->dims();
-  const long int* output_shape_data = const_cast<const long int*>(&output_dims.data()[0]);
-  int i_output_shape_data[output_dims.size()];
+  const int64_t* output_shape_data =
+      const_cast<const int64_t*>(&output_dims.data()[0]);
+  std::vector<int32_t> i_output_shape_data(output_dims.size());
   for (size_t i = 0; i < output_dims.size(); i++) {
     i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
   }
-
   add_fc_layer(graph->GetCompilerHandle(),
-              const_cast<const int*>(i_x_reshape_shape_data),
-              2,
-              static_cast<const char*>(unique_op_reshape_name.c_str()),
-              const_cast<const int*>(i_output_shape_data),
-              output_dims.size(),
-              static_cast<const char*>(output_var_name.c_str()),
-              static_cast<const char*>(unique_op_name.c_str()),
-              i_x_reshape_shape_data[1],
-              i_output_shape_data[1],
-              static_cast<const float*>(y->mutable_data<float>()),
-              nullptr,
-              0,
-              0);
-
+               const_cast<const int*>(&i_x_reshape_shape_data[0]),
+               2,
+               static_cast<const char*>(unique_op_reshape_name.c_str()),
+               const_cast<const int*>(&i_output_shape_data[0]),
+               output_dims.size(),
+               static_cast<const char*>(output_var_name.c_str()),
+               static_cast<const char*>(unique_op_name.c_str()),
+               i_x_reshape_shape_data[1],
+               i_output_shape_data[1],
+               static_cast<const float*>(y->mutable_data<float>()),
+               nullptr,
+               0,
+               0);
   graph->AddNode(output_var_name);
   return SUCCESS;
 }
@@ -98,4 +93,5 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_SUBGRAPH_BRIDGE(BM, mul, paddle::lite::subgraph::bm::MulConverter);
+REGISTER_SUBGRAPH_BRIDGE(mul,
+                         kBM, paddle::lite::subgraph::bm::MulConverter);
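MulConverter lowers mul to reshape + fully connected: X is flattened to a [M, K] matrix (reshape_param {0, -1} keeps the first extent and infers the second), then multiplied against the constant Y of shape [K, N]. A shape-level sketch of the contract (illustrative, not patch code):

    #include <cassert>

    // Shape-level model of the mul lowering: X[M,K] x Y[K,N] -> Out[M,N],
    // matching i_x_reshape_shape_data[1] == K and i_output_shape_data[1] == N
    // in the add_fc_layer call above.
    void MulOutShape(const int x2d[2], const int y2d[2], int out2d[2]) {
      assert(x2d[1] == y2d[0]);  // inner extents must agree
      out2d[0] = x2d[0];
      out2d[1] = y2d[1];
    }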
diff --git a/lite/kernels/bm/bridges/paddle_use_bridges.h b/lite/kernels/bm/bridges/paddle_use_bridges.h
index 58452caf39..417d016c78 100644
--- a/lite/kernels/bm/bridges/paddle_use_bridges.h
+++ b/lite/kernels/bm/bridges/paddle_use_bridges.h
@@ -14,11 +14,11 @@
 
 #pragma once
 
-USE_SUBGRAPH_BRIDGE(BM, relu);
-USE_SUBGRAPH_BRIDGE(BM, conv2d);
-USE_SUBGRAPH_BRIDGE(BM, elementwise_add);
-USE_SUBGRAPH_BRIDGE(BM, pool2d);
-USE_SUBGRAPH_BRIDGE(BM, softmax);
-USE_SUBGRAPH_BRIDGE(BM, mul);
-USE_SUBGRAPH_BRIDGE(BM, batch_norm);
-USE_SUBGRAPH_BRIDGE(BM, scale);
+USE_SUBGRAPH_BRIDGE(relu, kBM);
+USE_SUBGRAPH_BRIDGE(conv2d, kBM);
+USE_SUBGRAPH_BRIDGE(elementwise_add, kBM);
+USE_SUBGRAPH_BRIDGE(pool2d, kBM);
+USE_SUBGRAPH_BRIDGE(softmax, kBM);
+USE_SUBGRAPH_BRIDGE(mul, kBM);
+USE_SUBGRAPH_BRIDGE(batch_norm, kBM);
+USE_SUBGRAPH_BRIDGE(scale, kBM);
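paddle_use_bridges.h is the linker-side half of the registration pair: REGISTER_SUBGRAPH_BRIDGE in each converter file creates a registry entry keyed by (op_type, target), and USE_SUBGRAPH_BRIDGE forces the converter's object file to be linked so the entry actually exists at runtime. A toy model of such a string-keyed registry (sketch only; the real macros live in lite/kernels/npu/bridges/registry.h):

    #include <functional>
    #include <map>
    #include <string>
    #include <utility>

    // Toy (op_type, target) -> converter registry, illustrating what the
    // REGISTER_/USE_SUBGRAPH_BRIDGE macro pair keys on.
    using Converter = std::function<int(void*, void*, void*)>;

    std::map<std::string, Converter>& BridgeRegistry() {
      static std::map<std::string, Converter> instance;
      return instance;
    }

    bool RegisterBridge(const std::string& op, const std::string& target,
                        Converter fn) {
      BridgeRegistry()[op + "/" + target] = std::move(fn);
      return true;  // assigned to a global so registration runs pre-main
    }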
diff --git a/lite/kernels/bm/bridges/pool_op.cc b/lite/kernels/bm/bridges/pool_op.cc
index e9e98f3faf..e4e7b47fbd 100644
--- a/lite/kernels/bm/bridges/pool_op.cc
+++ b/lite/kernels/bm/bridges/pool_op.cc
@@ -11,11 +11,10 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
+#include <bmcompiler_if.h>
 #include "lite/kernels/npu/bridges/registry.h"
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/bm/bridges/utility.h"
-#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
@@ -30,69 +29,65 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto op_info = op->op_info();
   auto op_type = op_info->Type();
   auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);
-
   // input
   auto x_var_name = op_info->Input("X").front();
   auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
   auto x_dims = x->dims();
-  const long int* x_shape_data = const_cast<const long int*>(&x_dims.data()[0]);
-  int i_x_shape_data[x_dims.size()];
+  const int64_t* x_shape_data =
+      const_cast<const int64_t*>(&x_dims.data()[0]);
+  std::vector<int32_t> i_x_shape_data(x_dims.size());
   for (size_t i = 0; i < x_dims.size(); i++) {
     i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
   }
-
   // output
-  int *shape[1];
-  int dim[1];
+  int32_t *shape[1];
+  int32_t dim[1];
   const char *name[1];
   auto output_var_name = op_info->Output("Out").front();
   auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
   auto output_dims = output->dims();
-  const long int* output_shape_data = const_cast<const long int*>(&output_dims.data()[0]);
-  int i_output_shape_data[output_dims.size()];
+  const int64_t* output_shape_data =
+      const_cast<const int64_t*>(&output_dims.data()[0]);
+  std::vector<int32_t> i_output_shape_data(output_dims.size());
   for (size_t i = 0; i < output_dims.size(); i++) {
     i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
   }
-  shape[0] = i_output_shape_data;
+  shape[0] = &i_output_shape_data[0];
   name[0] = static_cast<const char*>(output_var_name.c_str());
   dim[0] = output_dims.size();
-
   auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
   CHECK(pooling_type == "max" || pooling_type == "avg");
-
   auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
   auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
   auto strides = op_info->GetAttr<std::vector<int>>("strides");
   auto global_pooling = op_info->GetAttr<bool>("global_pooling");
   auto ceil_mode = op_info->GetAttr<bool>("ceil_mode");
-
   bool average_exclusive = false;
   if (pooling_type == "avg") {
     average_exclusive = op_info->GetAttr<bool>("exclusive");
   }
-
   add_pooling_layer(graph->GetCompilerHandle(),
-                  const_cast<const int*>(i_x_shape_data),
-                  x_dims.size(),
-                  static_cast<const char*>(x_var_name.c_str()),
-                  1,
-                  shape,
-                  dim,
-                  name,
-                  ksize[0],
-                  ksize[1],
-                  paddings[0],
-                  paddings[0],
-                  paddings[1],
-                  paddings[1],
-                  strides[0],
-                  strides[1],
-                  (ksize[0] > 1 && ksize[1] > 1) && pooling_type == "max" ? 0 : 1,
-                  static_cast<int>(average_exclusive),
-                  static_cast<int>(global_pooling),
-                  static_cast<int>(ceil_mode),
-                  static_cast<const char*>(unique_op_name.c_str()),
-                  nullptr);
+                    const_cast<const int*>(&i_x_shape_data[0]),
+                    x_dims.size(),
+                    static_cast<const char*>(x_var_name.c_str()),
+                    1,
+                    shape,
+                    dim,
+                    name,
+                    ksize[0],
+                    ksize[1],
+                    paddings[0],
+                    paddings[0],
+                    paddings[1],
+                    paddings[1],
+                    strides[0],
+                    strides[1],
+                    (ksize[0] > 1 && ksize[1] > 1) && pooling_type == "max" ? 0 : 1,
+                    static_cast<int>(average_exclusive),
+                    static_cast<int>(global_pooling),
+                    static_cast<int>(ceil_mode),
+                    static_cast<const char*>(unique_op_name.c_str()),
+                    nullptr);
   graph->AddNode(output_var_name);
   return SUCCESS;
 }
@@ -101,5 +96,5 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace subgraph
 }  // namespace lite
 }  // namespace paddle
-
-REGISTER_SUBGRAPH_BRIDGE(BM, pool2d, paddle::lite::subgraph::bm::PoolConverter);
+REGISTER_SUBGRAPH_BRIDGE(pool2d, kBM,
+                         paddle::lite::subgraph::bm::PoolConverter);
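The ksize/paddings/strides/ceil_mode attributes read above determine the pooled output extent in the usual way; with ceil_mode the division rounds up, which can add one extra window at the border. A sketch of that relation (not patch code):

    // Pooled output extent for one spatial axis with symmetric padding,
    // matching how PoolConverter's attributes combine.
    int PoolOutSize(int in, int k, int pad, int stride, bool ceil_mode) {
      int numerator = in + 2 * pad - k;
      int out = numerator / stride + 1;
      if (ceil_mode && numerator % stride != 0) out += 1;  // round up
      return out;
    }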
diff --git a/lite/kernels/bm/bridges/scale_op.cc b/lite/kernels/bm/bridges/scale_op.cc
index e03e56b384..312719cf8a 100644
--- a/lite/kernels/bm/bridges/scale_op.cc
+++ b/lite/kernels/bm/bridges/scale_op.cc
@@ -12,11 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <bmcompiler_if.h>
+#include <bmcompiler_op_code.h>
 #include "lite/kernels/npu/bridges/registry.h"
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/bm/bridges/utility.h"
-#include "bmcompiler_op_code.h"
-#include "bmcompiler_if.h"
+
 
 namespace paddle {
 namespace lite {
@@ -32,50 +33,41 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto op_info = op->op_info();
   auto op_type = op_info->Type();
   auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);
-
   // input
   auto x_var_name = op_info->Input("X").front();
   auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
   auto x_dims = x->dims();
-  const long int* x_shape_data = const_cast<const long int*>(&x_dims.data()[0]);
-
-  int i_x_shape_data[x_dims.size()];
+  const int64_t* x_shape_data =
+      const_cast<const int64_t*>(&x_dims.data()[0]);
+  std::vector<int32_t> i_x_shape_data(x_dims.size());
   for (size_t i = 0; i < x_dims.size(); i++) {
     i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
   }
-
   // output
   auto output_var_name = op_info->Output("Out").front();
-
   auto scale = op_info->GetAttr<float>("scale");
   auto bias = op_info->GetAttr<float>("bias");
   auto bias_after_scale = op_info->GetAttr<bool>("bias_after_scale");
-
   if (!bias_after_scale) {
     bias *= scale;
   }
-
-
-  auto unique_op_scale_name = lite::subgraph::bm::UniqueName(op_type);
-  add_const_binary_layer(graph->GetCompilerHandle(),
-                        static_cast<const char*>(x_var_name.c_str()),
-                        const_cast<const int*>(i_x_shape_data),
-                        x_dims.size(),
-                        scale,
-                        static_cast<const char*>(unique_op_scale_name.c_str()),
-                        BINARY_MUL,
-                        0);
-
-
-  add_const_binary_layer(graph->GetCompilerHandle(),
-                        static_cast<const char*>(unique_op_scale_name.c_str()),
-                        const_cast<const int*>(i_x_shape_data),
-                        x_dims.size(),
-                        bias,
-                        static_cast<const char*>(output_var_name.c_str()),
-                        BINARY_ADD,
-                        0);
-
+  auto unique_op_scale_name = lite::subgraph::bm::UniqueName(op_type);
+  add_const_binary_layer(graph->GetCompilerHandle(),
+                         static_cast<const char*>(x_var_name.c_str()),
+                         const_cast<const int*>(&i_x_shape_data[0]),
+                         x_dims.size(),
+                         scale,
+                         static_cast<const char*>(unique_op_scale_name.c_str()),
+                         BINARY_MUL,
+                         0);
+  add_const_binary_layer(graph->GetCompilerHandle(),
+                         static_cast<const char*>(unique_op_scale_name.c_str()),
+                         const_cast<const int*>(&i_x_shape_data[0]),
+                         x_dims.size(),
+                         bias,
+                         static_cast<const char*>(output_var_name.c_str()),
+                         BINARY_ADD,
+                         0);
   graph->AddNode(output_var_name);
   return SUCCESS;
 }
@@ -85,4 +77,5 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_SUBGRAPH_BRIDGE(BM, scale, paddle::lite::subgraph::bm::ScaleConverter);
+REGISTER_SUBGRAPH_BRIDGE(scale, kBM,
+                         paddle::lite::subgraph::bm::ScaleConverter);

diff --git a/lite/kernels/bm/bridges/softmax_op.cc b/lite/kernels/bm/bridges/softmax_op.cc
index f69162c8b6..f077c68709 100644
--- a/lite/kernels/bm/bridges/softmax_op.cc
+++ b/lite/kernels/bm/bridges/softmax_op.cc
@@ -11,11 +11,10 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
+#include <bmcompiler_if.h>
 #include "lite/kernels/npu/bridges/registry.h"
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/bm/bridges/utility.h"
-#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
@@ -28,46 +27,44 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto graph = static_cast<Graph*>(ctx);
   auto scope = op->scope();
   auto op_info = op->op_info();
-
   // input
   auto x_var_name = op_info->Input("X").front();
   auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
   auto x_dims = x->dims();
-  const long int* x_shape_data = const_cast<const long int*>(&x_dims.data()[0]);
-  int i_x_shape_data[x_dims.size()];
-  for (size_t i = 0; i < x_dims.size(); i++) {
+  const int64_t* x_shape_data =
+      const_cast<const int64_t*>(&x_dims.data()[0]);
+  size_t length = x_dims.size();
+  std::vector<int32_t> i_x_shape_data(length);
+  for (size_t i = 0; i < length; i++) {
     i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
   }
-
   // output
   auto output_var_name = op_info->Output("Out").front();
   auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
   auto output_dims = output->dims();
-  const long int* output_shape_data = const_cast<const long int*>(&output_dims.data()[0]);
-  int i_output_shape_data[output_dims.size()];
-  for (size_t i = 0; i < output_dims.size(); i++) {
+  const int64_t* output_shape_data =
+      const_cast<const int64_t*>(&output_dims.data()[0]);
+  length = output_dims.size();
+  std::vector<int32_t> i_output_shape_data(length);
+  for (size_t i = 0; i < length; i++) {
     i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
   }
-
   auto axis = op_info->GetAttr<int>("axis");
   if (axis < 0) {
     axis += x_dims.size();
   }
-
   int outer_num = x_dims.Slice(0, axis).production();
   int inner_num = x_dims.Slice(axis + 1, x_dims.size()).production();
-
   add_softmax_layer(graph->GetCompilerHandle(),
-                    const_cast<const int*>(i_x_shape_data),
+                    const_cast<const int*>(&i_x_shape_data[0]),
                     x_dims.size(),
                     static_cast<const char*>(x_var_name.c_str()),
-                    const_cast<const int*>(i_output_shape_data),
+                    const_cast<const int*>(&i_output_shape_data[0]),
                     output_dims.size(),
                     static_cast<const char*>(output_var_name.c_str()),
                     inner_num,
                     outer_num,
                     x_dims[axis]);
-
   graph->AddNode(output_var_name);
   return SUCCESS;
 }
@@ -77,4 +74,5 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_SUBGRAPH_BRIDGE(BM, softmax, paddle::lite::subgraph::bm::SoftmaxConverter);
+REGISTER_SUBGRAPH_BRIDGE(softmax, kBM,
+                         paddle::lite::subgraph::bm::SoftmaxConverter);
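SoftmaxConverter flattens the tensor around the softmax axis: outer_num is the product of the extents before axis and inner_num the product after it, so the layer sees a [outer_num, axis_extent, inner_num] view. A self-contained sketch of that decomposition (illustrative names):

    #include <cstdint>
    #include <vector>

    // Compute the (outer, inner) view used by SoftmaxConverter; axis is
    // assumed already normalized to be non-negative.
    void SoftmaxView(const std::vector<int64_t>& dims, int axis,
                     int64_t* outer, int64_t* inner) {
      *outer = 1;
      *inner = 1;
      for (int i = 0; i < axis; i++) *outer *= dims[i];
      for (size_t i = static_cast<size_t>(axis) + 1; i < dims.size(); i++) {
        *inner *= dims[i];
      }
    }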
diff --git a/lite/kernels/bm/subgraph_compute.cc b/lite/kernels/bm/subgraph_compute.cc
index 1652e0cbf7..a3f11ac78f 100644
--- a/lite/kernels/bm/subgraph_compute.cc
+++ b/lite/kernels/bm/subgraph_compute.cc
@@ -17,6 +17,7 @@
 #include <sys/time.h>
 #include <time.h>
 #include <unistd.h>
+#include <utility>
 #include "lite/core/op_registry.h"
 #include "lite/core/type_system.h"
 #include "lite/kernels/bm/bridges/graph.h"
@@ -34,18 +35,17 @@ int SubgraphEngine::BuildDeviceProgram() {
   const auto& bridges = subgraph::Registry::Instance();
   graph.CreateCompilerHandle();
   auto& ctx = this->ctx_->template As<BMContext>();
-
   for (auto& inst : origin_program_) {
     auto op = inst.op();
     CHECK(op);
     op->CheckShape();
     op->InferShape();
     std::string op_type = op->op_info()->Type();
-    if (!bridges.Exists("BM", op_type)) {
+    if (!bridges.Exists(op_type, "kBM")) {
       return subgraph::FAILED;
     }
     auto kernel = inst.kernel();
-    status |= bridges.Select("BM", op_type)(reinterpret_cast<void*>(&graph),
+    status |= bridges.Select(op_type, "kBM")(reinterpret_cast<void*>(&graph),
                                              const_cast<OpLite*>(op),
                                              const_cast<KernelBase*>(kernel));
     if (subgraph::CHECK_FAILED(status)) {
@@ -54,8 +54,8 @@ int SubgraphEngine::BuildDeviceProgram() {
   }
 
   std::string net_name = "paddle_bitmain";
-  __bmcompile_opt(graph.GetCompilerHandle(), const_cast<char*>(net_name.c_str()), 2);
-
+  __bmcompile_opt(graph.GetCompilerHandle(),
+                  const_cast<char*>(net_name.c_str()), 2);
   void* bmodel_data = nullptr;
   unsigned int data_size = 0;
   bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
@@ -64,32 +64,30 @@ int SubgraphEngine::BuildDeviceProgram() {
   if (false == bmrt_load_bmodel_data(bmrt_hd_, bmodel_data, data_size)) {
     return subgraph::FAILED;
   }
-
   bmrt_get_network_names(bmrt_hd_, &net_names_);
   net_info_ = bmrt_get_network_info(bmrt_hd_, net_names_[0]);
   auto &stage = net_info_->stages[0];
-
   // input
   origin_idims_.resize(input_names_.size());
   origin_itensors_.resize(input_names_.size());
-  device_inputs_.resize(input_names_.size()); 
+  device_inputs_.resize(input_names_.size());
   for (size_t i = 0; i < input_names_.size(); i++) {
     origin_itensors_[i] = scope_->FindMutableTensor(input_names_[i]);
     CHECK(origin_itensors_[i]);
-    origin_idims_[i] = origin_itensors_[i]->dims(); 
-    bm_device_mem_t* p_mem = static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
+    origin_idims_[i] = origin_itensors_[i]->dims();
+    bm_device_mem_t* p_mem =
+        static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
     CHECK(p_mem != nullptr);
-    CHECK(bm_malloc_device_byte(bm_hd_, p_mem, origin_itensors_[i]->memory_size()) == BM_SUCCESS);
+    CHECK_EQ(bm_malloc_device_byte(bm_hd_,
+        p_mem, origin_itensors_[i]->memory_size()), BM_SUCCESS);
     bmrt_tensor_with_device(&device_inputs_[i],
                             *p_mem,
                             net_info_->input_dtypes[i],
                             stage.input_shapes[i]);
   }
-
-  // output 
+  // output
   origin_odims_.resize(output_names_.size());
   origin_otensors_.resize(output_names_.size());
   device_outputs_.resize(output_names_.size());
-
   for (size_t i = 0; i < output_names_.size(); i++) {
     origin_otensors_[i] = scope_->FindMutableTensor(output_names_[i]);
     CHECK(origin_otensors_[i]);
@@ -97,12 +95,13 @@ int SubgraphEngine::BuildDeviceProgram() {
     output_map_.insert(std::pair<std::string, int>(output_names_[i], i));
     origin_otensors_[i]->mutable_data<float>();
   }
-
   for (size_t i = 0; i < output_names_.size(); i++) {
     int mapping_index = output_map_.at(net_info_->output_names[i]);
-    bm_device_mem_t* p_mem = static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
+    bm_device_mem_t* p_mem =
+        static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
     CHECK(p_mem != nullptr);
-    CHECK(bm_malloc_device_byte(bm_hd_, p_mem, origin_otensors_[mapping_index]->memory_size()) == BM_SUCCESS);
+    CHECK_EQ(bm_malloc_device_byte(bm_hd_,
+        p_mem, origin_otensors_[mapping_index]->memory_size()), BM_SUCCESS);
     bmrt_tensor_with_device(&device_outputs_[i],
                             *p_mem,
                             net_info_->output_dtypes[i],
                             stage.output_shapes[i]);
@@ -113,14 +112,21 @@ int SubgraphEngine::BuildDeviceProgram() {
 
 int SubgraphEngine::LaunchDeviceProgram() {
   for (size_t i = 0; i < device_inputs_.size(); i++) {
-    bm_memcpy_s2d(bm_hd_, device_inputs_[i].device_mem, const_cast<void*>(origin_itensors_[i]->raw_data()));
+    bm_memcpy_s2d(bm_hd_,
+                  device_inputs_[i].device_mem,
+                  const_cast<void*>(origin_itensors_[i]->raw_data()));
   }
-
-  bmrt_launch_tensor_ex(bmrt_hd_, net_names_[0], static_cast<const bm_tensor_t*>(&device_inputs_[0]),
-                        net_info_->input_num, static_cast<bm_tensor_t*>(&device_outputs_[0]), net_info_->output_num, true, false);
-  bm_thread_sync(bm_hd_); 
+  bmrt_launch_tensor_ex(bmrt_hd_,
+                        net_names_[0],
+                        static_cast<const bm_tensor_t*>(&device_inputs_[0]),
+                        net_info_->input_num,
+                        static_cast<bm_tensor_t*>(&device_outputs_[0]),
+                        net_info_->output_num, true, false);
+  bm_thread_sync(bm_hd_);
   for (size_t i = 0; i < device_outputs_.size(); i++) {
-    bm_memcpy_d2s(bm_hd_, const_cast<void*>(origin_otensors_[i]->raw_data()), device_outputs_[i].device_mem);
+    bm_memcpy_d2s(bm_hd_,
+                  const_cast<void*>(origin_otensors_[i]->raw_data()),
+                  device_outputs_[i].device_mem);
   }
   return 0;
 }
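BuildDeviceProgram keeps output_map_ from tensor name to index because bmruntime may report outputs in a different order than the engine's own output_names_. A self-contained sketch of that reordering step (illustrative names, not patch code):

    #include <map>
    #include <string>
    #include <vector>

    // Map runtime-reported output names back to the engine's own ordering,
    // mirroring the output_map_ lookup in BuildDeviceProgram().
    std::vector<int> MapOutputs(const std::vector<std::string>& engine_names,
                                const std::vector<std::string>& runtime_names) {
      std::map<std::string, int> index;
      for (size_t i = 0; i < engine_names.size(); i++) {
        index[engine_names[i]] = static_cast<int>(i);
      }
      std::vector<int> mapping(runtime_names.size());
      for (size_t i = 0; i < runtime_names.size(); i++) {
        mapping[i] = index.at(runtime_names[i]);  // throws if names disagree
      }
      return mapping;
    }

Note also that bm_thread_sync after bmrt_launch_tensor_ex is what makes the subsequent device-to-host copies safe; the launch itself is asynchronous.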
diff --git a/lite/kernels/npu/bridges/CMakeLists.txt b/lite/kernels/npu/bridges/CMakeLists.txt
index 2c516e47e4..e667cc719b 100644
--- a/lite/kernels/npu/bridges/CMakeLists.txt
+++ b/lite/kernels/npu/bridges/CMakeLists.txt
@@ -1,4 +1,4 @@
-if(NOT LITE_WITH_NPU AND NOT LITE_WITH_XPU)
+if(NOT LITE_WITH_NPU AND NOT LITE_WITH_XPU AND NOT LITE_WITH_BM)
   return()
 endif()
diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt
index a7ae414573..84e9a7db56 100644
--- a/lite/tests/kernels/CMakeLists.txt
+++ b/lite/tests/kernels/CMakeLists.txt
@@ -1,4 +1,4 @@
-if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH_ARM))
+if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_WITH_X86 OR LITE_WITH_ARM))
   lite_cc_test(test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
   lite_cc_test(test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
   lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
@@ -35,36 +35,36 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH
   lite_cc_test(test_kernel_pool_compute SRCS pool_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 
   if(LITE_BUILD_EXTRA)
-    lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_stack_compute SRCS stack_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    #lite_cc_test(test_kernel_generate_proposals_compute SRCS generate_proposals_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    #lite_cc_test(test_kernel_roi_align_compute SRCS roi_align_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS ${bm_kernels} arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_stack_compute SRCS stack_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    #lite_cc_test(test_kernel_generate_proposals_compute SRCS generate_proposals_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    #lite_cc_test(test_kernel_roi_align_compute SRCS roi_align_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
   endif()
-  lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-  lite_cc_test(test_kernel_prior_box_compute SRCS prior_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-  lite_cc_test(test_kernel_negative_compute SRCS negative_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-  lite_cc_test(test_kernel_bilinear_interp_compute SRCS bilinear_interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-  lite_cc_test(test_kernel_nearest_interp_compute SRCS nearest_interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-  lite_cc_test(test_kernel_shape_compute SRCS shape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-  lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-  lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-  lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-  lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-  lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-  lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_prior_box_compute SRCS prior_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_negative_compute SRCS negative_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_bilinear_interp_compute SRCS bilinear_interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_nearest_interp_compute SRCS nearest_interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_shape_compute SRCS shape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 endif()

diff --git a/lite/tools/build_bm.sh b/lite/tools/build_bm.sh
new file mode 100755
index 0000000000..f4cfee5ec6
--- /dev/null
+++ b/lite/tools/build_bm.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+set -ex
+
+# global variables with default value
+BM_SDK_ROOT="$(pwd)/../BM_SDK" # BM SDK
+TARGET_NAME="BM1682"           # default target
+BUILD_EXTRA=OFF                # ON(with sequence ops)/OFF
+WITH_TESTING=ON                # ON/OFF
+
+function print_usage {
+    echo -e "\nUSAGE:"
+    echo
+    echo "----------------------------------------"
+    echo -e "--bm_sdk_root=<path to BM SDK>"
+    echo -e "--target_name=<target name>"
+    echo "----------------------------------------"
+    echo
+}
+
+# readonly variables with default value
+readonly CMAKE_COMMON_OPTIONS="-DWITH_LITE=ON \
+                               -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF \
+                               -DWITH_PYTHON=OFF \
+                               -DLITE_WITH_ARM=OFF"
+
+readonly NUM_CORES_FOR_COMPILE=${LITE_BUILD_THREADS:-1}
+
+readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
+readonly workspace=$(pwd)
+
+function prepare_thirdparty {
+    if [ ! -d $workspace/third-party -o -f $workspace/third-party-05b862.tar.gz ]; then
+        rm -rf $workspace/third-party
+
+        if [ ! -f $workspace/third-party-05b862.tar.gz ]; then
+            wget $THIRDPARTY_TAR
+        fi
+        tar xzf third-party-05b862.tar.gz
+    else
+        git submodule update --init --recursive
+    fi
+}
+
+# For code gen, a source file is generated after a test, but it is depended
+# on by some targets in cmake; here we fake an empty file to make cmake work.
+function prepare_workspace {
+    # in build directory
+    # 1. Prepare gen_code file
+    GEN_CODE_PATH_PREFIX=lite/gen_code
+    mkdir -p ./${GEN_CODE_PATH_PREFIX}
+    touch ./${GEN_CODE_PATH_PREFIX}/__generated_code__.cc
+
+    # 2. Prepare debug tool
+    DEBUG_TOOL_PATH_PREFIX=lite/tools/debug
+    mkdir -p ./${DEBUG_TOOL_PATH_PREFIX}
+    cp ../${DEBUG_TOOL_PATH_PREFIX}/analysis_tool.py ./${DEBUG_TOOL_PATH_PREFIX}/
+
+    # clone submodule
+    # git submodule update --init --recursive
+    prepare_thirdparty
+}
+
+function build_bm {
+    build_dir=${workspace}/build.lite.bm
+    mkdir -p $build_dir
+    cd $build_dir
+
+    prepare_workspace
+    cmake .. \
+        ${CMAKE_COMMON_OPTIONS} \
+        -DWITH_GPU=OFF \
+        -DWITH_MKLDNN=OFF \
+        -DLITE_WITH_X86=ON \
+        -DWITH_MKL=ON \
+        -DLITE_BUILD_EXTRA=ON \
+        -DLITE_WITH_XPU=OFF \
+        -DLITE_WITH_BM=ON \
+        -DWITH_TESTING=${WITH_TESTING} \
+        -DBM_SDK_ROOT=${BM_SDK_ROOT}
+
+    make -j$NUM_CORES_FOR_COMPILE
+
+    cd -
+    echo "Done"
+}
+
+function main {
+    # Parse command line.
+    for i in "$@"; do
+        case $i in
+            --target_name=*)
+                TARGET_NAME="${i#*=}"
+                shift
+                ;;
+            --bm_sdk_root=*)
+                BM_SDK_ROOT="${i#*=}"
+                shift
+                ;;
+            bm)
+                build_bm
+                shift
+                ;;
+            *)
+                # unknown option
+                print_usage
+                exit 1
+                ;;
+        esac
+    done
+}
+
+main $@
--
GitLab
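A typical invocation of the new script, assuming it is run from the repository root with the BM SDK unpacked alongside the checkout, would be "./lite/tools/build_bm.sh --bm_sdk_root=/path/to/BM_SDK bm"; the positional bm argument selects the build action, while --target_name is parsed but, as of this patch, only recorded in TARGET_NAME without affecting the cmake flags.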