From 470753d553afd00f1d7140f74dee4b3f6c9dff79 Mon Sep 17 00:00:00 2001
From: liuqi
Date: Thu, 21 Dec 2017 11:33:27 +0800
Subject: [PATCH] Add enable_neon rule to leave neon code alone when compiling opencl.

---
Review notes (this area, between the "---" separator and the diff, is not
part of the commit message):
  * Purpose: NEON sources, the -fopenmp flags and the NEON operator
    registrations are now selected by the Bazel config_setting
    //mace:enable_neon (enabled with "--define neon=true") instead of by
    if_android_arm64 / the compiler's __ARM_NEON macro.
  * MACE_ENABLE_NEON is only defined through the copts edited in
    mace/ops/BUILD; without "--define neon=true" the "#if MACE_ENABLE_NEON"
    blocks are compiled out.
  * tools/bazel-adb-run.sh now always passes "--define neon=true".
  * tools/validate_gcn.sh no longer sets OMP_NUM_THREADS.
  * The hunk for mace/ops/global_avg_pooling.h was dropped: it only turned
    the banner line "//" into "//DMACE_ENABLE_NEON", which looks like an
    accidental paste. The diffstat below is adjusted accordingly.
  * NOTE(review): line breaks, indentation and blank context lines were
    reconstructed from a whitespace-collapsed copy. Confirm against the tree,
    or apply with "git apply --ignore-whitespace".
  * NOTE(review): C++ context lines such as "AddNOp);" and
    ".TypeConstraint("T")" look like they lost their template arguments, and
    the From: header carries no e-mail address. TODO confirm before applying.

 mace/BUILD                     |  8 ++++++++
 mace/examples/BUILD            |  8 ++++----
 mace/kernels/BUILD             | 10 +++++-----
 mace/mace.bzl                  |  6 ++++++
 mace/ops/BUILD                 |  5 +++--
 mace/ops/addn.cc               |  4 ++--
 mace/ops/batch_norm.cc         |  4 ++--
 mace/ops/conv_2d.cc            |  4 ++--
 mace/ops/depthwise_conv2d.cc   |  4 ++--
 mace/ops/global_avg_pooling.cc |  4 ++--
 mace/ops/ops_test_util.h       |  1 +
 mace/ops/pooling.cc            |  4 ++--
 mace/ops/relu.cc               |  4 ++--
 mace/ops/resize_bilinear.cc    |  4 ++--
 tools/bazel-adb-run.sh         |  6 +++++-
 tools/validate_gcn.sh          |  2 --
 16 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/mace/BUILD b/mace/BUILD
index 8ae6e6d6..65eb1e8d 100644
--- a/mace/BUILD
+++ b/mace/BUILD
@@ -32,6 +32,14 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(
+    name = "enable_neon",
+    define_values = {
+        "neon": "true",
+    },
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "embed_binary_program",
     define_values = {
diff --git a/mace/examples/BUILD b/mace/examples/BUILD
index 7b6a268d..ffdb4f72 100644
--- a/mace/examples/BUILD
+++ b/mace/examples/BUILD
@@ -1,5 +1,5 @@
 # Examples
-load("//mace:mace.bzl", "if_android", "if_embed_binary_program")
+load("//mace:mace.bzl", "if_android", "if_enable_neon", "if_embed_binary_program")
 
 cc_binary(
     name = "helloworld",
@@ -7,7 +7,7 @@ cc_binary(
         "helloworld.cc",
     ],
     copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
-    linkopts = ["-fopenmp"],
+    linkopts = if_enable_neon(["-fopenmp"]),
     deps = [
         "//mace/core",
         "//mace/ops",
@@ -20,7 +20,7 @@ cc_test(
     testonly = 1,
     srcs = ["benchmark_example.cc"],
     copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
-    linkopts = ["-fopenmp"],
+    linkopts = if_enable_neon(["-fopenmp"]),
     linkstatic = 1,
     deps = [
         "//mace/core",
@@ -32,7 +32,7 @@ cc_binary(
     name = "mace_run",
     srcs = ["mace_run.cc"],
     copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
-    linkopts = ["-fopenmp"],
+    linkopts = if_enable_neon(["-fopenmp"]),
     linkstatic = 1,
     deps = [
         "//mace/codegen:generated_models_lib",
diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD
index e03e63e2..da1ee8ec 100644
--- a/mace/kernels/BUILD
+++ b/mace/kernels/BUILD
@@ -7,22 +7,22 @@ package(
 
 licenses(["notice"])  # Apache 2.0
 
-load("//mace:mace.bzl", "if_android", "if_android_arm64", "if_embed_binary_program")
+load("//mace:mace.bzl", "if_android", "if_enable_neon", "if_embed_binary_program")
 
 cc_library(
     name = "kernels",
-    srcs = glob(["*.cc", "opencl/*.cc"]) + if_android_arm64(glob([
+    srcs = glob(["*.cc", "opencl/*.cc"]) + if_enable_neon(glob([
         "neon/*.cc",
     ])),
-    hdrs = glob(["*.h", "opencl/*.h"]) + if_android_arm64(glob([
+    hdrs = glob(["*.h", "opencl/*.h"]) + if_enable_neon(glob([
         "neon/*.h",
     ])),
     copts = [
         "-std=c++11",
-        "-fopenmp",
         "-D_GLIBCXX_USE_C99_MATH_TR1",
     ] +
-    if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]),
+    if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]) +
+    if_enable_neon(["-fopenmp"]),
     linkopts = if_android(["-lm"]),
     deps = [
         "//mace/core",
diff --git a/mace/mace.bzl b/mace/mace.bzl
index d7d10109..2cf6e2ae 100644
--- a/mace/mace.bzl
+++ b/mace/mace.bzl
@@ -35,3 +35,9 @@ def if_embed_binary_program(a):
     "//mace:embed_binary_program": a,
     "//conditions:default": [],
   })
+
+def if_enable_neon(a):
+  return select({
+    "//mace:enable_neon": a,
+    "//conditions:default": [],
+  })
diff --git a/mace/ops/BUILD b/mace/ops/BUILD
index 1da29725..2395ded6 100644
--- a/mace/ops/BUILD
+++ b/mace/ops/BUILD
@@ -7,7 +7,7 @@ package(
 
 licenses(["notice"])  # Apache 2.0
 
-load("//mace:mace.bzl", "if_android")
+load("//mace:mace.bzl", "if_android", "if_enable_neon")
 
 cc_library(
     name = "test",
@@ -34,7 +34,8 @@ cc_library(
         ["*.h"],
         exclude = ["ops_test_util.h"],
     ),
-    copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
+    copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"] +
+            if_enable_neon(["-DMACE_ENABLE_NEON"]),
     deps = [
         "//mace/kernels",
     ],
diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc
index ba0bb380..d29944de 100644
--- a/mace/ops/addn.cc
+++ b/mace/ops/addn.cc
@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("AddN")
                           .Build(),
                       AddNOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("AddN")
                            .TypeConstraint("T")
                            .Build(),
                        AddNOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN")
                              .TypeConstraint("T")
diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc
index 76723b2d..6136c814 100644
--- a/mace/ops/batch_norm.cc
+++ b/mace/ops/batch_norm.cc
@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("BatchNorm")
                           .Build(),
                       BatchNormOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("BatchNorm")
                            .TypeConstraint("T")
                            .Build(),
                        BatchNormOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm")
                              .TypeConstraint("T")
diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc
index 617bd2c5..ad771df6 100644
--- a/mace/ops/conv_2d.cc
+++ b/mace/ops/conv_2d.cc
@@ -16,12 +16,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D")
                           .Build(),
                       Conv2dOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("Conv2D")
                            .TypeConstraint("T")
                            .Build(),
                        Conv2dOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D")
                              .TypeConstraint("T")
diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc
index b8cb2e5b..ed13d099 100644
--- a/mace/ops/depthwise_conv2d.cc
+++ b/mace/ops/depthwise_conv2d.cc
@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
                           .Build(),
                       DepthwiseConv2dOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
                            .TypeConstraint("T")
                            .Build(),
                        DepthwiseConv2dOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
                              .TypeConstraint("T")
diff --git a/mace/ops/global_avg_pooling.cc b/mace/ops/global_avg_pooling.cc
index 53437844..f495c712 100644
--- a/mace/ops/global_avg_pooling.cc
+++ b/mace/ops/global_avg_pooling.cc
@@ -11,11 +11,11 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("GlobalAvgPooling")
                           .Build(),
                       GlobalAvgPoolingOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("GlobalAvgPooling")
                            .TypeConstraint("T")
                            .Build(),
                        GlobalAvgPoolingOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 }  // namespace mace
diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h
index d03e43de..04bb3c67 100644
--- a/mace/ops/ops_test_util.h
+++ b/mace/ops/ops_test_util.h
@@ -11,6 +11,7 @@
 #include "mace/core/common.h"
 #include "mace/core/net.h"
 #include "mace/core/tensor.h"
+#include "mace/core/workspace.h"
 #include "mace/core/runtime/opencl/opencl_runtime.h"
 #include "mace/kernels/opencl/helper.h"
 #include "mace/utils/utils.h"
diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc
index 17031378..3a467d12 100644
--- a/mace/ops/pooling.cc
+++ b/mace/ops/pooling.cc
@@ -15,12 +15,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling")
                           .Build(),
                       PoolingOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("Pooling")
                            .TypeConstraint("T")
                            .Build(),
                        PoolingOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling")
                              .TypeConstraint("T")
diff --git a/mace/ops/relu.cc b/mace/ops/relu.cc
index f471ae64..0197e65c 100644
--- a/mace/ops/relu.cc
+++ b/mace/ops/relu.cc
@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Relu")
                           .Build(),
                       ReluOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("Relu")
                            .TypeConstraint("T")
                            .Build(),
                        ReluOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu")
                              .TypeConstraint("T")
diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc
index 8eae7181..89f460fe 100644
--- a/mace/ops/resize_bilinear.cc
+++ b/mace/ops/resize_bilinear.cc
@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("ResizeBilinear")
                           .Build(),
                       ResizeBilinearOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("ResizeBilinear")
                            .TypeConstraint("T")
                            .Build(),
                        ResizeBilinearOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear")
                              .TypeConstraint("T")
diff --git a/tools/bazel-adb-run.sh b/tools/bazel-adb-run.sh
index e80ecd83..116e64db 100755
--- a/tools/bazel-adb-run.sh
+++ b/tools/bazel-adb-run.sh
@@ -27,7 +27,11 @@ PROFILINE="--define profiling=true"
 BRANCH=$(git symbolic-ref --short HEAD)
 COMMIT_ID=$(git rev-parse --short HEAD)
 
-bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET --crosstool_top=//external:android/crosstool --host_crosstool_top=@bazel_tools//tools/cpp:toolchain --cpu=$ANDROID_ABI
+bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \
+    --crosstool_top=//external:android/crosstool \
+    --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
+    --cpu=$ANDROID_ABI \
+    --define neon=true
 
 if [ $? -ne 0 ]; then
   exit 1
diff --git a/tools/validate_gcn.sh b/tools/validate_gcn.sh
index 84e95f7f..062b6f65 100644
--- a/tools/validate_gcn.sh
+++ b/tools/validate_gcn.sh
@@ -51,7 +51,6 @@ build_and_run()
   adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR}
   adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR}
 
-  num_threads=${1:-4}
   if [[ "${TUNING_OR_NOT}" != "0" && "$EMBED_OPENCL_BINARY" != true ]];then
     tuning_flag=1
     round=0 # only warm up
@@ -64,7 +63,6 @@ build_and_run()
     MACE_CPP_MIN_VLOG_LEVEL=0 \
     MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
     MACE_KERNEL_PATH=$KERNEL_DIR \
-    OMP_NUM_THREADS=$num_threads \
     ${PHONE_DATA_DIR}/mace_run \
       --model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \
       --input=mace_input_node \
-- 
GitLab