Gate the NEON kernels and OpenMP flags behind a Bazel `--define neon=true` switch.

Summary of what the hunks below do:
  * mace/BUILD: add the `enable_neon` config_setting (matches `--define neon=true`).
  * mace/mace.bzl: add the `if_enable_neon()` select() helper.
  * mace/kernels/BUILD, mace/examples/BUILD: use `if_enable_neon()` for the
    `neon/*` sources/headers and for `-fopenmp`.
  * mace/ops/BUILD: add `-DMACE_ENABLE_NEON` to copts when neon is enabled.
  * mace/ops/*.cc: guard NEON operator registration with `MACE_ENABLE_NEON`
    instead of `__ARM_NEON`.
  * tools/bazel-adb-run.sh: pass `--define neon=true` to `bazel build`.
  * tools/validate_gcn.sh: stop setting `OMP_NUM_THREADS` for `mace_run`.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch. Hunk line counts were checked against
the `@@` headers, but context-line indentation should be confirmed against the
tree before applying (or apply with `git apply --ignore-whitespace`).
NOTE(review): the C++ registration lines look like they have lost their
template arguments in the captured text -- verify against the real sources.

diff --git a/mace/BUILD b/mace/BUILD
index 8ae6e6d6adbcfca7b87ed14a308e186fccfa3114..65eb1e8d6f8f5f8ff59891ecffd7ecdd36b4326e 100644
--- a/mace/BUILD
+++ b/mace/BUILD
@@ -32,6 +32,14 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(
+    name = "enable_neon",
+    define_values = {
+        "neon": "true",
+    },
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "embed_binary_program",
     define_values = {
diff --git a/mace/examples/BUILD b/mace/examples/BUILD
index 7b6a268d56a78bce9879d9707d71f9f8eb4f29c6..ffdb4f7265da572368f6a61cffa81c43c96a1218 100644
--- a/mace/examples/BUILD
+++ b/mace/examples/BUILD
@@ -1,5 +1,5 @@
 # Examples
-load("//mace:mace.bzl", "if_android", "if_embed_binary_program")
+load("//mace:mace.bzl", "if_android", "if_enable_neon", "if_embed_binary_program")
 
 cc_binary(
     name = "helloworld",
@@ -7,7 +7,7 @@ cc_binary(
         "helloworld.cc",
     ],
     copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
-    linkopts = ["-fopenmp"],
+    linkopts = if_enable_neon(["-fopenmp"]),
     deps = [
         "//mace/core",
         "//mace/ops",
@@ -20,7 +20,7 @@ cc_test(
     testonly = 1,
     srcs = ["benchmark_example.cc"],
     copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
-    linkopts = ["-fopenmp"],
+    linkopts = if_enable_neon(["-fopenmp"]),
     linkstatic = 1,
     deps = [
         "//mace/core",
@@ -32,7 +32,7 @@ cc_binary(
     name = "mace_run",
     srcs = ["mace_run.cc"],
     copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
-    linkopts = ["-fopenmp"],
+    linkopts = if_enable_neon(["-fopenmp"]),
     linkstatic = 1,
     deps = [
         "//mace/codegen:generated_models_lib",
diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD
index e03e63e2eabefb4d66ab498f77a3531fc112d522..da1ee8ec3e5314e79c5ef286626f39504f99ea42 100644
--- a/mace/kernels/BUILD
+++ b/mace/kernels/BUILD
@@ -7,22 +7,22 @@ package(
 
 licenses(["notice"])  # Apache 2.0
 
-load("//mace:mace.bzl", "if_android", "if_android_arm64", "if_embed_binary_program")
+load("//mace:mace.bzl", "if_android", "if_enable_neon", "if_embed_binary_program")
 
 cc_library(
     name = "kernels",
-    srcs = glob(["*.cc", "opencl/*.cc"]) + if_android_arm64(glob([
+    srcs = glob(["*.cc", "opencl/*.cc"]) + if_enable_neon(glob([
         "neon/*.cc",
     ])),
-    hdrs = glob(["*.h", "opencl/*.h"]) + if_android_arm64(glob([
+    hdrs = glob(["*.h", "opencl/*.h"]) + if_enable_neon(glob([
         "neon/*.h",
     ])),
     copts = [
         "-std=c++11",
-        "-fopenmp",
         "-D_GLIBCXX_USE_C99_MATH_TR1",
     ] +
-    if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]),
+    if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]) +
+    if_enable_neon(["-fopenmp"]),
     linkopts = if_android(["-lm"]),
     deps = [
         "//mace/core",
diff --git a/mace/mace.bzl b/mace/mace.bzl
index d7d10109f75e2d7ba630528242ed53a0e79aecf6..2cf6e2aeab688a3785662ad10eb2d73d7e568ecd 100644
--- a/mace/mace.bzl
+++ b/mace/mace.bzl
@@ -35,3 +35,9 @@ def if_embed_binary_program(a):
       "//mace:embed_binary_program": a,
       "//conditions:default": [],
   })
+
+def if_enable_neon(a):
+  return select({
+      "//mace:enable_neon": a,
+      "//conditions:default": [],
+  })
diff --git a/mace/ops/BUILD b/mace/ops/BUILD
index 1da2972539c157e286ca8f32460b575e247046c2..2395ded6179dc8c7102f1ca4c0096aeaedbc9e79 100644
--- a/mace/ops/BUILD
+++ b/mace/ops/BUILD
@@ -7,7 +7,7 @@ package(
 
 licenses(["notice"])  # Apache 2.0
 
-load("//mace:mace.bzl", "if_android")
+load("//mace:mace.bzl", "if_android", "if_enable_neon")
 
 cc_library(
     name = "test",
@@ -34,7 +34,8 @@ cc_library(
         ["*.h"],
         exclude = ["ops_test_util.h"],
     ),
-    copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
+    copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"] +
+            if_enable_neon(["-DMACE_ENABLE_NEON"]),
     deps = [
         "//mace/kernels",
     ],
diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc
index ba0bb38019fbfc6274d09dfa81d9efd8e83ed789..d29944dec3237ffd7a72d15317353dc9ad59cd27 100644
--- a/mace/ops/addn.cc
+++ b/mace/ops/addn.cc
@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("AddN")
                           .Build(),
                       AddNOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("AddN")
                            .TypeConstraint("T")
                            .Build(),
                        AddNOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN")
                              .TypeConstraint("T")
diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc
index 76723b2dc2c369257b79fb66b8c472752253700d..6136c814f4ff6f11bc5697295729b4b54ea8e299 100644
--- a/mace/ops/batch_norm.cc
+++ b/mace/ops/batch_norm.cc
@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("BatchNorm")
                           .Build(),
                       BatchNormOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("BatchNorm")
                            .TypeConstraint("T")
                            .Build(),
                        BatchNormOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm")
                              .TypeConstraint("T")
diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc
index 617bd2c5600670513f67140979fd3ccee3ed6c98..ad771df67a4bb266be6e265e081fb54dcc9b9a2e 100644
--- a/mace/ops/conv_2d.cc
+++ b/mace/ops/conv_2d.cc
@@ -16,12 +16,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D")
                           .Build(),
                       Conv2dOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("Conv2D")
                            .TypeConstraint("T")
                            .Build(),
                        Conv2dOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D")
                              .TypeConstraint("T")
diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc
index b8cb2e5be759a4838351ceb0405f075a3bbbf364..ed13d0994336a398156e2aa056ec354fc35f8d72 100644
--- a/mace/ops/depthwise_conv2d.cc
+++ b/mace/ops/depthwise_conv2d.cc
@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
                           .Build(),
                       DepthwiseConv2dOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
                            .TypeConstraint("T")
                            .Build(),
                        DepthwiseConv2dOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
                              .TypeConstraint("T")
diff --git a/mace/ops/global_avg_pooling.cc b/mace/ops/global_avg_pooling.cc
index 534378445ca59b05af2d5c7e89b46d198b14c4f4..f495c71246b9763940d8169eba5b24bb984aada0 100644
--- a/mace/ops/global_avg_pooling.cc
+++ b/mace/ops/global_avg_pooling.cc
@@ -11,11 +11,11 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("GlobalAvgPooling")
                           .Build(),
                       GlobalAvgPoolingOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("GlobalAvgPooling")
                            .TypeConstraint("T")
                            .Build(),
                        GlobalAvgPoolingOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 }  // namespace mace
diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h
index d03e43de91f2c79a4424a9df2efb2250b340f43b..04bb3c67cf1b896ca617f92a6abbad18bf29abf1 100644
--- a/mace/ops/ops_test_util.h
+++ b/mace/ops/ops_test_util.h
@@ -11,6 +11,7 @@
 #include "mace/core/common.h"
 #include "mace/core/net.h"
 #include "mace/core/tensor.h"
+#include "mace/core/workspace.h"
 #include "mace/core/runtime/opencl/opencl_runtime.h"
 #include "mace/kernels/opencl/helper.h"
 #include "mace/utils/utils.h"
diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc
index 17031378f7e93ac6924f794ec352d3009181179d..3a467d1237c8508fade69d53162e2630fb48d83f 100644
--- a/mace/ops/pooling.cc
+++ b/mace/ops/pooling.cc
@@ -15,12 +15,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling")
                           .Build(),
                       PoolingOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("Pooling")
                            .TypeConstraint("T")
                            .Build(),
                        PoolingOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling")
                              .TypeConstraint("T")
diff --git a/mace/ops/relu.cc b/mace/ops/relu.cc
index f471ae64665f34ed9b109fdf5c3f2c1c79ce7320..0197e65cf6297f8addd0dc3acb5bf07425b6a1c7 100644
--- a/mace/ops/relu.cc
+++ b/mace/ops/relu.cc
@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Relu")
                           .Build(),
                       ReluOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("Relu")
                            .TypeConstraint("T")
                            .Build(),
                        ReluOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu")
                              .TypeConstraint("T")
diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc
index 8eae71819537a99cc08454e1585844f7d77f52e3..89f460fec98adda501aca49388badc4b67da3db7 100644
--- a/mace/ops/resize_bilinear.cc
+++ b/mace/ops/resize_bilinear.cc
@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("ResizeBilinear")
                           .Build(),
                       ResizeBilinearOp);
 
-#if __ARM_NEON
+#if MACE_ENABLE_NEON
 REGISTER_NEON_OPERATOR(OpKeyBuilder("ResizeBilinear")
                            .TypeConstraint("T")
                            .Build(),
                        ResizeBilinearOp);
-#endif  // __ARM_NEON
+#endif  // MACE_ENABLE_NEON
 
 REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear")
                              .TypeConstraint("T")
diff --git a/tools/bazel-adb-run.sh b/tools/bazel-adb-run.sh
index e80ecd83122e4322051e42c0f0760f6c94dc297f..116e64dbfa1dd41faa5222b829604cc17a50f2ed 100755
--- a/tools/bazel-adb-run.sh
+++ b/tools/bazel-adb-run.sh
@@ -27,7 +27,11 @@ PROFILINE="--define profiling=true"
 BRANCH=$(git symbolic-ref --short HEAD)
 COMMIT_ID=$(git rev-parse --short HEAD)
 
-bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET --crosstool_top=//external:android/crosstool --host_crosstool_top=@bazel_tools//tools/cpp:toolchain --cpu=$ANDROID_ABI
+bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \
+   --crosstool_top=//external:android/crosstool \
+   --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
+   --cpu=$ANDROID_ABI \
+   --define neon=true
 
 if [ $? -ne 0 ]; then
   exit 1
diff --git a/tools/validate_gcn.sh b/tools/validate_gcn.sh
index 84e95f7fbe81266caec401b752ae63cb4a8eb913..062b6f65653dedebd691aa63004c48055e0a9ad1 100644
--- a/tools/validate_gcn.sh
+++ b/tools/validate_gcn.sh
@@ -51,7 +51,6 @@ build_and_run()
   adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR}
   adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR}
 
-  num_threads=${1:-4}
   if [[ "${TUNING_OR_NOT}" != "0" && "$EMBED_OPENCL_BINARY" != true ]];then
     tuning_flag=1
     round=0 # only warm up
@@ -64,7 +63,6 @@ build_and_run()
     MACE_CPP_MIN_VLOG_LEVEL=0 \
     MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
     MACE_KERNEL_PATH=$KERNEL_DIR \
-    OMP_NUM_THREADS=$num_threads \
     ${PHONE_DATA_DIR}/mace_run \
       --model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \
       --input=mace_input_node \