提交 470753d5 编写于 作者: L liuqi

Add enable_neon rule to leave neon code alone when compiling opencl.

上级 f291ce31
...@@ -32,6 +32,14 @@ config_setting( ...@@ -32,6 +32,14 @@ config_setting(
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
) )
# Configuration condition that matches builds invoked with
# `--define neon=true`. It is publicly visible so that select()
# expressions in other packages can key on "//mace:enable_neon".
config_setting(
name = "enable_neon",
define_values = {
"neon": "true",
},
visibility = ["//visibility:public"],
)
config_setting( config_setting(
name = "embed_binary_program", name = "embed_binary_program",
define_values = { define_values = {
......
# Examples # Examples
load("//mace:mace.bzl", "if_android", "if_embed_binary_program") load("//mace:mace.bzl", "if_android", "if_enable_neon", "if_embed_binary_program")
cc_binary( cc_binary(
name = "helloworld", name = "helloworld",
...@@ -7,7 +7,7 @@ cc_binary( ...@@ -7,7 +7,7 @@ cc_binary(
"helloworld.cc", "helloworld.cc",
], ],
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = ["-fopenmp"], linkopts = if_enable_neon(["-fopenmp"]),
deps = [ deps = [
"//mace/core", "//mace/core",
"//mace/ops", "//mace/ops",
...@@ -20,7 +20,7 @@ cc_test( ...@@ -20,7 +20,7 @@ cc_test(
testonly = 1, testonly = 1,
srcs = ["benchmark_example.cc"], srcs = ["benchmark_example.cc"],
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = ["-fopenmp"], linkopts = if_enable_neon(["-fopenmp"]),
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
"//mace/core", "//mace/core",
...@@ -32,7 +32,7 @@ cc_binary( ...@@ -32,7 +32,7 @@ cc_binary(
name = "mace_run", name = "mace_run",
srcs = ["mace_run.cc"], srcs = ["mace_run.cc"],
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = ["-fopenmp"], linkopts = if_enable_neon(["-fopenmp"]),
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
"//mace/codegen:generated_models_lib", "//mace/codegen:generated_models_lib",
......
...@@ -7,22 +7,22 @@ package( ...@@ -7,22 +7,22 @@ package(
licenses(["notice"]) # Apache 2.0 licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android", "if_android_arm64", "if_embed_binary_program") load("//mace:mace.bzl", "if_android", "if_enable_neon", "if_embed_binary_program")
cc_library( cc_library(
name = "kernels", name = "kernels",
srcs = glob(["*.cc", "opencl/*.cc"]) + if_android_arm64(glob([ srcs = glob(["*.cc", "opencl/*.cc"]) + if_enable_neon(glob([
"neon/*.cc", "neon/*.cc",
])), ])),
hdrs = glob(["*.h", "opencl/*.h"]) + if_android_arm64(glob([ hdrs = glob(["*.h", "opencl/*.h"]) + if_enable_neon(glob([
"neon/*.h", "neon/*.h",
])), ])),
copts = [ copts = [
"-std=c++11", "-std=c++11",
"-fopenmp",
"-D_GLIBCXX_USE_C99_MATH_TR1", "-D_GLIBCXX_USE_C99_MATH_TR1",
] + ] +
if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]), if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]) +
if_enable_neon(["-fopenmp"]),
linkopts = if_android(["-lm"]), linkopts = if_android(["-lm"]),
deps = [ deps = [
"//mace/core", "//mace/core",
......
...@@ -35,3 +35,9 @@ def if_embed_binary_program(a): ...@@ -35,3 +35,9 @@ def if_embed_binary_program(a):
"//mace:embed_binary_program": a, "//mace:embed_binary_program": a,
"//conditions:default": [], "//conditions:default": [],
}) })
def if_enable_neon(a):
    """Selects `a` when the build matches `//mace:enable_neon`.

    Args:
      a: value to use when the `//mace:enable_neon` condition holds.

    Returns:
      A select() that resolves to `a` under `//mace:enable_neon` and to an
      empty list under any other configuration.
    """
    neon_choices = {
        "//mace:enable_neon": a,
        "//conditions:default": [],
    }
    return select(neon_choices)
...@@ -7,7 +7,7 @@ package( ...@@ -7,7 +7,7 @@ package(
licenses(["notice"]) # Apache 2.0 licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android") load("//mace:mace.bzl", "if_android", "if_enable_neon")
cc_library( cc_library(
name = "test", name = "test",
...@@ -34,7 +34,8 @@ cc_library( ...@@ -34,7 +34,8 @@ cc_library(
["*.h"], ["*.h"],
exclude = ["ops_test_util.h"], exclude = ["ops_test_util.h"],
), ),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"] +
if_enable_neon(["-DMACE_ENABLE_NEON"]),
deps = [ deps = [
"//mace/kernels", "//mace/kernels",
], ],
......
...@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("AddN") ...@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("AddN")
.Build(), .Build(),
AddNOp<DeviceType::CPU, float>); AddNOp<DeviceType::CPU, float>);
#if __ARM_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("AddN") REGISTER_NEON_OPERATOR(OpKeyBuilder("AddN")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
AddNOp<DeviceType::NEON, float>); AddNOp<DeviceType::NEON, float>);
#endif // __ARM_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN") REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("BatchNorm") ...@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("BatchNorm")
.Build(), .Build(),
BatchNormOp<DeviceType::CPU, float>); BatchNormOp<DeviceType::CPU, float>);
#if __ARM_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("BatchNorm") REGISTER_NEON_OPERATOR(OpKeyBuilder("BatchNorm")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
BatchNormOp<DeviceType::NEON, float>); BatchNormOp<DeviceType::NEON, float>);
#endif // __ARM_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm") REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -16,12 +16,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D") ...@@ -16,12 +16,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D")
.Build(), .Build(),
Conv2dOp<DeviceType::CPU, half>); Conv2dOp<DeviceType::CPU, half>);
#if __ARM_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Conv2D") REGISTER_NEON_OPERATOR(OpKeyBuilder("Conv2D")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
Conv2dOp<DeviceType::NEON, float>); Conv2dOp<DeviceType::NEON, float>);
#endif // __ARM_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D") REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("DepthwiseConv2d") ...@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
.Build(), .Build(),
DepthwiseConv2dOp<DeviceType::CPU, float>); DepthwiseConv2dOp<DeviceType::CPU, float>);
#if __ARM_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("DepthwiseConv2d") REGISTER_NEON_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
DepthwiseConv2dOp<DeviceType::NEON, float>); DepthwiseConv2dOp<DeviceType::NEON, float>);
#endif // __ARM_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("DepthwiseConv2d") REGISTER_OPENCL_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -11,11 +11,11 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("GlobalAvgPooling") ...@@ -11,11 +11,11 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("GlobalAvgPooling")
.Build(), .Build(),
GlobalAvgPoolingOp<DeviceType::CPU, float>); GlobalAvgPoolingOp<DeviceType::CPU, float>);
#if __ARM_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("GlobalAvgPooling") REGISTER_NEON_OPERATOR(OpKeyBuilder("GlobalAvgPooling")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
GlobalAvgPoolingOp<DeviceType::NEON, float>); GlobalAvgPoolingOp<DeviceType::NEON, float>);
#endif // __ARM_NEON #endif // MACE_ENABLE_NEON
} // namespace mace } // namespace mace
// //
// Copyright (c) 2017 XiaoMi All rights reserved. // Copyright (c) 2017 XiaoMi All rights reserved.
// //
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "mace/core/common.h" #include "mace/core/common.h"
#include "mace/core/net.h" #include "mace/core/net.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h" #include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
......
...@@ -15,12 +15,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling") ...@@ -15,12 +15,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling")
.Build(), .Build(),
PoolingOp<DeviceType::CPU, half>); PoolingOp<DeviceType::CPU, half>);
#if __ARM_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Pooling") REGISTER_NEON_OPERATOR(OpKeyBuilder("Pooling")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
PoolingOp<DeviceType::NEON, float>); PoolingOp<DeviceType::NEON, float>);
#endif // __ARM_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling") REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Relu") ...@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Relu")
.Build(), .Build(),
ReluOp<DeviceType::CPU, float>); ReluOp<DeviceType::CPU, float>);
#if __ARM_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Relu") REGISTER_NEON_OPERATOR(OpKeyBuilder("Relu")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
ReluOp<DeviceType::NEON, float>); ReluOp<DeviceType::NEON, float>);
#endif // __ARM_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu") REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("ResizeBilinear") ...@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("ResizeBilinear")
.Build(), .Build(),
ResizeBilinearOp<DeviceType::CPU, float>); ResizeBilinearOp<DeviceType::CPU, float>);
#if __ARM_NEON #if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("ResizeBilinear") REGISTER_NEON_OPERATOR(OpKeyBuilder("ResizeBilinear")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
.Build(), .Build(),
ResizeBilinearOp<DeviceType::NEON, float>); ResizeBilinearOp<DeviceType::NEON, float>);
#endif // __ARM_NEON #endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear") REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear")
.TypeConstraint<float>("T") .TypeConstraint<float>("T")
......
...@@ -27,7 +27,11 @@ PROFILINE="--define profiling=true" ...@@ -27,7 +27,11 @@ PROFILINE="--define profiling=true"
BRANCH=$(git symbolic-ref --short HEAD) BRANCH=$(git symbolic-ref --short HEAD)
COMMIT_ID=$(git rev-parse --short HEAD) COMMIT_ID=$(git rev-parse --short HEAD)
bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET --crosstool_top=//external:android/crosstool --host_crosstool_top=@bazel_tools//tools/cpp:toolchain --cpu=$ANDROID_ABI bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \
--crosstool_top=//external:android/crosstool \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--cpu=$ANDROID_ABI \
--define neon=true
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
exit 1 exit 1
......
...@@ -51,7 +51,6 @@ build_and_run() ...@@ -51,7 +51,6 @@ build_and_run()
adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR} adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR}
adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR} adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR}
num_threads=${1:-4}
if [[ "${TUNING_OR_NOT}" != "0" && "$EMBED_OPENCL_BINARY" != true ]];then if [[ "${TUNING_OR_NOT}" != "0" && "$EMBED_OPENCL_BINARY" != true ]];then
tuning_flag=1 tuning_flag=1
round=0 # only warm up round=0 # only warm up
...@@ -64,7 +63,6 @@ build_and_run() ...@@ -64,7 +63,6 @@ build_and_run()
MACE_CPP_MIN_VLOG_LEVEL=0 \ MACE_CPP_MIN_VLOG_LEVEL=0 \
MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \ MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \ MACE_KERNEL_PATH=$KERNEL_DIR \
OMP_NUM_THREADS=$num_threads \
${PHONE_DATA_DIR}/mace_run \ ${PHONE_DATA_DIR}/mace_run \
--model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \ --model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \
--input=mace_input_node \ --input=mace_input_node \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册