提交 470753d5 编写于 作者: L liuqi

Add an enable_neon config setting so that NEON code is only compiled when explicitly enabled (e.g. it is left out when building for OpenCL only).

上级 f291ce31
......@@ -32,6 +32,14 @@ config_setting(
visibility = ["//visibility:public"],
)
# Matches builds invoked with `--define neon=true`.
# The if_enable_neon() helper loaded from //mace:mace.bzl selects on this
# setting ("//mace:enable_neon") to add NEON-only sources and flags.
config_setting(
name = "enable_neon",
define_values = {
# Key/value pair of the `--define` flag that must be present on the command line.
"neon": "true",
},
visibility = ["//visibility:public"],
)
config_setting(
name = "embed_binary_program",
define_values = {
......
# Examples
load("//mace:mace.bzl", "if_android", "if_embed_binary_program")
load("//mace:mace.bzl", "if_android", "if_enable_neon", "if_embed_binary_program")
cc_binary(
name = "helloworld",
......@@ -7,7 +7,7 @@ cc_binary(
"helloworld.cc",
],
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = ["-fopenmp"],
linkopts = if_enable_neon(["-fopenmp"]),
deps = [
"//mace/core",
"//mace/ops",
......@@ -20,7 +20,7 @@ cc_test(
testonly = 1,
srcs = ["benchmark_example.cc"],
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = ["-fopenmp"],
linkopts = if_enable_neon(["-fopenmp"]),
linkstatic = 1,
deps = [
"//mace/core",
......@@ -32,7 +32,7 @@ cc_binary(
name = "mace_run",
srcs = ["mace_run.cc"],
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = ["-fopenmp"],
linkopts = if_enable_neon(["-fopenmp"]),
linkstatic = 1,
deps = [
"//mace/codegen:generated_models_lib",
......
......@@ -7,22 +7,22 @@ package(
licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android", "if_android_arm64", "if_embed_binary_program")
load("//mace:mace.bzl", "if_android", "if_enable_neon", "if_embed_binary_program")
cc_library(
name = "kernels",
srcs = glob(["*.cc", "opencl/*.cc"]) + if_android_arm64(glob([
srcs = glob(["*.cc", "opencl/*.cc"]) + if_enable_neon(glob([
"neon/*.cc",
])),
hdrs = glob(["*.h", "opencl/*.h"]) + if_android_arm64(glob([
hdrs = glob(["*.h", "opencl/*.h"]) + if_enable_neon(glob([
"neon/*.h",
])),
copts = [
"-std=c++11",
"-fopenmp",
"-D_GLIBCXX_USE_C99_MATH_TR1",
] +
if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]),
if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]) +
if_enable_neon(["-fopenmp"]),
linkopts = if_android(["-lm"]),
deps = [
"//mace/core",
......
......@@ -35,3 +35,9 @@ def if_embed_binary_program(a):
"//mace:embed_binary_program": a,
"//conditions:default": [],
})
def if_enable_neon(a):
    """Returns `a` when the //mace:enable_neon config setting matches.

    Args:
      a: value (typically a list of sources or flags) to use when the
        build is configured with the "//mace:enable_neon" setting.

    Returns:
      A select() that resolves to `a` under "//mace:enable_neon" and to an
      empty list otherwise.
    """
    neon_branches = {
        "//mace:enable_neon": a,
        "//conditions:default": [],
    }
    return select(neon_branches)
......@@ -7,7 +7,7 @@ package(
licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android")
load("//mace:mace.bzl", "if_android", "if_enable_neon")
cc_library(
name = "test",
......@@ -34,7 +34,8 @@ cc_library(
["*.h"],
exclude = ["ops_test_util.h"],
),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"] +
if_enable_neon(["-DMACE_ENABLE_NEON"]),
deps = [
"//mace/kernels",
],
......
......@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("AddN")
.Build(),
AddNOp<DeviceType::CPU, float>);
#if __ARM_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("AddN")
.TypeConstraint<float>("T")
.Build(),
AddNOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN")
.TypeConstraint<float>("T")
......
......@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("BatchNorm")
.Build(),
BatchNormOp<DeviceType::CPU, float>);
#if __ARM_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("BatchNorm")
.TypeConstraint<float>("T")
.Build(),
BatchNormOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm")
.TypeConstraint<float>("T")
......
......@@ -16,12 +16,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D")
.Build(),
Conv2dOp<DeviceType::CPU, half>);
#if __ARM_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Conv2D")
.TypeConstraint<float>("T")
.Build(),
Conv2dOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D")
.TypeConstraint<float>("T")
......
......@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
.Build(),
DepthwiseConv2dOp<DeviceType::CPU, float>);
#if __ARM_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
.TypeConstraint<float>("T")
.Build(),
DepthwiseConv2dOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("DepthwiseConv2d")
.TypeConstraint<float>("T")
......
......@@ -11,11 +11,11 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("GlobalAvgPooling")
.Build(),
GlobalAvgPoolingOp<DeviceType::CPU, float>);
#if __ARM_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("GlobalAvgPooling")
.TypeConstraint<float>("T")
.Build(),
GlobalAvgPoolingOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // MACE_ENABLE_NEON
} // namespace mace
//
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
......
......@@ -11,6 +11,7 @@
#include "mace/core/common.h"
#include "mace/core/net.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
......
......@@ -15,12 +15,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling")
.Build(),
PoolingOp<DeviceType::CPU, half>);
#if __ARM_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Pooling")
.TypeConstraint<float>("T")
.Build(),
PoolingOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling")
.TypeConstraint<float>("T")
......
......@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Relu")
.Build(),
ReluOp<DeviceType::CPU, float>);
#if __ARM_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("Relu")
.TypeConstraint<float>("T")
.Build(),
ReluOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu")
.TypeConstraint<float>("T")
......
......@@ -11,12 +11,12 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("ResizeBilinear")
.Build(),
ResizeBilinearOp<DeviceType::CPU, float>);
#if __ARM_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("ResizeBilinear")
.TypeConstraint<float>("T")
.Build(),
ResizeBilinearOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear")
.TypeConstraint<float>("T")
......
......@@ -27,7 +27,11 @@ PROFILINE="--define profiling=true"
BRANCH=$(git symbolic-ref --short HEAD)
COMMIT_ID=$(git rev-parse --short HEAD)
bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET --crosstool_top=//external:android/crosstool --host_crosstool_top=@bazel_tools//tools/cpp:toolchain --cpu=$ANDROID_ABI
bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \
--crosstool_top=//external:android/crosstool \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--cpu=$ANDROID_ABI \
--define neon=true
if [ $? -ne 0 ]; then
exit 1
......
......@@ -51,7 +51,6 @@ build_and_run()
adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR}
adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR}
num_threads=${1:-4}
if [[ "${TUNING_OR_NOT}" != "0" && "$EMBED_OPENCL_BINARY" != true ]];then
tuning_flag=1
round=0 # only warm up
......@@ -64,7 +63,6 @@ build_and_run()
MACE_CPP_MIN_VLOG_LEVEL=0 \
MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \
OMP_NUM_THREADS=$num_threads \
${PHONE_DATA_DIR}/mace_run \
--model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \
--input=mace_input_node \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册