diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 89916b0959ed482f0c9dcdfd767945d43643f21b..0b0e2436368096a3d72e571f8106e0666b22943b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -42,16 +42,16 @@ build_docs: cmake_build_android-armeabi-v7a: stage: build script: - - RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh + - RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh - LIBMACE32_FULL_SIZE=`stat -c%s build/cmake-build/armeabi-v7a/install/lib/libmace.so` - - if (( LIBMACE32_FULL_SIZE > 2200000 )) ; then echo "The libmace.so size too large"; exit 1; fi + - if (( LIBMACE32_FULL_SIZE > 1400000 )) ; then echo "The libmace.so size too large"; exit 1; fi cmake_build_android-arm64-v8: stage: build script: - - RUNTIME=GPU bash tools/cmake/cmake-build-arm64-v8a.sh + - RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-arm64-v8a.sh - LIBMACE64_FULL_SIZE=`stat -c%s build/cmake-build/arm64-v8a/install/lib/libmace.so` - - if (( LIBMACE64_FULL_SIZE > 3100000 )) ; then echo "The libmace.so size too large"; exit 1; fi + - if (( LIBMACE64_FULL_SIZE > 2300000 )) ; then echo "The libmace.so size too large"; exit 1; fi bazel_build: stage: build @@ -59,10 +59,14 @@ bazel_build: - bash tools/bazel_build_standalone_lib.sh - bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu - bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu,gpu + - LIBMACE32_FULL_SIZE=`stat -c%s build/lib/armeabi-v7a/libmace.so` + - if (( LIBMACE32_FULL_SIZE > 1400000 )) ; then echo "The libmace.so size too large"; exit 1; fi - bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu,gpu,dsp - bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu,gpu,apu - bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu - bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu,gpu + - LIBMACE64_FULL_SIZE=`stat -c%s build/lib/arm64-v8a/libmace.so` + - if (( LIBMACE64_FULL_SIZE > 
2300000 )) ; then echo "The libmace.so size too large"; exit 1; fi - bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu,gpu,dsp - bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu,gpu,apu - bash tools/bazel_build_standalone_lib.sh --abi=arm_linux_gnueabihf --runtimes=cpu diff --git a/docs/user_guide/advanced_usage_cmake.rst b/docs/user_guide/advanced_usage_cmake.rst index 3956b9757ede1ab22dc9079e5720f3837dbdac6e..a3d2d489bf77437b4bd1800f2c773d995e3b9afc 100644 --- a/docs/user_guide/advanced_usage_cmake.rst +++ b/docs/user_guide/advanced_usage_cmake.rst @@ -175,7 +175,7 @@ After that you can rebuild the engine. .. code-block:: bash - RUNTIME=GPU RUNMODE=code bash tools/cmake/cmake-build-armeabi-v7a.sh + RUNTIME=GPU RUNMODE=code QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh ``RUNMODE=code`` means you compile and link model library with MACE engine. diff --git a/docs/user_guide/basic_usage.rst b/docs/user_guide/basic_usage.rst index 57c888fb07a7cc9fd5bd20b43a370a93323f748d..c9b19cdcc6ce375229cdc330073cd7a2ecd07a43 100644 --- a/docs/user_guide/basic_usage.rst +++ b/docs/user_guide/basic_usage.rst @@ -45,7 +45,7 @@ Here we use the mobilenet-v2 model as an example. cd path/to/mace # Build library # output lib path: build/lib - bash tools/bazel_build_standalone_lib.sh [-abi=abi][-runtimes=rt1,rt2,...][-static] + bash tools/bazel_build_standalone_lib.sh [-abi=abi][-runtimes=rt1,rt2,...][-quantize][-bfloat16][-static] .. note:: diff --git a/docs/user_guide/basic_usage_cmake.rst b/docs/user_guide/basic_usage_cmake.rst index 131e375f52346cd1b97d6a00f47c4825bfa8622f..0f33a80e60e04e0b2568ad37a72b26fbc95c8868 100644 --- a/docs/user_guide/basic_usage_cmake.rst +++ b/docs/user_guide/basic_usage_cmake.rst @@ -20,7 +20,7 @@ Please make sure you have CMake installed. .. 
code-block:: sh - RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh + RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh which generate libraries in ``build/cmake-build/armeabi-v7a``, you can use either static libraries or the ``libmace.so`` shared library. diff --git a/docs/zh/user_guide/basic_usage.rst b/docs/zh/user_guide/basic_usage.rst index 3fd04b0b4e281847007e83ee0b02ad0ff3b3866c..82685301fb127f14b228e05f69bafe65d4081eb8 100644 --- a/docs/zh/user_guide/basic_usage.rst +++ b/docs/zh/user_guide/basic_usage.rst @@ -20,7 +20,7 @@ .. code-block:: sh - RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh + RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh 编译安装位置为 ``build/cmake-build/armeabi-v7a``, 可以使用 libmace 静态库或者动态库。 diff --git a/mace/core/runtime/opencl/cl2_header.h b/mace/core/runtime/opencl/cl2_header.h index 45063fca79150b8050e8754ad4367cbedbf1784f..00aa326aee3fc9e74438825fce0e3d49d2f21cae 100644 --- a/mace/core/runtime/opencl/cl2_header.h +++ b/mace/core/runtime/opencl/cl2_header.h @@ -37,7 +37,7 @@ #pragma GCC diagnostic ignored "-Wignored-attributes" #endif // MACE_OS_MAC -#include "CL/cl2.hpp" +#include <CL/cl2.hpp> #ifdef MACE_OS_MAC #pragma GCC diagnostic pop diff --git a/mace/ops/arm/base/common_neon.h b/mace/ops/arm/base/common_neon.h index e7bd3180163c78b0b54f854e0a16ff83389ab019..5b6d7899ec6260e53cb08d188c237f0be32287dc 100644 --- a/mace/ops/arm/base/common_neon.h +++ b/mace/ops/arm/base/common_neon.h @@ -144,7 +144,7 @@ inline void vst3q(float *ptr, float32x4x3_t v) { } inline float32x8_t vld1o(float *ptr) { - return {vld1q_f32(ptr), vld1q_f32(ptr + 4)}; + return {{vld1q_f32(ptr), vld1q_f32(ptr + 4)}}; } inline void vst1o(float *ptr, float32x8_t v) { @@ -209,8 +209,8 @@ inline float32x4_t vld1q(const BFloat16 *ptr) { // load of 2 4D vectors and perform de-interleaving inline float32x4x2_t vld2q_bf16(const uint16_t *ptr) { uint16x4x2_t u = vld2_u16(ptr); - return {vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)), - 
vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16))}; + return {{vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)), + vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16))}}; } inline float32x4x2_t vld2q_bf16(const BFloat16 *ptr) { @@ -228,9 +228,9 @@ inline float32x4x2_t vld2q(const BFloat16 *ptr) { // load of 3 4D vectors and perform de-interleaving inline float32x4x3_t vld3q_bf16(const uint16_t *ptr) { uint16x4x3_t u = vld3_u16(ptr); - return {vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)), - vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16)), - vreinterpretq_f32_u32(vshll_n_u16(u.val[2], 16))}; + return {{vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)), + vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16)), + vreinterpretq_f32_u32(vshll_n_u16(u.val[2], 16))}}; } inline float32x4x3_t vld3q_bf16(const BFloat16 *ptr) { @@ -264,8 +264,8 @@ inline void vst1q(BFloat16 *ptr, const float32x4_t v) { // store of 2 4D vectors and perform interleaving inline void vst2q_bf16(uint16_t *ptr, const float32x4x2_t v) { - uint16x4x2_t u = {vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16), - vshrn_n_u32(vreinterpretq_u32_f32(v.val[1]), 16)}; + uint16x4x2_t u = {{vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16), + vshrn_n_u32(vreinterpretq_u32_f32(v.val[1]), 16)}}; vst2_u16(ptr, u); } @@ -283,9 +283,9 @@ inline void vst2q(BFloat16 *ptr, const float32x4x2_t v) { // store of 3 4D vectors and perform interleaving inline void vst3q_bf16(uint16_t *ptr, const float32x4x3_t v) { - uint16x4x3_t u = {vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16), - vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16), - vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16)}; + uint16x4x3_t u = {{vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16), + vshrn_n_u32(vreinterpretq_u32_f32(v.val[1]), 16), + vshrn_n_u32(vreinterpretq_u32_f32(v.val[2]), 16)}}; vst3_u16(ptr, u); } @@ -304,8 +304,8 @@ inline void vst3q(BFloat16 *ptr, const float32x4x3_t v) { // load of 8D vector inline float32x8_t vld1o_bf16(const uint16_t *ptr) { uint16x8_t u = vld1q_u16(ptr); - return 
{vreinterpretq_f32_u32(vshll_n_u16(vget_low_u16(u), 16)), - vreinterpretq_f32_u32(vshll_n_u16(vget_high_u16(u), 16))}; + return {{vreinterpretq_f32_u32(vshll_n_u16(vget_low_u16(u), 16)), + vreinterpretq_f32_u32(vshll_n_u16(vget_high_u16(u), 16))}}; } inline float32x8_t vld1o_bf16(const BFloat16 *ptr) { diff --git a/test/ccunit/mace/ops/depthwise_conv2d_test.cc b/test/ccunit/mace/ops/depthwise_conv2d_test.cc index ae1bac04bd9f7754de2a8793f51894ef62c7afc7..b0db1b3deb1f6344956e1d879b88ad6ae5b1e403 100644 --- a/test/ccunit/mace/ops/depthwise_conv2d_test.cc +++ b/test/ccunit/mace/ops/depthwise_conv2d_test.cc @@ -492,6 +492,7 @@ TEST_F(DepthwiseConv2dOpTest, Quant) { TestQuant(3, 1, 128, 56, 56, 3, 3, SAME, {2, 2}); } +#ifdef MACE_ENABLE_BFLOAT16 namespace { void TestBFloat16(const index_t batch, const index_t multiplier, @@ -557,6 +558,8 @@ TEST_F(DepthwiseConv2dOpTest, BFloat16) { TestBFloat16(3, 1, 128, 56, 56, 3, 3, SAME, {2, 2}); } +#endif // MACE_ENABLE_BFLOAT16 + } // namespace test } // namespace ops } // namespace mace diff --git a/third_party/opencl-clhpp/opencl-clhpp.BUILD b/third_party/opencl-clhpp/opencl-clhpp.BUILD index 494006c450725c503950d0e86c8f07f345d704df..22c554d36d5b10912ebb05c826feeb333f661422 100644 --- a/third_party/opencl-clhpp/opencl-clhpp.BUILD +++ b/third_party/opencl-clhpp/opencl-clhpp.BUILD @@ -12,9 +12,12 @@ genrule( cmd = "workdir=$$(mktemp -d -t opencl-clhpp-build.XXXXXXXXXX); cp -aL $$(dirname $(location CMakeLists.txt))/* $$workdir; pushd $$workdir; mkdir build; pushd build; cmake ../ -DBUILD_DOCS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_TESTS=OFF; make generate_clhpp generate_cl2hpp; popd; popd; cp -a $$workdir/build/* $(@D); rm -rf $$workdir; echo installing to $(@D)", ) +# `srcs` is not used by the C++ code, but it is needed to trigger the `genrule`, +# so we add "include/CL/cl.hpp" and "include/CL/cl2.hpp" to `srcs`; these +# two files are imported via `includes` instead of `srcs`. 
cc_library( name = "opencl_clhpp", - hdrs = ["include/CL/cl.hpp", "include/CL/cl2.hpp"], - strip_include_prefix = "include", + includes = ["include"], + srcs = ["include/CL/cl.hpp", "include/CL/cl2.hpp"], visibility = ["//visibility:public"], ) diff --git a/tools/bazel.rc b/tools/bazel.rc index 27aa8695a19c80919063ef347e50567f094c94d2..38f2453e52570ff67a052d94820b6a8070e4a325 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -10,6 +10,7 @@ build --copt=-D_GLIBCXX_USE_C99_MATH_TR1 build --copt=-DMACE_OBFUSCATE_LITERALS build --copt=-DGEMMLOWP_USE_MACE_THREAD_POOL build --copt=-DMACE_DEPTHWISE_U8_USE_MULTI_THREAD +build --copt=-O2 # Usage example: bazel build --config android build:android --define linux_base=true @@ -20,6 +21,7 @@ build:android --linkopt=-lm build:android --distinct_host_configuration=true build:android --crosstool_top=//external:android/crosstool build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain +build:android --copt=-Oz build:android --copt -Wall build:android --copt -Wno-mismatched-tags build:android --copt -Wno-missing-braces @@ -75,20 +77,27 @@ build:aarch64_linux_gnu --copt -Wno-implicit-fallthrough # Usage example: bazel build --config optimization build:optimization -c opt -build:optimization --copt=-O3 build:optimization --linkopt=-Wl,--strip-all build:optimization --copt=-ffunction-sections build:optimization --copt=-fdata-sections build:optimization --linkopt=-Wl,--gc-sections +build:optimization --copt=-fno-rtti +build:optimization --copt=-fno-exceptions +build:optimization --copt=-DGOOGLE_PROTOBUF_NO_RTTI +build:optimization --copt=-DPROTOBUF_USE_EXCEPTIONS=0 # Usage example: bazel build --config optimization_darwin -build:optimization_darwin --copt=-O3 build:optimization_darwin --copt=-ffunction-sections build:optimization_darwin --copt=-fdata-sections build:optimization_darwin --linkopt=-Wl,-dead_strip +build:optimization_darwin --copt=-fno-rtti +build:optimization_darwin --copt=-fno-exceptions 
+build:optimization_darwin --copt=-DGOOGLE_PROTOBUF_NO_RTTI +build:optimization_darwin --copt=-DPROTOBUF_USE_EXCEPTIONS=0 # Usage example: bazel build --config symbol_hidden build:symbol_hidden --copt=-fvisibility=hidden +build:symbol_hidden --copt=-fvisibility-inlines-hidden # Usage example: bazel build --config debug build:debug -c dbg diff --git a/tools/bazel_build_standalone_lib.sh b/tools/bazel_build_standalone_lib.sh index c1a67a3618a5f63ad3854d03758faa71adff7453..f6bcbaa192d8724b4d5f6766ffef312dd252c8f4 100755 --- a/tools/bazel_build_standalone_lib.sh +++ b/tools/bazel_build_standalone_lib.sh @@ -40,8 +40,8 @@ enable_cpu=true enable_gpu=false enable_dsp=false enable_apu=false -enable_quantize=true -enable_bfloat16=true +enable_quantize=false +enable_bfloat16=false enable_rpcmem=true static_lib=false symbol_hidden= @@ -97,6 +97,12 @@ for opt in "${@}";do static|-static|--static) static_lib=true ;; + quantize|-quantize|--quantize) + enable_quantize=true + ;; + bfloat16|-bfloat16|--bfloat16) + enable_bfloat16=true + ;; help|-help|--help) helper ;; diff --git a/tools/cmake/cmake-build-aarch64-linux-gnu.sh b/tools/cmake/cmake-build-aarch64-linux-gnu.sh index 4241168f8255788a07f766b5b9ff5ab888206933..7a5f77075b42b8466813e21e800ae6864e05b142 100755 --- a/tools/cmake/cmake-build-aarch64-linux-gnu.sh +++ b/tools/cmake/cmake-build-aarch64-linux-gnu.sh @@ -17,14 +17,24 @@ if [[ "$RUNMODE" == "code" ]]; then MACE_ENABLE_CODE_MODE=ON fi +MACE_ENABLE_QUANTIZE=OFF +if [[ "$QUANTIZE" == "ON" ]]; then + MACE_ENABLE_QUANTIZE=ON +fi + +DMACE_ENABLE_BFLOAT16=OFF +if [[ "$BFLOAT16" == "ON" ]]; then + DMACE_ENABLE_BFLOAT16=ON +fi + mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} cmake -DCROSSTOOL_ROOT=${LINARO_AARCH64_LINUX_GNU} \ -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchains/aarch64-linux-gnu.cmake \ -DCMAKE_BUILD_TYPE=Release \ -DMACE_ENABLE_NEON=ON \ - -DMACE_ENABLE_QUANTIZE=ON \ + -DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \ -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \ - 
-DMACE_ENABLE_BFLOAT16=ON \ + -DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \ -DMACE_ENABLE_OPT_SIZE=ON \ -DMACE_ENABLE_OBFUSCATE=ON \ -DMACE_ENABLE_TESTS=ON \ diff --git a/tools/cmake/cmake-build-arm-linux-gnueabihf.sh b/tools/cmake/cmake-build-arm-linux-gnueabihf.sh index 40a879e4312a42050149a5f789b1ae92a2a065d9..5d77aef8da57aa67a173e8ffef0de2597c54c619 100755 --- a/tools/cmake/cmake-build-arm-linux-gnueabihf.sh +++ b/tools/cmake/cmake-build-arm-linux-gnueabihf.sh @@ -17,19 +17,29 @@ if [[ "$RUNTIME" == "GPU" ]]; then MACE_ENABLE_OPENCL=ON fi +MACE_ENABLE_QUANTIZE=OFF +if [[ "$QUANTIZE" == "ON" ]]; then + MACE_ENABLE_QUANTIZE=ON +fi + +DMACE_ENABLE_BFLOAT16=OFF +if [[ "$BFLOAT16" == "ON" ]]; then + DMACE_ENABLE_BFLOAT16=ON +fi + mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} cmake -DCROSSTOOL_ROOT=${LINARO_ARM_LINUX_GNUEABIHF} \ -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchains/arm-linux-gnueabihf.cmake \ -DCMAKE_BUILD_TYPE=Release \ -DMACE_ENABLE_NEON=ON \ - -DMACE_ENABLE_QUANTIZE=ON \ - -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \ - -DMACE_ENABLE_BFLOAT16=ON \ + -DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \ + -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \ + -DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \ -DMACE_ENABLE_OPT_SIZE=ON \ -DMACE_ENABLE_OBFUSCATE=ON \ -DMACE_ENABLE_TESTS=ON \ -DMACE_ENABLE_BENCHMARKS=ON \ - -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ + -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ -DCMAKE_INSTALL_PREFIX=install \ ../../.. 
make -j$(nproc) VERBOSE=1 && make install diff --git a/tools/cmake/cmake-build-arm64-v8a.sh b/tools/cmake/cmake-build-arm64-v8a.sh index 18616e39acf700efac3c657d08504067d7e1e4a5..2bad6c44fa240158a3dae8ca7b072111263db3f1 100755 --- a/tools/cmake/cmake-build-arm64-v8a.sh +++ b/tools/cmake/cmake-build-arm64-v8a.sh @@ -26,6 +26,16 @@ if [[ "$RUNMODE" == "code" ]]; then MACE_ENABLE_CODE_MODE=ON fi +MACE_ENABLE_QUANTIZE=OFF +if [[ "$QUANTIZE" == "ON" ]]; then + MACE_ENABLE_QUANTIZE=ON +fi + +DMACE_ENABLE_BFLOAT16=OFF +if [[ "$BFLOAT16" == "ON" ]]; then + DMACE_ENABLE_BFLOAT16=ON +fi + mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} cmake -DANDROID_ABI="arm64-v8a" \ -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake \ @@ -33,12 +43,12 @@ cmake -DANDROID_ABI="arm64-v8a" \ -DCMAKE_BUILD_TYPE=Release \ -DANDROID_STL=c++_shared \ -DMACE_ENABLE_NEON=ON \ - -DMACE_ENABLE_QUANTIZE=ON \ + -DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \ -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \ -DMACE_ENABLE_HEXAGON_DSP=${MACE_ENABLE_HEXAGON_DSP} \ -DMACE_ENABLE_HEXAGON_HTA=${MACE_ENABLE_HEXAGON_HTA} \ -DMACE_ENABLE_MTK_APU=${MACE_ENABLE_MTK_APU} \ - -DMACE_ENABLE_BFLOAT16=ON \ + -DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \ -DMACE_ENABLE_OPT_SIZE=ON \ -DMACE_ENABLE_OBFUSCATE=ON \ -DMACE_ENABLE_TESTS=ON \ diff --git a/tools/cmake/cmake-build-armeabi-v7a.sh b/tools/cmake/cmake-build-armeabi-v7a.sh index 53f8cc25a5d72ea60673b104c5d9e4ca08637a5b..760901f7c338c251c5069ce732eb179035a18c08 100755 --- a/tools/cmake/cmake-build-armeabi-v7a.sh +++ b/tools/cmake/cmake-build-armeabi-v7a.sh @@ -27,6 +27,16 @@ if [[ "$RUNMODE" == "code" ]]; then MACE_ENABLE_CODE_MODE=ON fi +MACE_ENABLE_QUANTIZE=OFF +if [[ "$QUANTIZE" == "ON" ]]; then + MACE_ENABLE_QUANTIZE=ON +fi + +DMACE_ENABLE_BFLOAT16=OFF +if [[ "$BFLOAT16" == "ON" ]]; then + DMACE_ENABLE_BFLOAT16=ON +fi + mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} cmake -DANDROID_ABI="armeabi-v7a" \ -DANDROID_ARM_NEON=ON \ @@ -35,12 +45,12 @@ 
cmake -DANDROID_ABI="armeabi-v7a" \ -DCMAKE_BUILD_TYPE=Release \ -DANDROID_STL=c++_shared \ -DMACE_ENABLE_NEON=ON \ - -DMACE_ENABLE_QUANTIZE=ON \ + -DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \ -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \ -DMACE_ENABLE_HEXAGON_DSP=${MACE_ENABLE_HEXAGON_DSP} \ -DMACE_ENABLE_HEXAGON_HTA=${MACE_ENABLE_HEXAGON_HTA} \ -DMACE_ENABLE_MTK_APU=${MACE_ENABLE_MTK_APU} \ - -DMACE_ENABLE_BFLOAT16=ON \ + -DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \ -DMACE_ENABLE_OPT_SIZE=ON \ -DMACE_ENABLE_OBFUSCATE=ON \ -DMACE_ENABLE_TESTS=ON \ diff --git a/tools/cmake/cmake-build-host.sh b/tools/cmake/cmake-build-host.sh index ee39b7426bd23e2df7f7c02b0bc1cac3c794fca4..c7726312620b6d583a8ff59ba621c61e56825825 100755 --- a/tools/cmake/cmake-build-host.sh +++ b/tools/cmake/cmake-build-host.sh @@ -13,12 +13,16 @@ if [[ "$RUNMODE" == "code" ]]; then MACE_ENABLE_CODE_MODE=ON fi +DMACE_ENABLE_BFLOAT16=OFF +if [[ "$BFLOAT16" == "ON" ]]; then + DMACE_ENABLE_BFLOAT16=ON +fi mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} cmake -DMACE_ENABLE_NEON=OFF \ -DMACE_ENABLE_QUANTIZE=OFF \ -DMACE_ENABLE_OPENCL=OFF \ - -DMACE_ENABLE_BFLOAT16=ON \ + -DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \ -DMACE_ENABLE_TESTS=ON \ -DMACE_ENABLE_BENCHMARKS=ON \ -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \