提交 37cbf203 编写于 作者: 李滨

Merge branch 'fix_cmake' into 'master'

opt: reduce the so size for bazel and fix error in cmake compile

See merge request applied-machine-learning/sysml/mace!1307
......@@ -42,16 +42,16 @@ build_docs:
cmake_build_android-armeabi-v7a:
stage: build
script:
- RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh
- RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh
- LIBMACE32_FULL_SIZE=`stat -c%s build/cmake-build/armeabi-v7a/install/lib/libmace.so`
- if (( LIBMACE32_FULL_SIZE > 2200000 )) ; then echo "The libmace.so size too large"; exit 1; fi
- if (( LIBMACE32_FULL_SIZE > 1400000 )) ; then echo "The libmace.so size too large"; exit 1; fi
cmake_build_android-arm64-v8:
stage: build
script:
- RUNTIME=GPU bash tools/cmake/cmake-build-arm64-v8a.sh
- RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-arm64-v8a.sh
- LIBMACE64_FULL_SIZE=`stat -c%s build/cmake-build/arm64-v8a/install/lib/libmace.so`
- if (( LIBMACE64_FULL_SIZE > 3100000 )) ; then echo "The libmace.so size too large"; exit 1; fi
- if (( LIBMACE64_FULL_SIZE > 2300000 )) ; then echo "The libmace.so size too large"; exit 1; fi
bazel_build:
stage: build
......@@ -59,10 +59,14 @@ bazel_build:
- bash tools/bazel_build_standalone_lib.sh
- bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu
- bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu,gpu
- LIBMACE32_FULL_SIZE=`stat -c%s build/lib/armeabi-v7a/libmace.so`
- if (( LIBMACE32_FULL_SIZE > 1400000 )) ; then echo "The libmace.so size too large"; exit 1; fi
- bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu,gpu,dsp
- bash tools/bazel_build_standalone_lib.sh --abi=armeabi-v7a --runtimes=cpu,gpu,apu
- bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu
- bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu,gpu
- LIBMACE64_FULL_SIZE=`stat -c%s build/lib/arm64-v8a/libmace.so`
- if (( LIBMACE64_FULL_SIZE > 2300000 )) ; then echo "The libmace.so size too large"; exit 1; fi
- bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu,gpu,dsp
- bash tools/bazel_build_standalone_lib.sh --abi=arm64-v8a --runtimes=cpu,gpu,apu
- bash tools/bazel_build_standalone_lib.sh --abi=arm_linux_gnueabihf --runtimes=cpu
......
......@@ -175,7 +175,7 @@ After that you can rebuild the engine.
.. code-block:: bash
RUNTIME=GPU RUNMODE=code bash tools/cmake/cmake-build-armeabi-v7a.sh
RUNTIME=GPU RUNMODE=code QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh
``RUNMODE=code`` means the model library is compiled and linked together with the MACE engine.
......
......@@ -45,7 +45,7 @@ Here we use the mobilenet-v2 model as an example.
cd path/to/mace
# Build library
# output lib path: build/lib
bash tools/bazel_build_standalone_lib.sh [-abi=abi][-runtimes=rt1,rt2,...][-static]
bash tools/bazel_build_standalone_lib.sh [-abi=abi][-runtimes=rt1,rt2,...][-quantize][-bfloat16][-static]
.. note::
......
......@@ -20,7 +20,7 @@ Please make sure you have CMake installed.
.. code-block:: sh
RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh
RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh
which generates libraries in ``build/cmake-build/armeabi-v7a``; you can use either the static libraries or the ``libmace.so`` shared library.
......
......@@ -20,7 +20,7 @@
.. code-block:: sh
RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh
RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh
编译安装位置为 ``build/cmake-build/armeabi-v7a``, 可以使用 libmace 静态库或者动态库。
......
......@@ -37,7 +37,7 @@
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif // MACE_OS_MAC
#include "CL/cl2.hpp"
#include <CL/cl2.hpp>
#ifdef MACE_OS_MAC
#pragma GCC diagnostic pop
......
......@@ -144,7 +144,7 @@ inline void vst3q(float *ptr, float32x4x3_t v) {
}
inline float32x8_t vld1o(float *ptr) {
return {vld1q_f32(ptr), vld1q_f32(ptr + 4)};
return {{vld1q_f32(ptr), vld1q_f32(ptr + 4)}};
}
inline void vst1o(float *ptr, float32x8_t v) {
......@@ -209,8 +209,8 @@ inline float32x4_t vld1q(const BFloat16 *ptr) {
// load of 2 4D vectors and perform de-interleaving
inline float32x4x2_t vld2q_bf16(const uint16_t *ptr) {
uint16x4x2_t u = vld2_u16(ptr);
return {vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16))};
return {{vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16))}};
}
inline float32x4x2_t vld2q_bf16(const BFloat16 *ptr) {
......@@ -228,9 +228,9 @@ inline float32x4x2_t vld2q(const BFloat16 *ptr) {
// load of 3 4D vectors and perform de-interleaving
inline float32x4x3_t vld3q_bf16(const uint16_t *ptr) {
uint16x4x3_t u = vld3_u16(ptr);
return {vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[2], 16))};
return {{vreinterpretq_f32_u32(vshll_n_u16(u.val[0], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[1], 16)),
vreinterpretq_f32_u32(vshll_n_u16(u.val[2], 16))}};
}
inline float32x4x3_t vld3q_bf16(const BFloat16 *ptr) {
......@@ -264,8 +264,8 @@ inline void vst1q(BFloat16 *ptr, const float32x4_t v) {
// store of 2 4D vectors and perform interleaving
inline void vst2q_bf16(uint16_t *ptr, const float32x4x2_t v) {
uint16x4x2_t u = {vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16),
vshrn_n_u32(vreinterpretq_u32_f32(v.val[1]), 16)};
uint16x4x2_t u = {{vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16),
vshrn_n_u32(vreinterpretq_u32_f32(v.val[1]), 16)}};
vst2_u16(ptr, u);
}
......@@ -283,9 +283,9 @@ inline void vst2q(BFloat16 *ptr, const float32x4x2_t v) {
// store of 3 4D vectors and perform interleaving
// Converts the three float32x4 vectors in `v` to 16-bit values by keeping the
// upper 16 bits of every 32-bit element (shift-right-narrow by 16), then
// stores the three resulting uint16x4 vectors to `ptr` with vst3_u16.
inline void vst3q_bf16(uint16_t *ptr, const float32x4x3_t v) {
  // Each output lane must be built from its own source vector. Using
  // v.val[0] for all three lanes would store the first vector three times
  // and silently drop v.val[1] and v.val[2] (compare vld3q_bf16/vst2q_bf16).
  uint16x4x3_t u = {{vshrn_n_u32(vreinterpretq_u32_f32(v.val[0]), 16),
                     vshrn_n_u32(vreinterpretq_u32_f32(v.val[1]), 16),
                     vshrn_n_u32(vreinterpretq_u32_f32(v.val[2]), 16)}};
  vst3_u16(ptr, u);
}
......@@ -304,8 +304,8 @@ inline void vst3q(BFloat16 *ptr, const float32x4x3_t v) {
// load of 8D vector
inline float32x8_t vld1o_bf16(const uint16_t *ptr) {
uint16x8_t u = vld1q_u16(ptr);
return {vreinterpretq_f32_u32(vshll_n_u16(vget_low_u16(u), 16)),
vreinterpretq_f32_u32(vshll_n_u16(vget_high_u16(u), 16))};
return {{vreinterpretq_f32_u32(vshll_n_u16(vget_low_u16(u), 16)),
vreinterpretq_f32_u32(vshll_n_u16(vget_high_u16(u), 16))}};
}
inline float32x8_t vld1o_bf16(const BFloat16 *ptr) {
......
......@@ -492,6 +492,7 @@ TEST_F(DepthwiseConv2dOpTest, Quant) {
TestQuant(3, 1, 128, 56, 56, 3, 3, SAME, {2, 2});
}
#ifdef MACE_ENABLE_BFLOAT16
namespace {
void TestBFloat16(const index_t batch,
const index_t multiplier,
......@@ -557,6 +558,8 @@ TEST_F(DepthwiseConv2dOpTest, BFloat16) {
TestBFloat16(3, 1, 128, 56, 56, 3, 3, SAME, {2, 2});
}
#endif // MACE_ENABLE_BFLOAT16
} // namespace test
} // namespace ops
} // namespace mace
......@@ -12,9 +12,12 @@ genrule(
cmd = "workdir=$$(mktemp -d -t opencl-clhpp-build.XXXXXXXXXX); cp -aL $$(dirname $(location CMakeLists.txt))/* $$workdir; pushd $$workdir; mkdir build; pushd build; cmake ../ -DBUILD_DOCS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_TESTS=OFF; make generate_clhpp generate_cl2hpp; popd; popd; cp -a $$workdir/build/* $(@D); rm -rf $$workdir; echo installing to $(@D)",
)
# The `srcs` are not used by the C++ code, but they are needed to trigger the
# `genrule`, so "include/CL/cl.hpp" and "include/CL/cl2.hpp" are added to
# `srcs`; these two files are made visible through `includes` rather than `srcs`.
cc_library(
name = "opencl_clhpp",
hdrs = ["include/CL/cl.hpp", "include/CL/cl2.hpp"],
strip_include_prefix = "include",
includes = ["include"],
srcs = ["include/CL/cl.hpp", "include/CL/cl2.hpp"],
visibility = ["//visibility:public"],
)
......@@ -10,6 +10,7 @@ build --copt=-D_GLIBCXX_USE_C99_MATH_TR1
build --copt=-DMACE_OBFUSCATE_LITERALS
build --copt=-DGEMMLOWP_USE_MACE_THREAD_POOL
build --copt=-DMACE_DEPTHWISE_U8_USE_MULTI_THREAD
build --copt=-O2
# Usage example: bazel build --config android
build:android --define linux_base=true
......@@ -20,6 +21,7 @@ build:android --linkopt=-lm
build:android --distinct_host_configuration=true
build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:android --copt=-Oz
build:android --copt -Wall
build:android --copt -Wno-mismatched-tags
build:android --copt -Wno-missing-braces
......@@ -75,20 +77,27 @@ build:aarch64_linux_gnu --copt -Wno-implicit-fallthrough
# Usage example: bazel build --config optimization
build:optimization -c opt
build:optimization --copt=-O3
build:optimization --linkopt=-Wl,--strip-all
build:optimization --copt=-ffunction-sections
build:optimization --copt=-fdata-sections
build:optimization --linkopt=-Wl,--gc-sections
build:optimization --copt=-fno-rtti
build:optimization --copt=-fno-exceptions
build:optimization --copt=-DGOOGLE_PROTOBUF_NO_RTTI
build:optimization --copt=-DPROTOBUF_USE_EXCEPTIONS=0
# Usage example: bazel build --config optimization_darwin
build:optimization_darwin --copt=-O3
build:optimization_darwin --copt=-ffunction-sections
build:optimization_darwin --copt=-fdata-sections
build:optimization_darwin --linkopt=-Wl,-dead_strip
build:optimization_darwin --copt=-fno-rtti
build:optimization_darwin --copt=-fno-exceptions
build:optimization_darwin --copt=-DGOOGLE_PROTOBUF_NO_RTTI
build:optimization_darwin --copt=-DPROTOBUF_USE_EXCEPTIONS=0
# Usage example: bazel build --config symbol_hidden
build:symbol_hidden --copt=-fvisibility=hidden
build:symbol_hidden --copt=-fvisibility-inlines-hidden
# Usage example: bazel build --config debug
build:debug -c dbg
......
......@@ -40,8 +40,8 @@ enable_cpu=true
enable_gpu=false
enable_dsp=false
enable_apu=false
enable_quantize=true
enable_bfloat16=true
enable_quantize=false
enable_bfloat16=false
enable_rpcmem=true
static_lib=false
symbol_hidden=
......@@ -97,6 +97,12 @@ for opt in "${@}";do
static|-static|--static)
static_lib=true
;;
quantize|-quantize|--quantize)
enable_quantize=true
;;
bfloat16|-bfloat16|--bfloat16)
enable_bfloat16=true
;;
help|-help|--help)
helper
;;
......
......@@ -17,14 +17,24 @@ if [[ "$RUNMODE" == "code" ]]; then
MACE_ENABLE_CODE_MODE=ON
fi
MACE_ENABLE_QUANTIZE=OFF
if [[ "$QUANTIZE" == "ON" ]]; then
MACE_ENABLE_QUANTIZE=ON
fi
DMACE_ENABLE_BFLOAT16=OFF
if [[ "$BFLOAT16" == "ON" ]]; then
DMACE_ENABLE_BFLOAT16=ON
fi
mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR}
cmake -DCROSSTOOL_ROOT=${LINARO_AARCH64_LINUX_GNU} \
-DCMAKE_TOOLCHAIN_FILE=./cmake/toolchains/aarch64-linux-gnu.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DMACE_ENABLE_NEON=ON \
-DMACE_ENABLE_QUANTIZE=ON \
-DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \
-DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
-DMACE_ENABLE_BFLOAT16=ON \
-DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \
-DMACE_ENABLE_OPT_SIZE=ON \
-DMACE_ENABLE_OBFUSCATE=ON \
-DMACE_ENABLE_TESTS=ON \
......
......@@ -17,19 +17,29 @@ if [[ "$RUNTIME" == "GPU" ]]; then
MACE_ENABLE_OPENCL=ON
fi
MACE_ENABLE_QUANTIZE=OFF
if [[ "$QUANTIZE" == "ON" ]]; then
MACE_ENABLE_QUANTIZE=ON
fi
DMACE_ENABLE_BFLOAT16=OFF
if [[ "$BFLOAT16" == "ON" ]]; then
DMACE_ENABLE_BFLOAT16=ON
fi
mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR}
cmake -DCROSSTOOL_ROOT=${LINARO_ARM_LINUX_GNUEABIHF} \
-DCMAKE_TOOLCHAIN_FILE=./cmake/toolchains/arm-linux-gnueabihf.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DMACE_ENABLE_NEON=ON \
-DMACE_ENABLE_QUANTIZE=ON \
-DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
-DMACE_ENABLE_BFLOAT16=ON \
-DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \
-DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
-DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \
-DMACE_ENABLE_OPT_SIZE=ON \
-DMACE_ENABLE_OBFUSCATE=ON \
-DMACE_ENABLE_TESTS=ON \
-DMACE_ENABLE_BENCHMARKS=ON \
-DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
-DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
-DCMAKE_INSTALL_PREFIX=install \
../../..
make -j$(nproc) VERBOSE=1 && make install
......
......@@ -26,6 +26,16 @@ if [[ "$RUNMODE" == "code" ]]; then
MACE_ENABLE_CODE_MODE=ON
fi
MACE_ENABLE_QUANTIZE=OFF
if [[ "$QUANTIZE" == "ON" ]]; then
MACE_ENABLE_QUANTIZE=ON
fi
DMACE_ENABLE_BFLOAT16=OFF
if [[ "$BFLOAT16" == "ON" ]]; then
DMACE_ENABLE_BFLOAT16=ON
fi
mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR}
cmake -DANDROID_ABI="arm64-v8a" \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake \
......@@ -33,12 +43,12 @@ cmake -DANDROID_ABI="arm64-v8a" \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_STL=c++_shared \
-DMACE_ENABLE_NEON=ON \
-DMACE_ENABLE_QUANTIZE=ON \
-DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \
-DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
-DMACE_ENABLE_HEXAGON_DSP=${MACE_ENABLE_HEXAGON_DSP} \
-DMACE_ENABLE_HEXAGON_HTA=${MACE_ENABLE_HEXAGON_HTA} \
-DMACE_ENABLE_MTK_APU=${MACE_ENABLE_MTK_APU} \
-DMACE_ENABLE_BFLOAT16=ON \
-DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \
-DMACE_ENABLE_OPT_SIZE=ON \
-DMACE_ENABLE_OBFUSCATE=ON \
-DMACE_ENABLE_TESTS=ON \
......
......@@ -27,6 +27,16 @@ if [[ "$RUNMODE" == "code" ]]; then
MACE_ENABLE_CODE_MODE=ON
fi
MACE_ENABLE_QUANTIZE=OFF
if [[ "$QUANTIZE" == "ON" ]]; then
MACE_ENABLE_QUANTIZE=ON
fi
DMACE_ENABLE_BFLOAT16=OFF
if [[ "$BFLOAT16" == "ON" ]]; then
DMACE_ENABLE_BFLOAT16=ON
fi
mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR}
cmake -DANDROID_ABI="armeabi-v7a" \
-DANDROID_ARM_NEON=ON \
......@@ -35,12 +45,12 @@ cmake -DANDROID_ABI="armeabi-v7a" \
-DCMAKE_BUILD_TYPE=Release \
-DANDROID_STL=c++_shared \
-DMACE_ENABLE_NEON=ON \
-DMACE_ENABLE_QUANTIZE=ON \
-DMACE_ENABLE_QUANTIZE=${MACE_ENABLE_QUANTIZE} \
-DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
-DMACE_ENABLE_HEXAGON_DSP=${MACE_ENABLE_HEXAGON_DSP} \
-DMACE_ENABLE_HEXAGON_HTA=${MACE_ENABLE_HEXAGON_HTA} \
-DMACE_ENABLE_MTK_APU=${MACE_ENABLE_MTK_APU} \
-DMACE_ENABLE_BFLOAT16=ON \
-DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \
-DMACE_ENABLE_OPT_SIZE=ON \
-DMACE_ENABLE_OBFUSCATE=ON \
-DMACE_ENABLE_TESTS=ON \
......
......@@ -13,12 +13,16 @@ if [[ "$RUNMODE" == "code" ]]; then
MACE_ENABLE_CODE_MODE=ON
fi
DMACE_ENABLE_BFLOAT16=OFF
if [[ "$BFLOAT16" == "ON" ]]; then
DMACE_ENABLE_BFLOAT16=ON
fi
mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR}
cmake -DMACE_ENABLE_NEON=OFF \
-DMACE_ENABLE_QUANTIZE=OFF \
-DMACE_ENABLE_OPENCL=OFF \
-DMACE_ENABLE_BFLOAT16=ON \
-DMACE_ENABLE_BFLOAT16=${DMACE_ENABLE_BFLOAT16} \
-DMACE_ENABLE_TESTS=ON \
-DMACE_ENABLE_BENCHMARKS=ON \
-DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册