From 747a9e2a8e41af37ed677bff7b2b3d424306277b Mon Sep 17 00:00:00 2001 From: Liangliang He Date: Mon, 12 Feb 2018 10:13:04 +0800 Subject: [PATCH] Optimize activation and fix export lib scripts --- mace/kernels/activation.h | 5 +++++ tools/export_lib.sh | 22 ++++++++++++---------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/mace/kernels/activation.h b/mace/kernels/activation.h index 745f1744..0a856fc9 100644 --- a/mace/kernels/activation.h +++ b/mace/kernels/activation.h @@ -54,17 +54,20 @@ void DoActivation(const T *input_ptr, case NOOP: break; case RELU: +#pragma omp parallel for for (index_t i = 0; i < size; ++i) { output_ptr[i] = std::max(input_ptr[i], static_cast(0)); } break; case RELUX: +#pragma omp parallel for for (index_t i = 0; i < size; ++i) { output_ptr[i] = std::min(std::max(input_ptr[i], static_cast(0)), static_cast(relux_max_limit)); } break; case PRELU: +#pragma omp parallel for for (index_t i = 0; i < size; ++i) { T in = input_ptr[i]; if (in < 0) { @@ -75,12 +78,14 @@ void DoActivation(const T *input_ptr, } break; case TANH: +#pragma omp parallel for for (index_t i = 0; i < size; ++i) { T in_exp = std::exp(-2 * input_ptr[i]); output_ptr[i] = (1 - in_exp) / (1 + in_exp); } break; case SIGMOID: +#pragma omp parallel for for (index_t i = 0; i < size; ++i) { output_ptr[i] = 1 / (1 + std::exp(-input_ptr[i])); } diff --git a/tools/export_lib.sh b/tools/export_lib.sh index 6e84ed7d..00a01949 100755 --- a/tools/export_lib.sh +++ b/tools/export_lib.sh @@ -3,8 +3,8 @@ set -e Usage() { - echo "Usage: ./tools/export_lib.sh android_abi[armeabi-v7a/arm64-v8a] runtime[gpu/dsp] export_include_dir export_lib_dir" - echo "eg: ./tools/export_lib.sh armeabi-v7a ../include ../lib/libmace_v7" + echo "Usage: ./tools/export_lib.sh target_abi[armeabi-v7a | arm64-v8a | host] runtime[gpu | dsp] export_include_dir export_lib_dir" + echo "eg: ./tools/export_lib.sh armeabi-v7a gpu ../include ../lib/libmace_v7" } if [ $# -lt 4 ]; then @@ -12,9 +12,7 @@ if [ $# -lt 4 ]; then exit 1 fi -# ANDROID_ABI=arm64-v8a -# ANDROID_ABI=armeabi-v7a -ANDROID_ABI=$1 +TARGET_ABI=$1 RUNTIME=$2 EXPORT_INCLUDE_DIR=$3 EXPORT_LIB_DIR=$4 @@ -63,15 +61,18 @@ build_target() bazel build --verbose_failures -c opt --strip always $BAZEL_TARGET \ --crosstool_top=//external:android/crosstool \ --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ - --cpu=$ANDROID_ABI \ + --cpu=$TARGET_ABI \ --copt="-std=c++11" \ --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \ --copt="-Werror=return-type" \ --copt="-DMACE_OBFUSCATE_LITERALS" \ + --copt="-O3" \ + --define neon=true \ + --define openmp=true \ $DSP_MODE_BUILD_FLAGS || exit 1 } -build_local_target() +build_host_target() { BAZEL_TARGET=$1 bazel build --verbose_failures -c opt --strip always $BAZEL_TARGET \ @@ -79,7 +80,8 @@ build_local_target() --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \ --copt="-Werror=return-type" \ --copt="-DMACE_OBFUSCATE_LITERALS" \ - --define openmp=true || exit -1 + --copt="-O3" \ + --define openmp=true || exit 1 } merge_libs() @@ -132,10 +134,10 @@ bash mace/tools/git/gen_version_source.sh ${CODEGEN_DIR}/version/version.cc || e echo "Step 3: Build libmace targets" bazel clean -if [ x"${RUNTIME}" = x"local" ]; then +if [ x"${TARGET_ABI}" = x"host" ] || [ x"${TARGET_ABI}" = x"local" ]; then for target in ${all_targets[*]} do - build_local_target ${target} + build_host_target ${target} done else for target in ${all_targets[*]} -- GitLab