提交 17d2a04a 编写于 作者: H HappyAngel 提交者: GitHub

[arm] fix clang v7 bug (#3118)

* set arm_lang default is off. test=develop

* fix resize error, test-develop
上级 9ebaaa1b
...@@ -76,6 +76,7 @@ lite_option(LITE_BUILD_TAILOR "Enable tailoring library according to model" OFF) ...@@ -76,6 +76,7 @@ lite_option(LITE_BUILD_TAILOR "Enable tailoring library according to model" OFF)
# cv build options # cv build options
lite_option(LITE_WITH_CV "Enable build cv image in lite" OFF) lite_option(LITE_WITH_CV "Enable build cv image in lite" OFF)
lite_option(LITE_WITH_STATIC_CUDA "Statically link cuda libraries." ON) lite_option(LITE_WITH_STATIC_CUDA "Statically link cuda libraries." ON)
lite_option(LITE_WITH_ARM_CLANG "when arm lang is clang, its ON." OFF)
# TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter. # TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter.
if(ANDROID OR IOS OR ARMLINUX) if(ANDROID OR IOS OR ARMLINUX)
......
...@@ -12,6 +12,7 @@ message(STATUS "LITE_WITH_FPGA:\t${LITE_WITH_FPGA}") ...@@ -12,6 +12,7 @@ message(STATUS "LITE_WITH_FPGA:\t${LITE_WITH_FPGA}")
message(STATUS "LITE_WITH_BM:\t${LITE_WITH_BM}") message(STATUS "LITE_WITH_BM:\t${LITE_WITH_BM}")
message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}") message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}")
message(STATUS "LITE_WITH_CV:\t${LITE_WITH_CV}") message(STATUS "LITE_WITH_CV:\t${LITE_WITH_CV}")
message(STATUS "LITE_WITH_ARM_LANG:\t${LITE_WITH_ARM_LANG}")
set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install") set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install")
set(LITE_ON_MOBILE ${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}) set(LITE_ON_MOBILE ${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK})
......
...@@ -508,6 +508,8 @@ void act_switch_3x3s1(const float* inr0, ...@@ -508,6 +508,8 @@ void act_switch_3x3s1(const float* inr0,
"x5", "x5",
"x6", "x6",
"x7"); "x7");
#else
#ifdef LITE_WITH_ARM_CLANG
#else #else
asm volatile(COMPUTE RELU STORE asm volatile(COMPUTE RELU STORE
: [r0] "+r"(inr0), : [r0] "+r"(inr0),
...@@ -541,6 +543,7 @@ void act_switch_3x3s1(const float* inr0, ...@@ -541,6 +543,7 @@ void act_switch_3x3s1(const float* inr0,
"r3", "r3",
"r4", "r4",
"r5"); "r5");
#endif
#endif #endif
break; break;
case lite_api::ActivationType::kRelu6: case lite_api::ActivationType::kRelu6:
...@@ -593,6 +596,8 @@ void act_switch_3x3s1(const float* inr0, ...@@ -593,6 +596,8 @@ void act_switch_3x3s1(const float* inr0,
"x5", "x5",
"x6", "x6",
"x7"); "x7");
#else
#ifdef LITE_WITH_ARM_CLANG
#else #else
asm volatile(COMPUTE RELU RELU6 STORE asm volatile(COMPUTE RELU RELU6 STORE
: [r0] "+r"(inr0), : [r0] "+r"(inr0),
...@@ -626,6 +631,7 @@ void act_switch_3x3s1(const float* inr0, ...@@ -626,6 +631,7 @@ void act_switch_3x3s1(const float* inr0,
"r3", "r3",
"r4", "r4",
"r5"); "r5");
#endif
#endif #endif
break; break;
case lite_api::ActivationType::kLeakyRelu: case lite_api::ActivationType::kLeakyRelu:
...@@ -678,6 +684,8 @@ void act_switch_3x3s1(const float* inr0, ...@@ -678,6 +684,8 @@ void act_switch_3x3s1(const float* inr0,
"x5", "x5",
"x6", "x6",
"x7"); "x7");
#else
#ifdef LITE_WITH_ARM_CLANG
#else #else
asm volatile(COMPUTE LEAKY_RELU STORE asm volatile(COMPUTE LEAKY_RELU STORE
: [r0] "+r"(inr0), : [r0] "+r"(inr0),
...@@ -711,6 +719,7 @@ void act_switch_3x3s1(const float* inr0, ...@@ -711,6 +719,7 @@ void act_switch_3x3s1(const float* inr0,
"r3", "r3",
"r4", "r4",
"r5"); "r5");
#endif
#endif #endif
break; break;
default: default:
...@@ -768,6 +777,8 @@ void act_switch_3x3s1(const float* inr0, ...@@ -768,6 +777,8 @@ void act_switch_3x3s1(const float* inr0,
"x5", "x5",
"x6", "x6",
"x7"); "x7");
#else
#ifdef LITE_WITH_ARM_CLANG
#else #else
asm volatile(COMPUTE STORE asm volatile(COMPUTE STORE
: [r0] "+r"(inr0), : [r0] "+r"(inr0),
...@@ -801,6 +812,7 @@ void act_switch_3x3s1(const float* inr0, ...@@ -801,6 +812,7 @@ void act_switch_3x3s1(const float* inr0,
"r3", "r3",
"r4", "r4",
"r5"); "r5");
#endif
#endif #endif
} }
} }
...@@ -988,6 +1000,8 @@ void conv_3x3s1_depthwise_fp32(const float* i_data, ...@@ -988,6 +1000,8 @@ void conv_3x3s1_depthwise_fp32(const float* i_data,
w8, w8,
vbias, vbias,
act_param); act_param);
#else
#ifdef LITE_WITH_ARM_CLANG
#else #else
act_switch_3x3s1(inr0, act_switch_3x3s1(inr0,
inr1, inr1,
...@@ -1008,6 +1022,7 @@ void conv_3x3s1_depthwise_fp32(const float* i_data, ...@@ -1008,6 +1022,7 @@ void conv_3x3s1_depthwise_fp32(const float* i_data,
vbias, vbias,
vbias, vbias,
act_param); act_param);
#endif
#endif #endif
outl[0] += 4; outl[0] += 4;
outl[1] += 4; outl[1] += 4;
......
...@@ -629,6 +629,7 @@ void conv_depthwise_3x3_fp32(const void* din, ...@@ -629,6 +629,7 @@ void conv_depthwise_3x3_fp32(const void* din,
act_param, act_param,
ctx); ctx);
} else { } else {
#ifdef __aarch64__
conv_3x3s1_depthwise_fp32(reinterpret_cast<const float*>(din), conv_3x3s1_depthwise_fp32(reinterpret_cast<const float*>(din),
reinterpret_cast<float*>(dout), reinterpret_cast<float*>(dout),
num, num,
...@@ -643,6 +644,27 @@ void conv_depthwise_3x3_fp32(const void* din, ...@@ -643,6 +644,27 @@ void conv_depthwise_3x3_fp32(const void* din,
param, param,
act_param, act_param,
ctx); ctx);
#else
#ifdef LITE_WITH_ARM_CLANG
LOG(FATAL) << "fp32 depthwise conv3x3s1px doesnot support in v7-clang, "
"this can run in basic";
#else
conv_3x3s1_depthwise_fp32(reinterpret_cast<const float*>(din),
reinterpret_cast<float*>(dout),
num,
ch_out,
h_out,
w_out,
ch_in,
h_in,
w_in,
reinterpret_cast<const float*>(weights),
bias,
param,
act_param,
ctx);
#endif
#endif
} }
} else if (stride == 2) { } else if (stride == 2) {
if (pads_less && pad_h == pad_w && (pad < 2)) { // support pad = [0, 1] if (pads_less && pad_h == pad_w && (pad < 2)) { // support pad = [0, 1]
......
...@@ -60,6 +60,10 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() { ...@@ -60,6 +60,10 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
bool flag_dw_5x5 = (kw == 5) && (kh == 5) && (stride == 1 || stride == 2); bool flag_dw_5x5 = (kw == 5) && (kh == 5) && (stride == 1 || stride == 2);
bool flag_dw = flag_dw_3x3 || flag_dw_5x5; bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
#ifdef LITE_WITH_ARM_CLANG // clang
flag_dw_3x3 =
(stride == 1 && (paddings[0] > 1 || paddings[2] > 1)) ? false : true;
#endif
/// select conv impl /// select conv impl
if (param.groups == ic && ic == oc && ks_equal && no_dilation && flag_dw) { if (param.groups == ic && ic == oc && ks_equal && no_dilation && flag_dw) {
impl_ = new DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>; impl_ = new DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>;
......
...@@ -559,7 +559,7 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -559,7 +559,7 @@ void test_img(const std::vector<int>& cluster_id,
} }
} }
#if 0 #if 1
TEST(TestImageConvertRand, test_func_image_convert_preprocess) { TEST(TestImageConvertRand, test_func_image_convert_preprocess) {
if (FLAGS_basic_test) { if (FLAGS_basic_test) {
for (auto w : {1, 4, 8, 16, 112, 224, 1092}) { for (auto w : {1, 4, 8, 16, 112, 224, 1092}) {
...@@ -573,12 +573,12 @@ TEST(TestImageConvertRand, test_func_image_convert_preprocess) { ...@@ -573,12 +573,12 @@ TEST(TestImageConvertRand, test_func_image_convert_preprocess) {
for (auto layout : {1}) { for (auto layout : {1}) {
if ((srcFormat == ImageFormat::NV12 || if ((srcFormat == ImageFormat::NV12 ||
srcFormat == ImageFormat::NV21) && srcFormat == ImageFormat::NV21) &&
(dstFormat == ImageFormat::GRAY)) { (dstFormat == ImageFormat::GRAY)) {
continue; continue;
} }
if ((dstFormat == ImageFormat::NV12 || if ((dstFormat == ImageFormat::NV12 ||
dstFormat == ImageFormat::NV21) && dstFormat == ImageFormat::NV21) &&
(srcFormat == ImageFormat::GRAY)) { (srcFormat == ImageFormat::GRAY)) {
continue; continue;
} }
if (srcFormat == ImageFormat::NV12 || if (srcFormat == ImageFormat::NV12 ||
...@@ -611,7 +611,7 @@ TEST(TestImageConvertRand, test_func_image_convert_preprocess) { ...@@ -611,7 +611,7 @@ TEST(TestImageConvertRand, test_func_image_convert_preprocess) {
} }
} }
#endif #endif
#if 0 #if 1
TEST(TestImageConvertRand, test_func_image_resize_preprocess) { TEST(TestImageConvertRand, test_func_image_resize_preprocess) {
if (FLAGS_basic_test) { if (FLAGS_basic_test) {
for (auto w : {1, 4, 8, 16, 112, 224, 1092}) { for (auto w : {1, 4, 8, 16, 112, 224, 1092}) {
...@@ -624,7 +624,7 @@ TEST(TestImageConvertRand, test_func_image_resize_preprocess) { ...@@ -624,7 +624,7 @@ TEST(TestImageConvertRand, test_func_image_resize_preprocess) {
for (auto dstFormat : {0, 1, 2, 3, 4, 11}) { for (auto dstFormat : {0, 1, 2, 3, 4, 11}) {
for (auto layout : {1}) { for (auto layout : {1}) {
if (dstFormat == ImageFormat::NV12 || if (dstFormat == ImageFormat::NV12 ||
dstFormat == ImageFormat::NV21 || dstFormat == ImageFormat::NV21 ||
(srcFormat == ImageFormat::NV12 || (srcFormat == ImageFormat::NV12 ||
srcFormat == ImageFormat::NV21) && srcFormat == ImageFormat::NV21) &&
dstFormat == ImageFormat::GRAY) { dstFormat == ImageFormat::GRAY) {
......
...@@ -21,6 +21,7 @@ OPTMODEL_DIR="" ...@@ -21,6 +21,7 @@ OPTMODEL_DIR=""
BUILD_TAILOR=OFF BUILD_TAILOR=OFF
BUILD_CV=OFF BUILD_CV=OFF
SHUTDOWN_LOG=ON SHUTDOWN_LOG=ON
LITE_WITH_ARM_LANG=OFF
readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
...@@ -37,6 +38,14 @@ fi ...@@ -37,6 +38,14 @@ fi
function prepare_workspace { function prepare_workspace {
local root_dir=$1 local root_dir=$1
local build_dir=$2 local build_dir=$2
# ARM LANG
if [ ${ARM_LANG} == "clang" ]; then
LITE_WITH_ARM_LANG=ON
else
LITE_WITH_ARM_LANG=OFF
fi
echo "ARM_LANG is ${ARM_LANG}"
echo "LITE_WITH_ARM_LANG is ${LITE_WITH_ARM_LANG}"
# in build directory # in build directory
# 1. Prepare gen_code file # 1. Prepare gen_code file
GEN_CODE_PATH_PREFIX=$build_dir/lite/gen_code GEN_CODE_PATH_PREFIX=$build_dir/lite/gen_code
...@@ -106,7 +115,7 @@ function make_tiny_publish_so { ...@@ -106,7 +115,7 @@ function make_tiny_publish_so {
if [ ${os} == "armlinux" ]; then if [ ${os} == "armlinux" ]; then
BUILD_JAVA=OFF BUILD_JAVA=OFF
fi fi
cmake .. \ cmake .. \
${PYTHON_FLAGS} \ ${PYTHON_FLAGS} \
${CMAKE_COMMON_OPTIONS} \ ${CMAKE_COMMON_OPTIONS} \
...@@ -118,6 +127,7 @@ function make_tiny_publish_so { ...@@ -118,6 +127,7 @@ function make_tiny_publish_so {
-DANDROID_STL_TYPE=$android_stl \ -DANDROID_STL_TYPE=$android_stl \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \ -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_WITH_CV=$BUILD_CV \ -DLITE_WITH_CV=$BUILD_CV \
-DLITE_WITH_ARM_LANG=$LITE_WITH_ARM_LANG \
-DLITE_BUILD_TAILOR=$BUILD_TAILOR \ -DLITE_BUILD_TAILOR=$BUILD_TAILOR \
-DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \ -DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
-DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang} -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
...@@ -200,6 +210,7 @@ function make_full_publish_so { ...@@ -200,6 +210,7 @@ function make_full_publish_so {
-DANDROID_STL_TYPE=$android_stl \ -DANDROID_STL_TYPE=$android_stl \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \ -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_WITH_CV=$BUILD_CV \ -DLITE_WITH_CV=$BUILD_CV \
-DLITE_WITH_ARM_LANG=$LITE_WITH_ARM_LANG \
-DLITE_BUILD_TAILOR=$BUILD_TAILOR \ -DLITE_BUILD_TAILOR=$BUILD_TAILOR \
-DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \ -DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
-DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang} -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
...@@ -223,13 +234,14 @@ function make_all_tests { ...@@ -223,13 +234,14 @@ function make_all_tests {
fi fi
mkdir -p $build_directory mkdir -p $build_directory
cd $build_directory cd $build_directory
prepare_workspace $root_dir $build_directory prepare_workspace $root_dir $build_directory
cmake $root_dir \ cmake $root_dir \
${CMAKE_COMMON_OPTIONS} \ ${CMAKE_COMMON_OPTIONS} \
-DWITH_TESTING=ON \ -DWITH_TESTING=ON \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \ -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_WITH_CV=$BUILD_CV \ -DLITE_WITH_CV=$BUILD_CV \
-DLITE_WITH_ARM_LANG=$LITE_WITH_ARM_LANG \
-DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang} -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
make lite_compile_deps -j$NUM_PROC make lite_compile_deps -j$NUM_PROC
......
...@@ -147,52 +147,34 @@ void resize(const uint8_t* src, ...@@ -147,52 +147,34 @@ void resize(const uint8_t* src,
yofs = yofs1; yofs = yofs1;
ialpha = ialpha1; ialpha = ialpha1;
} }
if (sy == prev_sy1) {
memset(rowsbuf0, 0, sizeof(uint16_t) * w_out);
const uint8_t* S1 = src + srcw * (sy + 1);
const int16_t* ialphap = ialpha;
int16_t* rows1p = rowsbuf1;
for (int dx = 0; dx < dstw; dx++) {
int sx = xofs[dx];
int16_t a0 = ialphap[0];
int16_t a1 = ialphap[1];
const uint8_t* S1pl = S1 + sx; // hresize two rows
const uint8_t* S1pr = S1 + sx + num; const uint8_t* S0 = src + w_in * (sy);
for (int i = 0; i < num; i++) { const uint8_t* S1 = src + w_in * (sy + 1);
*rows1p++ = ((*S1pl++) * a0 + (*S1pr++) * a1) >> 4; const int16_t* ialphap = ialpha;
} int16_t* rows0p = rowsbuf0;
ialphap += 2; int16_t* rows1p = rowsbuf1;
} for (int dx = 0; dx < dstw; dx++) {
} else { int sx = xofs[dx];
// hresize two rows int16_t a0 = ialphap[0];
const uint8_t* S0 = src + w_in * (sy); int16_t a1 = ialphap[1];
const uint8_t* S1 = src + w_in * (sy + 1);
const int16_t* ialphap = ialpha;
int16_t* rows0p = rowsbuf0;
int16_t* rows1p = rowsbuf1;
for (int dx = 0; dx < dstw; dx++) {
int sx = xofs[dx];
int16_t a0 = ialphap[0];
int16_t a1 = ialphap[1];
const uint8_t* S0pl = S0 + sx; const uint8_t* S0pl = S0 + sx;
const uint8_t* S0pr = S0 + sx + num; const uint8_t* S0pr = S0 + sx + num;
const uint8_t* S1pl = S1 + sx; const uint8_t* S1pl = S1 + sx;
const uint8_t* S1pr = S1 + sx + num; const uint8_t* S1pr = S1 + sx + num;
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
*rows0p++ = ((*S0pl++) * a0 + (*S0pr++) * a1) >> 4; *rows0p++ = ((*S0pl++) * a0 + (*S0pr++) * a1) >> 4;
*rows1p++ = ((*S1pl++) * a0 + (*S1pr++) * a1) >> 4; *rows1p++ = ((*S1pl++) * a0 + (*S1pr++) * a1) >> 4;
}
ialphap += 2;
} }
ialphap += 2;
} }
prev_sy1 = sy + 1;
int16_t b0 = ibeta[0]; int16_t b0 = ibeta[0];
int16_t b1 = ibeta[1]; int16_t b1 = ibeta[1];
uint8_t* dp_ptr = dst + dy * w_out; uint8_t* dp_ptr = dst + dy * w_out;
int16_t* rows0p = rowsbuf0; rows0p = rowsbuf0;
int16_t* rows1p = rowsbuf1; rows1p = rowsbuf1;
int16x8_t _b0 = vdupq_n_s16(b0); int16x8_t _b0 = vdupq_n_s16(b0);
int16x8_t _b1 = vdupq_n_s16(b1); int16x8_t _b1 = vdupq_n_s16(b1);
int re_cnt = cnt; int re_cnt = cnt;
...@@ -281,6 +263,13 @@ void resize(const uint8_t* src, ...@@ -281,6 +263,13 @@ void resize(const uint8_t* src,
2); 2);
} }
ibeta += 2; ibeta += 2;
delete[] rowsbuf0;
delete[] rowsbuf1;
}
if (orih < dsth) { // uv
delete[] xofs1;
delete[] yofs1;
delete[] ialpha1;
} }
delete[] buf; delete[] buf;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册