提交 17d2a04a 编写于 作者: H HappyAngel 提交者: GitHub

[arm] fix clang v7 bug (#3118)

* set arm_lang default to off. test=develop

* fix resize error, test=develop
上级 9ebaaa1b
......@@ -76,6 +76,7 @@ lite_option(LITE_BUILD_TAILOR "Enable tailoring library according to model" OFF)
# CV build options
# LITE_WITH_CV: build the CV image component in lite (default OFF).
lite_option(LITE_WITH_CV "Enable build cv image in lite" OFF)
# LITE_WITH_STATIC_CUDA: link the CUDA libraries statically (default ON).
lite_option(LITE_WITH_STATIC_CUDA "Statically link cuda libraries." ON)
# LITE_WITH_ARM_CLANG: expected to be ON when the ARM toolchain language is
# clang (default OFF). The C++ sources in this change test a preprocessor
# symbol of the same name with #ifdef.
# NOTE(review): the configure-time status message and the build script in this
# change use the name LITE_WITH_ARM_LANG instead -- confirm which name is
# intended and that the option is actually forwarded to the compiler.
lite_option(LITE_WITH_ARM_CLANG "when arm lang is clang, its ON." OFF)
# TODO(Superjomn) Remove WITH_ANAKIN option if not needed later.
if(ANDROID OR IOS OR ARMLINUX)
......
......@@ -12,6 +12,7 @@ message(STATUS "LITE_WITH_FPGA:\t${LITE_WITH_FPGA}")
# Report the value of each build switch at configure time.
message(STATUS "LITE_WITH_BM:\t${LITE_WITH_BM}")
message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}")
message(STATUS "LITE_WITH_CV:\t${LITE_WITH_CV}")
# NOTE(review): the option declared in the top-level option list of this change
# is LITE_WITH_ARM_CLANG; LITE_WITH_ARM_LANG is the name the build script passes
# with -DLITE_WITH_ARM_LANG. Confirm this line prints the intended variable.
message(STATUS "LITE_WITH_ARM_LANG:\t${LITE_WITH_ARM_LANG}")
# Model directory is the "install" folder under the third-party path.
set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install")
# LITE_ON_MOBILE mirrors the light-weight-framework switch.
set(LITE_ON_MOBILE ${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK})
......
......@@ -508,6 +508,8 @@ void act_switch_3x3s1(const float* inr0,
"x5",
"x6",
"x7");
#else
#ifdef LITE_WITH_ARM_CLANG
#else
asm volatile(COMPUTE RELU STORE
: [r0] "+r"(inr0),
......@@ -541,6 +543,7 @@ void act_switch_3x3s1(const float* inr0,
"r3",
"r4",
"r5");
#endif
#endif
break;
case lite_api::ActivationType::kRelu6:
......@@ -593,6 +596,8 @@ void act_switch_3x3s1(const float* inr0,
"x5",
"x6",
"x7");
#else
#ifdef LITE_WITH_ARM_CLANG
#else
asm volatile(COMPUTE RELU RELU6 STORE
: [r0] "+r"(inr0),
......@@ -626,6 +631,7 @@ void act_switch_3x3s1(const float* inr0,
"r3",
"r4",
"r5");
#endif
#endif
break;
case lite_api::ActivationType::kLeakyRelu:
......@@ -678,6 +684,8 @@ void act_switch_3x3s1(const float* inr0,
"x5",
"x6",
"x7");
#else
#ifdef LITE_WITH_ARM_CLANG
#else
asm volatile(COMPUTE LEAKY_RELU STORE
: [r0] "+r"(inr0),
......@@ -711,6 +719,7 @@ void act_switch_3x3s1(const float* inr0,
"r3",
"r4",
"r5");
#endif
#endif
break;
default:
......@@ -768,6 +777,8 @@ void act_switch_3x3s1(const float* inr0,
"x5",
"x6",
"x7");
#else
#ifdef LITE_WITH_ARM_CLANG
#else
asm volatile(COMPUTE STORE
: [r0] "+r"(inr0),
......@@ -801,6 +812,7 @@ void act_switch_3x3s1(const float* inr0,
"r3",
"r4",
"r5");
#endif
#endif
}
}
......@@ -988,6 +1000,8 @@ void conv_3x3s1_depthwise_fp32(const float* i_data,
w8,
vbias,
act_param);
#else
#ifdef LITE_WITH_ARM_CLANG
#else
act_switch_3x3s1(inr0,
inr1,
......@@ -1008,6 +1022,7 @@ void conv_3x3s1_depthwise_fp32(const float* i_data,
vbias,
vbias,
act_param);
#endif
#endif
outl[0] += 4;
outl[1] += 4;
......
......@@ -629,6 +629,7 @@ void conv_depthwise_3x3_fp32(const void* din,
act_param,
ctx);
} else {
#ifdef __aarch64__
conv_3x3s1_depthwise_fp32(reinterpret_cast<const float*>(din),
reinterpret_cast<float*>(dout),
num,
......@@ -643,6 +644,27 @@ void conv_depthwise_3x3_fp32(const void* din,
param,
act_param,
ctx);
#else
#ifdef LITE_WITH_ARM_CLANG
LOG(FATAL) << "fp32 depthwise conv3x3s1px doesnot support in v7-clang, "
"this can run in basic";
#else
conv_3x3s1_depthwise_fp32(reinterpret_cast<const float*>(din),
reinterpret_cast<float*>(dout),
num,
ch_out,
h_out,
w_out,
ch_in,
h_in,
w_in,
reinterpret_cast<const float*>(weights),
bias,
param,
act_param,
ctx);
#endif
#endif
}
} else if (stride == 2) {
if (pads_less && pad_h == pad_w && (pad < 2)) { // support pad = [0, 1]
......
......@@ -60,6 +60,10 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
bool flag_dw_5x5 = (kw == 5) && (kh == 5) && (stride == 1 || stride == 2);
bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
#ifdef LITE_WITH_ARM_CLANG // clang
flag_dw_3x3 =
(stride == 1 && (paddings[0] > 1 || paddings[2] > 1)) ? false : true;
#endif
/// select conv impl
if (param.groups == ic && ic == oc && ks_equal && no_dilation && flag_dw) {
impl_ = new DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>;
......
......@@ -559,7 +559,7 @@ void test_img(const std::vector<int>& cluster_id,
}
}
#if 0
#if 1
TEST(TestImageConvertRand, test_func_image_convert_preprocess) {
if (FLAGS_basic_test) {
for (auto w : {1, 4, 8, 16, 112, 224, 1092}) {
......@@ -573,12 +573,12 @@ TEST(TestImageConvertRand, test_func_image_convert_preprocess) {
for (auto layout : {1}) {
if ((srcFormat == ImageFormat::NV12 ||
srcFormat == ImageFormat::NV21) &&
(dstFormat == ImageFormat::GRAY)) {
(dstFormat == ImageFormat::GRAY)) {
continue;
}
if ((dstFormat == ImageFormat::NV12 ||
dstFormat == ImageFormat::NV21) &&
(srcFormat == ImageFormat::GRAY)) {
(srcFormat == ImageFormat::GRAY)) {
continue;
}
if (srcFormat == ImageFormat::NV12 ||
......@@ -611,7 +611,7 @@ TEST(TestImageConvertRand, test_func_image_convert_preprocess) {
}
}
#endif
#if 0
#if 1
TEST(TestImageConvertRand, test_func_image_resize_preprocess) {
if (FLAGS_basic_test) {
for (auto w : {1, 4, 8, 16, 112, 224, 1092}) {
......@@ -624,7 +624,7 @@ TEST(TestImageConvertRand, test_func_image_resize_preprocess) {
for (auto dstFormat : {0, 1, 2, 3, 4, 11}) {
for (auto layout : {1}) {
if (dstFormat == ImageFormat::NV12 ||
dstFormat == ImageFormat::NV21 ||
dstFormat == ImageFormat::NV21 ||
(srcFormat == ImageFormat::NV12 ||
srcFormat == ImageFormat::NV21) &&
dstFormat == ImageFormat::GRAY) {
......
......@@ -21,6 +21,7 @@ OPTMODEL_DIR=""
# Default build switches; the publish functions forward these to cmake.
# BUILD_TAILOR is passed as -DLITE_BUILD_TAILOR.
BUILD_TAILOR=OFF
# BUILD_CV is passed as -DLITE_WITH_CV.
BUILD_CV=OFF
SHUTDOWN_LOG=ON
# Passed as -DLITE_WITH_ARM_LANG; prepare_workspace overwrites it to ON when
# ARM_LANG is "clang" and to OFF otherwise.
# NOTE(review): the CMake option declared in this change is named
# LITE_WITH_ARM_CLANG -- confirm the -D flag name matches the option.
LITE_WITH_ARM_LANG=OFF
# Download URL of the third-party dependency archive (read-only).
readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
......@@ -37,6 +38,14 @@ fi
function prepare_workspace {
local root_dir=$1
local build_dir=$2
# ARM LANG
if [ ${ARM_LANG} == "clang" ]; then
LITE_WITH_ARM_LANG=ON
else
LITE_WITH_ARM_LANG=OFF
fi
echo "ARM_LANG is ${ARM_LANG}"
echo "LITE_WITH_ARM_LANG is ${LITE_WITH_ARM_LANG}"
# in build directory
# 1. Prepare gen_code file
GEN_CODE_PATH_PREFIX=$build_dir/lite/gen_code
......@@ -106,7 +115,7 @@ function make_tiny_publish_so {
if [ ${os} == "armlinux" ]; then
BUILD_JAVA=OFF
fi
cmake .. \
${PYTHON_FLAGS} \
${CMAKE_COMMON_OPTIONS} \
......@@ -118,6 +127,7 @@ function make_tiny_publish_so {
-DANDROID_STL_TYPE=$android_stl \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_WITH_CV=$BUILD_CV \
-DLITE_WITH_ARM_LANG=$LITE_WITH_ARM_LANG \
-DLITE_BUILD_TAILOR=$BUILD_TAILOR \
-DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
-DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
......@@ -200,6 +210,7 @@ function make_full_publish_so {
-DANDROID_STL_TYPE=$android_stl \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_WITH_CV=$BUILD_CV \
-DLITE_WITH_ARM_LANG=$LITE_WITH_ARM_LANG \
-DLITE_BUILD_TAILOR=$BUILD_TAILOR \
-DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
-DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
......@@ -223,13 +234,14 @@ function make_all_tests {
fi
mkdir -p $build_directory
cd $build_directory
prepare_workspace $root_dir $build_directory
cmake $root_dir \
${CMAKE_COMMON_OPTIONS} \
-DWITH_TESTING=ON \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_WITH_CV=$BUILD_CV \
-DLITE_WITH_ARM_LANG=$LITE_WITH_ARM_LANG \
-DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
make lite_compile_deps -j$NUM_PROC
......
......@@ -147,52 +147,34 @@ void resize(const uint8_t* src,
yofs = yofs1;
ialpha = ialpha1;
}
if (sy == prev_sy1) {
memset(rowsbuf0, 0, sizeof(uint16_t) * w_out);
const uint8_t* S1 = src + srcw * (sy + 1);
const int16_t* ialphap = ialpha;
int16_t* rows1p = rowsbuf1;
for (int dx = 0; dx < dstw; dx++) {
int sx = xofs[dx];
int16_t a0 = ialphap[0];
int16_t a1 = ialphap[1];
const uint8_t* S1pl = S1 + sx;
const uint8_t* S1pr = S1 + sx + num;
for (int i = 0; i < num; i++) {
*rows1p++ = ((*S1pl++) * a0 + (*S1pr++) * a1) >> 4;
}
ialphap += 2;
}
} else {
// hresize two rows
const uint8_t* S0 = src + w_in * (sy);
const uint8_t* S1 = src + w_in * (sy + 1);
const int16_t* ialphap = ialpha;
int16_t* rows0p = rowsbuf0;
int16_t* rows1p = rowsbuf1;
for (int dx = 0; dx < dstw; dx++) {
int sx = xofs[dx];
int16_t a0 = ialphap[0];
int16_t a1 = ialphap[1];
// hresize two rows
const uint8_t* S0 = src + w_in * (sy);
const uint8_t* S1 = src + w_in * (sy + 1);
const int16_t* ialphap = ialpha;
int16_t* rows0p = rowsbuf0;
int16_t* rows1p = rowsbuf1;
for (int dx = 0; dx < dstw; dx++) {
int sx = xofs[dx];
int16_t a0 = ialphap[0];
int16_t a1 = ialphap[1];
const uint8_t* S0pl = S0 + sx;
const uint8_t* S0pr = S0 + sx + num;
const uint8_t* S1pl = S1 + sx;
const uint8_t* S1pr = S1 + sx + num;
for (int i = 0; i < num; i++) {
*rows0p++ = ((*S0pl++) * a0 + (*S0pr++) * a1) >> 4;
*rows1p++ = ((*S1pl++) * a0 + (*S1pr++) * a1) >> 4;
}
ialphap += 2;
const uint8_t* S0pl = S0 + sx;
const uint8_t* S0pr = S0 + sx + num;
const uint8_t* S1pl = S1 + sx;
const uint8_t* S1pr = S1 + sx + num;
for (int i = 0; i < num; i++) {
*rows0p++ = ((*S0pl++) * a0 + (*S0pr++) * a1) >> 4;
*rows1p++ = ((*S1pl++) * a0 + (*S1pr++) * a1) >> 4;
}
ialphap += 2;
}
prev_sy1 = sy + 1;
int16_t b0 = ibeta[0];
int16_t b1 = ibeta[1];
uint8_t* dp_ptr = dst + dy * w_out;
int16_t* rows0p = rowsbuf0;
int16_t* rows1p = rowsbuf1;
rows0p = rowsbuf0;
rows1p = rowsbuf1;
int16x8_t _b0 = vdupq_n_s16(b0);
int16x8_t _b1 = vdupq_n_s16(b1);
int re_cnt = cnt;
......@@ -281,6 +263,13 @@ void resize(const uint8_t* src,
2);
}
ibeta += 2;
delete[] rowsbuf0;
delete[] rowsbuf1;
}
if (orih < dsth) { // uv
delete[] xofs1;
delete[] yofs1;
delete[] ialpha1;
}
delete[] buf;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册