From b16ae4e2bee9a603ec873b93b77cd80088f3901f Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Wed, 29 May 2019 10:59:05 +0800
Subject: [PATCH] [lite] fix fc bias and enable armv7 fc (#17695)

---
 cmake/cross_compiling/android.cmake                |  3 ++-
 paddle/fluid/lite/arm/math/funcs.cc                | 13 ++++++-------
 paddle/fluid/lite/arm/math/packed_sgemm.cc         |  4 ++--
 paddle/fluid/lite/core/cpu_info.cc                 |  2 +-
 paddle/fluid/lite/core/cpu_info.h                  |  6 +++---
 .../lite/core/mir/runtime_context_assign_pass.cc   |  2 +-
 paddle/fluid/lite/kernels/arm/CMakeLists.txt       |  2 +-
 paddle/fluid/lite/kernels/arm/fc_compute_test.cc   |  8 +++-----
 8 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/cmake/cross_compiling/android.cmake b/cmake/cross_compiling/android.cmake
index a12ecdccc1e..dedfad8c7ab 100644
--- a/cmake/cross_compiling/android.cmake
+++ b/cmake/cross_compiling/android.cmake
@@ -45,7 +45,8 @@ if (NOT ANDROID_ARCH_ABI IN_LIST ANDROID_ARCH_ABI_LIST)
 endif()
 
 if(ANDROID_ARCH_ABI STREQUAL "armeabi-v7a")
-    message(STATUS "NEON is enabled on arm-v7a")
+    set(CMAKE_ANDROID_ARM_NEON ON)
+    message(STATUS "NEON is enabled on arm-v7a with softfp")
 endif()
 
 set(ANDROID_STL_TYPE_LITS "gnustl_static" "c++_static")
diff --git a/paddle/fluid/lite/arm/math/funcs.cc b/paddle/fluid/lite/arm/math/funcs.cc
index ff1bf5b09a9..4013ac31bfd 100644
--- a/paddle/fluid/lite/arm/math/funcs.cc
+++ b/paddle/fluid/lite/arm/math/funcs.cc
@@ -21,14 +21,14 @@ namespace arm {
 namespace math {
 
 template <>
-void fill_bias_fc(float *tensor, const float *bias, const int num,
+void fill_bias_fc(float *out, const float *bias, const int num,
                   const int channel) {
   int cnt = channel >> 4;
   int remain = channel & 15;
 
   for (int j = 0; j < num; ++j) {
     const float *ptr_bias = bias;
-    float *ptr_out = tensor + j * channel;
+    float *ptr_out = out + j * channel;
 
     float32x4_t vout1;
     float32x4_t vout2;
@@ -61,7 +61,6 @@ void fill_bias_fc(float *tensor, const float *bias, const int num,
       ptr_out += 16;
       ptr_bias += 16;
     }
-
 #if 0
     if (cnt > 0) {
       asm(
@@ -79,21 +78,21 @@ void fill_bias_fc(float *tensor, const float *bias, const int num,
       );
     }
 #endif
-    for (; remain > 0; remain--) {
+    for (int i = 0; i < remain; ++i) {
       *(ptr_out++) += *(ptr_bias++);
     }
   }
 }
 
 template <>
-void fill_bias_fc(int *tensor, const int *bias, const int num,
+void fill_bias_fc(int *out, const int *bias, const int num,
                   const int channel) {
   int cnt = channel >> 4;
   int remain = channel & 15;
 
   for (int j = 0; j < num; ++j) {
     const int *ptr_bias = bias;
-    int *ptr_out = tensor + j * channel;
+    int *ptr_out = out + j * channel;
 
     int32x4_t vout1;
     int32x4_t vout2;
@@ -144,7 +143,7 @@ void fill_bias_fc(int *tensor, const int *bias, const int num,
       );
     }
 #endif
-    for (; remain > 0; remain--) {
+    for (int i = 0; i < remain; ++i) {
       *(ptr_out++) += *(ptr_bias++);
     }
   }
diff --git a/paddle/fluid/lite/arm/math/packed_sgemm.cc b/paddle/fluid/lite/arm/math/packed_sgemm.cc
index c3a53227390..1028d371d3c 100644
--- a/paddle/fluid/lite/arm/math/packed_sgemm.cc
+++ b/paddle/fluid/lite/arm/math/packed_sgemm.cc
@@ -2751,7 +2751,7 @@ void sgemm_conv_4x8(const float* A_packed, const float* B, const float* bias,
                     bool transB, ARMContext* ctx) {
   size_t l2_cache =
       ctx->l2_cache_size() > 0 ? ctx->l2_cache_size() : 512 * 1024;
-  void* workspace = ctx->get_work_space();
+  auto* workspace = ctx->workspace_data<float>();
   int threads = ctx->threads();
   //! MBLOCK * x (result) + MBLOCK * k (A) + x * k (B) = l2
   int x_block =
@@ -2785,7 +2785,7 @@ void sgemm_conv_4x8(const float* A_packed, const float* B, const float* bias,
       flag_p_remain = true;
     }
     //! load bpanel
-    float* b_pannel = static_cast<float*>(workspace);
+    float* b_pannel = workspace;
     if (transB) {
       loadb_trans(b_pannel, B, K, 0, K, x0, xmax);
     } else {
diff --git a/paddle/fluid/lite/core/cpu_info.cc b/paddle/fluid/lite/core/cpu_info.cc
index 0336c2d7ac4..8c2c427777d 100644
--- a/paddle/fluid/lite/core/cpu_info.cc
+++ b/paddle/fluid/lite/core/cpu_info.cc
@@ -20,7 +20,7 @@ namespace lite {
 
 #ifdef LITE_WITH_ARM
 
-void DeviceInfo::get_info(DeviceInfo* dev) {
+void DeviceInfo::InitInternal(DeviceInfo* dev) {
   set_default_cache(dev);
   dev->compute_core_num_ = arm_get_cpucount();
   dev->max_memory_ = arm_get_meminfo();
diff --git a/paddle/fluid/lite/core/cpu_info.h b/paddle/fluid/lite/core/cpu_info.h
index 23a996f80e0..bf603024bc2 100644
--- a/paddle/fluid/lite/core/cpu_info.h
+++ b/paddle/fluid/lite/core/cpu_info.h
@@ -85,14 +85,14 @@ class DeviceInfo {
     return *x;
   }
 
-  static void init_info() {
+  static void Init() {
     auto& info = Global();
-    get_info(&info);
+    InitInternal(&info);
   }
 
  private:
   DeviceInfo() = default;
-  static void get_info(DeviceInfo* dev);
+  static void InitInternal(DeviceInfo* dev);
 };
 
 size_t arm_get_meminfo();
diff --git a/paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc b/paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
index 1852fc2fcbe..ecca00e33c5 100644
--- a/paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
+++ b/paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
@@ -64,7 +64,7 @@ class RuntimeContextAssignPass : public StmtPass {
 
 #ifdef LITE_WITH_ARM
   std::unique_ptr<KernelContext> NewARMContext() {
-    DeviceInfo::init_info();
+    DeviceInfo::Init();
     std::unique_ptr<KernelContext> ctx(new KernelContext);
     ctx->As<ARMContext>();
     return ctx;
diff --git a/paddle/fluid/lite/kernels/arm/CMakeLists.txt b/paddle/fluid/lite/kernels/arm/CMakeLists.txt
index 75dc9fe43ad..ebdd42443e0 100644
--- a/paddle/fluid/lite/kernels/arm/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/arm/CMakeLists.txt
@@ -9,7 +9,7 @@ cc_library(relu_compute_arm SRCS relu_compute.cc DEPS ${lite_kernel_deps})
 cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} eigen3)
 cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} eigen3)
 
-lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm eigen3)
+lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm)
 
 set(arm_kernels
     fc_compute_arm
diff --git a/paddle/fluid/lite/kernels/arm/fc_compute_test.cc b/paddle/fluid/lite/kernels/arm/fc_compute_test.cc
index 1949a3a1eb1..2e85fccf7d6 100644
--- a/paddle/fluid/lite/kernels/arm/fc_compute_test.cc
+++ b/paddle/fluid/lite/kernels/arm/fc_compute_test.cc
@@ -61,8 +61,6 @@ TEST(fc_arm, compare_test) {
     b_data[i] = static_cast<float>(i);
   }
 
-  // TODO(TJ): enable bias soon
-  b_data = nullptr;
   lite::arm::math::fc_compute_eigen(x_data, batch_size, 3,  //
                                     w_data, 3, 4,           //
                                     b_data, ref_data);
@@ -74,11 +72,11 @@ TEST(fc_arm, compare_test) {
   param.in_num_col_dims = 1;
   param.input = &x;
   param.w = &w;
-  param.bias = nullptr;
+  param.bias = &b;
   param.output = &out;
   param.in_mat_dims = x.dims();
 
-  DeviceInfo::init_info();
+  DeviceInfo::Init();
   std::unique_ptr<KernelContext> ctx(new KernelContext);
   ctx->As<ARMContext>();
   fc.SetParam(param);
@@ -136,7 +134,7 @@ TEST(fc_arm, num_col_dims) {
   std::unique_ptr<KernelContext> ctx(new KernelContext);
   ctx->As<ARMContext>();
 
-  DeviceInfo::init_info();
+  DeviceInfo::Init();
   fc.SetParam(param);
   fc.SetContext(std::move(ctx));
 
-- 
GitLab
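
Note (not part of the patch): the bias handling fixed here amounts to broadcasting a per-channel bias across every row of the FC output, which is what fill_bias_fc does with a NEON fast path plus the scalar tail loop touched above. Below is a minimal standalone scalar sketch of that behavior for reference; fill_bias_fc_ref is a hypothetical stand-in used only for illustration, not the Paddle Lite function, and the sizes in main() are made up.

#include <cassert>
#include <cstdio>
#include <vector>

// Scalar reference for the bias broadcast performed by fill_bias_fc:
// out is a num x channel row-major matrix, bias holds channel values and
// is added element-wise to every row (the NEON path handles 16 floats per
// iteration; the tail loop adds the remainder one element at a time).
void fill_bias_fc_ref(float* out, const float* bias, int num, int channel) {
  for (int j = 0; j < num; ++j) {
    float* ptr_out = out + j * channel;
    for (int i = 0; i < channel; ++i) {
      ptr_out[i] += bias[i];
    }
  }
}

int main() {
  const int num = 2;      // batch size
  const int channel = 4;  // FC output width
  std::vector<float> out(num * channel, 1.0f);         // stand-in for x * w
  std::vector<float> bias = {0.5f, 1.5f, 2.5f, 3.5f};  // one value per channel
  fill_bias_fc_ref(out.data(), bias.data(), num, channel);
  assert(out[0] == 1.5f);            // row 0, channel 0
  assert(out[channel + 3] == 4.5f);  // row 1, channel 3
  std::printf("out[0]=%.1f out[7]=%.1f\n", out[0], out[num * channel - 1]);
  return 0;
}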