diff --git a/paddle/fluid/lite/arm/math/CMakeLists.txt b/paddle/fluid/lite/arm/math/CMakeLists.txt index 78cf685c8e498718b8d68ef37b1e7262d6663671..1dc9ab46073525bdde912f938320d1a24d8db3f2 100644 --- a/paddle/fluid/lite/arm/math/CMakeLists.txt +++ b/paddle/fluid/lite/arm/math/CMakeLists.txt @@ -78,5 +78,5 @@ if (NOT HAS_ARM_MATH_LIB_DIR) gemv_arm_int8.cc conv3x3s1_direct_int8.cc conv3x3s2_direct_int8.cc - DEPS ${lite_kernel_deps} eigen3 framework_proto_lite) + DEPS ${lite_kernel_deps} framework_proto_lite) endif() diff --git a/paddle/fluid/lite/core/CMakeLists.txt b/paddle/fluid/lite/core/CMakeLists.txt index 9fe7cfd5704a2af992fcf494b58ae113826509b2..ac30772cd0456fb717ddffd6cea759b2c2ac4234 100644 --- a/paddle/fluid/lite/core/CMakeLists.txt +++ b/paddle/fluid/lite/core/CMakeLists.txt @@ -29,7 +29,12 @@ endif() lite_cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite) lite_cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite}) lite_cc_library(cpu_info_lite SRCS cpu_info.cc) + +if (LITE_WITH_ARM) +lite_cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite CL_DEPS cl_helper) +else() lite_cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite eigen3 CL_DEPS cl_helper) +endif() lite_cc_library(kernel_lite SRCS kernel.cc DEPS context_lite type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite ${tensor_lite}) lite_cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite kernel_lite cpp_op_desc_lite ${tensor_lite}) diff --git a/paddle/fluid/lite/kernels/arm/fc_compute_test.cc b/paddle/fluid/lite/kernels/arm/fc_compute_test.cc index 8eec578c27188fd175ff3562082df144458c85fa..65ccde50dbed8a893ae54ea125244803c10a073f 100644 --- a/paddle/fluid/lite/kernels/arm/fc_compute_test.cc +++ b/paddle/fluid/lite/kernels/arm/fc_compute_test.cc @@ -28,6 +28,35 @@ namespace lite { namespace kernels { namespace arm { +#define A(i, j) a[i * lda + j] +#define 
B(i, j) b[i * ldb + j] +#define C(i, j) c[i * ldc + j] + +template <typename T> +void gemm_bias(const T* a, const int M, const int K, const T* b, const int K_, + const int N, T* biases, T* c) { + EXPECT_TRUE(K_ == K && M > 0 && N > 0 && K > 0); + EXPECT_TRUE(a && b && c); + const int lda = K; + const int ldb = N; + const int ldc = N; + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + C(m, n) = 0.0f; + for (int k = 0; k < K; ++k) { + C(m, n) += A(m, k) * B(k, n); + } + } + } + if (biases) { + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + C(m, n) += biases[n]; + } + } + } +} + template <typename T> void FillData(T* a, const int n, const T lower = static_cast<T>(-2.f), const T upper = static_cast<T>(2.f)) { @@ -103,8 +132,8 @@ TEST(fc_arm, compare_test) { fc.PrepareForRun(); fc.Run(); - lite::arm::math::fc_compute_eigen(x_data, m, k, w_data, k, n, b_data, - ref_data); + gemm_bias(x_data, m, k, w_data, k, n, b_data, ref_data); + for (int i = 0; i < out.dims().production(); i++) { EXPECT_NEAR(out_data[i], ref_data[i], 1e-3); } @@ -158,8 +187,8 @@ TEST(fc_arm, num_col_dims) { fc.PrepareForRun(); fc.Run(); - lite::arm::math::fc_compute_eigen(x_data, 2, 3, w_data, 3, 4, b_data, - ref_data); + gemm_bias(x_data, 2, 3, w_data, 3, 4, b_data, ref_data); + for (int i = 0; i < out.dims().production(); i++) { EXPECT_NEAR(out_data[i], ref_data[i], 1e-3); } diff --git a/paddle/fluid/lite/kernels/arm/mul_compute_test.cc b/paddle/fluid/lite/kernels/arm/mul_compute_test.cc index 5e3d17ec93ae9d73028343b3d4dd1e77a0fe86f0..9a0deec2a1d1fcd2dd76c43cb2901641bcdbcc79 100644 --- a/paddle/fluid/lite/kernels/arm/mul_compute_test.cc +++ b/paddle/fluid/lite/kernels/arm/mul_compute_test.cc @@ -28,6 +28,28 @@ namespace lite { namespace kernels { namespace arm { +#define A(i, j) a[i * lda + j] +#define B(i, j) b[i * ldb + j] +#define C(i, j) c[i * ldc + j] + +template <typename T> +void mul_gemm(const T* a, const int M, const int K, const T* b, const int K_, + const int N, T* c) { + EXPECT_TRUE(K_ == K && M > 0 
&& N > 0 && K > 0); + EXPECT_TRUE(a && b && c); + const int lda = K; + const int ldb = N; + const int ldc = N; + for (int m = 0; m < M; ++m) { + for (int n = 0; n < N; ++n) { + C(m, n) = 0.0f; + for (int k = 0; k < K; ++k) { + C(m, n) += A(m, k) * B(k, n); + } + } + } +} + template <typename T> void FillData(T* a, const int n, const T lower = static_cast<T>(-2.f), const T upper = static_cast<T>(2.f)) { @@ -91,8 +113,8 @@ TEST(mul_arm, compare_test) { mul.Run(); - lite::arm::math::mul_compute_eigen(x_data, m, k, y_data, k, n, - ref_data); + mul_gemm(x_data, m, k, y_data, k, n, ref_data); + for (int i = 0; i < out.dims().production(); i++) { EXPECT_NEAR(out_data[i], ref_data[i], 1e-3); } @@ -138,7 +160,8 @@ TEST(mul_arm, num_col_dims) { mul.Run(); - lite::arm::math::mul_compute_eigen(x_data, 2, 12, y_data, 12, 5, ref_data); + mul_gemm(x_data, 2, 12, y_data, 12, 5, ref_data); + for (int i = 0; i < out.dims().production(); i++) { EXPECT_NEAR(out_data[i], ref_data[i], 1e-3); }