提交 a5952cff 编写于 作者: 开心的小妮

Merge branch 'remove_eigen_deps' into 'incubate/lite'

[LITE][ARM] Remove Eigen deps of ARM

See merge request inference/paddlelite!87
...@@ -78,5 +78,5 @@ if (NOT HAS_ARM_MATH_LIB_DIR) ...@@ -78,5 +78,5 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
gemv_arm_int8.cc gemv_arm_int8.cc
conv3x3s1_direct_int8.cc conv3x3s1_direct_int8.cc
conv3x3s2_direct_int8.cc conv3x3s2_direct_int8.cc
DEPS ${lite_kernel_deps} eigen3 framework_proto_lite) DEPS ${lite_kernel_deps} framework_proto_lite)
endif() endif()
...@@ -29,7 +29,12 @@ endif() ...@@ -29,7 +29,12 @@ endif()
lite_cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite) lite_cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite)
lite_cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite}) lite_cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite})
lite_cc_library(cpu_info_lite SRCS cpu_info.cc) lite_cc_library(cpu_info_lite SRCS cpu_info.cc)
if (LITE_WITH_ARM)
lite_cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite CL_DEPS cl_helper)
else()
lite_cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite eigen3 CL_DEPS cl_helper) lite_cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite eigen3 CL_DEPS cl_helper)
endif()
lite_cc_library(kernel_lite SRCS kernel.cc DEPS context_lite type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite ${tensor_lite}) lite_cc_library(kernel_lite SRCS kernel.cc DEPS context_lite type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite ${tensor_lite})
lite_cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite kernel_lite lite_cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite kernel_lite
cpp_op_desc_lite ${tensor_lite}) cpp_op_desc_lite ${tensor_lite})
......
...@@ -28,6 +28,35 @@ namespace lite { ...@@ -28,6 +28,35 @@ namespace lite {
namespace kernels { namespace kernels {
namespace arm { namespace arm {
// Naive reference GEMM with an optional per-column bias, used to validate
// the ARM FC kernel's output: c = a * b, then c[m][n] += biases[n].
//
// a is M x K, b is K x N, c is M x N, all dense row-major. K_ duplicates K
// so the argument list mirrors the kernel under test; the two must agree.
// biases, when non-null, holds N values broadcast over every row of c.
template <typename T>
void gemm_bias(const T* a, const int M, const int K, const T* b, const int K_,
               const int N, const T* biases, T* c) {
  EXPECT_TRUE(K_ == K && M > 0 && N > 0 && K > 0);
  EXPECT_TRUE(a && b && c);
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      // Dot product of row m of a with column n of b, accumulated in the
      // same left-to-right order as the macro-based original.
      T acc = static_cast<T>(0);
      for (int k = 0; k < K; ++k) {
        acc += a[m * K + k] * b[k * N + n];
      }
      // Bias (if provided) is added last, broadcast along the M dimension.
      c[m * N + n] = biases ? acc + biases[n] : acc;
    }
  }
}
template <typename T> template <typename T>
void FillData(T* a, const int n, const T lower = static_cast<T>(-2.f), void FillData(T* a, const int n, const T lower = static_cast<T>(-2.f),
const T upper = static_cast<T>(2.f)) { const T upper = static_cast<T>(2.f)) {
...@@ -103,8 +132,8 @@ TEST(fc_arm, compare_test) { ...@@ -103,8 +132,8 @@ TEST(fc_arm, compare_test) {
fc.PrepareForRun(); fc.PrepareForRun();
fc.Run(); fc.Run();
lite::arm::math::fc_compute_eigen(x_data, m, k, w_data, k, n, b_data, gemm_bias<T>(x_data, m, k, w_data, k, n, b_data, ref_data);
ref_data);
for (int i = 0; i < out.dims().production(); i++) { for (int i = 0; i < out.dims().production(); i++) {
EXPECT_NEAR(out_data[i], ref_data[i], 1e-3); EXPECT_NEAR(out_data[i], ref_data[i], 1e-3);
} }
...@@ -158,8 +187,8 @@ TEST(fc_arm, num_col_dims) { ...@@ -158,8 +187,8 @@ TEST(fc_arm, num_col_dims) {
fc.PrepareForRun(); fc.PrepareForRun();
fc.Run(); fc.Run();
lite::arm::math::fc_compute_eigen(x_data, 2, 3, w_data, 3, 4, b_data, gemm_bias<T>(x_data, 2, 3, w_data, 3, 4, b_data, ref_data);
ref_data);
for (int i = 0; i < out.dims().production(); i++) { for (int i = 0; i < out.dims().production(); i++) {
EXPECT_NEAR(out_data[i], ref_data[i], 1e-3); EXPECT_NEAR(out_data[i], ref_data[i], 1e-3);
} }
......
...@@ -28,6 +28,28 @@ namespace lite { ...@@ -28,6 +28,28 @@ namespace lite {
namespace kernels { namespace kernels {
namespace arm { namespace arm {
// Plain triple-loop matrix multiply serving as the reference result for the
// ARM mul kernel: c = a * b with a (M x K), b (K x N), c (M x N), all
// row-major. K_ mirrors the kernel's signature and must equal K.
template <typename T>
void mul_gemm(const T* a, const int M, const int K, const T* b, const int K_,
              const int N, T* c) {
  EXPECT_TRUE(K_ == K && M > 0 && N > 0 && K > 0);
  EXPECT_TRUE(a && b && c);
  for (int row = 0; row < M; ++row) {
    for (int col = 0; col < N; ++col) {
      // Accumulate row `row` of a against column `col` of b.
      T sum = static_cast<T>(0);
      for (int kk = 0; kk < K; ++kk) {
        sum += a[row * K + kk] * b[kk * N + col];
      }
      c[row * N + col] = sum;
    }
  }
}
template <typename T> template <typename T>
void FillData(T* a, const int n, const T lower = static_cast<T>(-2.f), void FillData(T* a, const int n, const T lower = static_cast<T>(-2.f),
const T upper = static_cast<T>(2.f)) { const T upper = static_cast<T>(2.f)) {
...@@ -91,8 +113,8 @@ TEST(mul_arm, compare_test) { ...@@ -91,8 +113,8 @@ TEST(mul_arm, compare_test) {
mul.Run(); mul.Run();
lite::arm::math::mul_compute_eigen(x_data, m, k, y_data, k, n, mul_gemm<T>(x_data, m, k, y_data, k, n, ref_data);
ref_data);
for (int i = 0; i < out.dims().production(); i++) { for (int i = 0; i < out.dims().production(); i++) {
EXPECT_NEAR(out_data[i], ref_data[i], 1e-3); EXPECT_NEAR(out_data[i], ref_data[i], 1e-3);
} }
...@@ -138,7 +160,8 @@ TEST(mul_arm, num_col_dims) { ...@@ -138,7 +160,8 @@ TEST(mul_arm, num_col_dims) {
mul.Run(); mul.Run();
lite::arm::math::mul_compute_eigen(x_data, 2, 12, y_data, 12, 5, ref_data); mul_gemm<T>(x_data, 2, 12, y_data, 12, 5, ref_data);
for (int i = 0; i < out.dims().production(); i++) { for (int i = 0; i < out.dims().production(); i++) {
EXPECT_NEAR(out_data[i], ref_data[i], 1e-3); EXPECT_NEAR(out_data[i], ref_data[i], 1e-3);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册