提交 644d97f5 编写于 作者: C chenjiaoAngel

fix format. test=develop

上级 e5066279
...@@ -4252,18 +4252,18 @@ void gemm_prepack_int8(const int8_t* A_packed, ...@@ -4252,18 +4252,18 @@ void gemm_prepack_int8(const int8_t* A_packed,
} }
#else #else
gemm_prepack_oth_int8<float32_t>(A_packed, gemm_prepack_oth_int8<float32_t>(A_packed,
B, B,
bias, bias,
C, C,
M, M,
N, N,
K, K,
is_bias, is_bias,
flag_act, flag_act,
is_transB, is_transB,
scale, scale,
alpha, alpha,
ctx); ctx);
#endif #endif
} }
...@@ -4319,22 +4319,33 @@ void gemm_prepack_int8(const int8_t* A_packed, ...@@ -4319,22 +4319,33 @@ void gemm_prepack_int8(const int8_t* A_packed,
ctx); ctx);
} else { } else {
gemm_prepack_oth_int8<int8_t>(A_packed, gemm_prepack_oth_int8<int8_t>(A_packed,
B, B,
bias, bias,
C, C,
M, M,
N, N,
K, K,
is_bias, is_bias,
flag_act, flag_act,
is_transB, is_transB,
scale, scale,
alpha, alpha,
ctx); ctx);
} }
#else #else
gemm_prepack_oth_int8<int8_t>( gemm_prepack_oth_int8<int8_t>(A_packed,
A_packed, B, bias, C, M, N, K, is_bias, flag_act, is_transB, scale, alpha, ctx); B,
bias,
C,
M,
N,
K,
is_bias,
flag_act,
is_transB,
scale,
alpha,
ctx);
#endif #endif
} }
......
...@@ -65,17 +65,15 @@ inline void write_gemv_out(const int* in, ...@@ -65,17 +65,15 @@ inline void write_gemv_out(const int* in,
vout1 = vmaxq_f32(vout1, vzero); vout1 = vmaxq_f32(vout1, vzero);
vout0 = vminq_f32(vout0, vsix); vout0 = vminq_f32(vout0, vsix);
vout1 = vminq_f32(vout1, vsix); vout1 = vminq_f32(vout1, vsix);
} else if (act == lite_api::ActivationType::kLeakyRelu) {
float32x4_t valpha = vdupq_n_f32(alpha);
uint32x4_t maska = vcgeq_f32(vout0, vzero);
uint32x4_t maskb = vcgeq_f32(vout1, vzero);
float32x4_t suma = vmulq_f32(vout0, valpha);
float32x4_t sumb = vmulq_f32(vout1, valpha);
vout0 = vbslq_f32(maska, vout0, suma);
vout1 = vbslq_f32(maskb, vout1, sumb);
} }
vout0 = vmaxq_f32(vout0, vzero);
vout1 = vmaxq_f32(vout1, vzero);
} else if (act == lite_api::ActivationType::kLeakyRelu) {
float32x4_t valpha = vdupq_n_f32(alpha);
uint32x4_t maska = vcgeq_f32(vout0, vzero);
uint32x4_t maskb = vcgeq_f32(vout1, vzero);
float32x4_t suma = vmulq_f32(vout0, valpha);
float32x4_t sumb = vmulq_f32(vout1, valpha);
vout0 = vbslq_f32(maska, vout0, suma);
vout1 = vbslq_f32(maskb, vout1, sumb);
} }
vst1q_f32(out, vout0); vst1q_f32(out, vout0);
vst1q_f32(out + 4, vout1); vst1q_f32(out + 4, vout1);
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "lite/core/profile/timer.h" #include "lite/core/profile/timer.h"
#include "lite/core/tensor.h" #include "lite/core/tensor.h"
#include "lite/tests/utils/tensor_utils.h" #include "lite/tests/utils/tensor_utils.h"
#include "lite/operators/op_params.h"
typedef paddle::lite::Tensor Tensor; typedef paddle::lite::Tensor Tensor;
using paddle::lite::profile::Timer; using paddle::lite::profile::Timer;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册