提交 f99e8295 编写于 作者: C chenjiaoAngel

update conv_dw_3x3s2

上级 86746af8
...@@ -203,7 +203,7 @@ void conv_depthwise_3x3s1_fp32(const float *din, ...@@ -203,7 +203,7 @@ void conv_depthwise_3x3s1_fp32(const float *din,
w_out, w_out,
ctx); ctx);
} else { } else {
conv_depthwise_3x3s1p0_bias_s_relu(dout, conv_depthwise_3x3s1p1_bias_s_relu(dout,
din, din,
weights, weights,
bias, bias,
...@@ -267,7 +267,7 @@ void conv_depthwise_3x3s1_fp32(const float *din, ...@@ -267,7 +267,7 @@ void conv_depthwise_3x3s1_fp32(const float *din,
w_out, w_out,
ctx); ctx);
} else { } else {
conv_depthwise_3x3s1p0_bias_s_relu6(dout, conv_depthwise_3x3s1p1_bias_s_relu6(dout,
din, din,
weights, weights,
bias, bias,
...@@ -331,7 +331,7 @@ void conv_depthwise_3x3s1_fp32(const float *din, ...@@ -331,7 +331,7 @@ void conv_depthwise_3x3s1_fp32(const float *din,
w_out, w_out,
ctx); ctx);
} else { } else {
conv_depthwise_3x3s1p0_bias_s_leakyRelu(dout, conv_depthwise_3x3s1p1_bias_s_leakyRelu(dout,
din, din,
weights, weights,
bias, bias,
...@@ -2225,7 +2225,7 @@ void conv_depthwise_3x3s1p1_bias_relu6(float *dout, ...@@ -2225,7 +2225,7 @@ void conv_depthwise_3x3s1p1_bias_relu6(float *dout,
float32x4_t vzero = vdupq_n_f32(0.f); float32x4_t vzero = vdupq_n_f32(0.f);
#ifdef __aarch64__ #ifdef __aarch64__
float32x4_t vsix = vdupq_n_f32(six); float32x4_t vsix = vld1q_f32(six);
#endif #endif
for (int n = 0; n < num; ++n) { for (int n = 0; n < num; ++n) {
const float *din_batch = din + n * ch_in * size_in_channel; const float *din_batch = din + n * ch_in * size_in_channel;
...@@ -2523,7 +2523,7 @@ void conv_depthwise_3x3s1p1_bias_leakyRelu(float *dout, ...@@ -2523,7 +2523,7 @@ void conv_depthwise_3x3s1p1_bias_leakyRelu(float *dout,
float32x4_t vzero = vdupq_n_f32(0.f); float32x4_t vzero = vdupq_n_f32(0.f);
#ifdef __aarch64__ #ifdef __aarch64__
float32x4_t vscale = vdupq_n_f32(scale); float32x4_t vscale = vld1q_f32(scale);
#endif #endif
for (int n = 0; n < num; ++n) { for (int n = 0; n < num; ++n) {
const float *din_batch = din + n * ch_in * size_in_channel; const float *din_batch = din + n * ch_in * size_in_channel;
...@@ -2786,7 +2786,7 @@ void conv_depthwise_3x3s1p1_bias_s_relu6(float *dout, ...@@ -2786,7 +2786,7 @@ void conv_depthwise_3x3s1p1_bias_s_relu6(float *dout,
int size_in_channel = w_in * h_in; int size_in_channel = w_in * h_in;
int size_out_channel = w_out * h_out; int size_out_channel = w_out * h_out;
#ifdef __aarch64__ #ifdef __aarch64__
float32x4_t vsix = vdupq_n_f32(six); float32x4_t vsix = vld1q_f32(six);
#endif #endif
for (int n = 0; n < num; ++n) { for (int n = 0; n < num; ++n) {
const float *din_batch = din + n * ch_in * size_in_channel; const float *din_batch = din + n * ch_in * size_in_channel;
...@@ -2947,7 +2947,7 @@ void conv_depthwise_3x3s1p1_bias_s_leakyRelu(float *dout, ...@@ -2947,7 +2947,7 @@ void conv_depthwise_3x3s1p1_bias_s_leakyRelu(float *dout,
int size_in_channel = w_in * h_in; int size_in_channel = w_in * h_in;
int size_out_channel = w_out * h_out; int size_out_channel = w_out * h_out;
#ifdef __aarch64__ #ifdef __aarch64__
float32x4_t vscale = vdupq_n_f32(scale); float32x4_t vscale = vld1q_f32(scale);
#endif #endif
for (int n = 0; n < num; ++n) { for (int n = 0; n < num; ++n) {
const float *din_batch = din + n * ch_in * size_in_channel; const float *din_batch = din + n * ch_in * size_in_channel;
...@@ -3119,7 +3119,7 @@ void conv_depthwise_3x3s1p0_bias_relu6(float *dout, ...@@ -3119,7 +3119,7 @@ void conv_depthwise_3x3s1p0_bias_relu6(float *dout,
const int remian_idx[4] = {0, 1, 2, 3}; const int remian_idx[4] = {0, 1, 2, 3};
#ifdef __aarch64__ #ifdef __aarch64__
float32x4_t vsix = vdupq_n_f32(six); float32x4_t vsix = vld1q_f32(six);
#endif #endif
if (remain == 0 && size_pad_right == 6) { // w_in == w_out and w_out % 4 == 0 if (remain == 0 && size_pad_right == 6) { // w_in == w_out and w_out % 4 == 0
...@@ -3402,7 +3402,7 @@ void conv_depthwise_3x3s1p0_bias_s_relu6(float *dout, ...@@ -3402,7 +3402,7 @@ void conv_depthwise_3x3s1p0_bias_s_relu6(float *dout,
vcgeq_s32(vld1q_s32(right_pad_idx + 4), vdupq_n_s32(6 - w_in)); vcgeq_s32(vld1q_s32(right_pad_idx + 4), vdupq_n_s32(6 - w_in));
#ifdef __aarch64__ #ifdef __aarch64__
float32x4_t vsix = vdupq_n_f32(six); float32x4_t vsix = vld1q_f32(six);
#endif #endif
unsigned int vmask[8]; unsigned int vmask[8];
...@@ -3569,7 +3569,7 @@ void conv_depthwise_3x3s1p0_bias_leakyRelu(float *dout, ...@@ -3569,7 +3569,7 @@ void conv_depthwise_3x3s1p0_bias_leakyRelu(float *dout,
const int remian_idx[4] = {0, 1, 2, 3}; const int remian_idx[4] = {0, 1, 2, 3};
#ifdef __aarch64__ #ifdef __aarch64__
float32x4_t vscale = vdupq_n_f32(scale); float32x4_t vscale = vld1q_f32(scale);
#endif #endif
if (remain == 0 && size_pad_right == 6) { // w_in == w_out and w_out % 4 == 0 if (remain == 0 && size_pad_right == 6) { // w_in == w_out and w_out % 4 == 0
...@@ -3853,7 +3853,7 @@ void conv_depthwise_3x3s1p0_bias_s_leakyRelu(float *dout, ...@@ -3853,7 +3853,7 @@ void conv_depthwise_3x3s1p0_bias_s_leakyRelu(float *dout,
vcgeq_s32(vld1q_s32(right_pad_idx + 4), vdupq_n_s32(6 - w_in)); vcgeq_s32(vld1q_s32(right_pad_idx + 4), vdupq_n_s32(6 - w_in));
#ifdef __aarch64__ #ifdef __aarch64__
float32x4_t vscale = vdupq_n_f32(scale); float32x4_t vscale = vld1q_f32(scale);
#endif #endif
unsigned int vmask[8]; unsigned int vmask[8];
......
...@@ -379,6 +379,61 @@ void conv_depthwise_3x3s1p1_bias_s_no_relu(float* dout, ...@@ -379,6 +379,61 @@ void conv_depthwise_3x3s1p1_bias_s_no_relu(float* dout,
const int w_out, const int w_out,
ARMContext* ctx); ARMContext* ctx);
// Declaration only; the definition is not visible from here.
// Judging by the name, this is presumably the depthwise 3x3 convolution
// kernel for stride 2 / padding 0 without a fused ReLU, operating on float
// buffers -- TODO confirm against the definition.
//
// Parameters (types are as declared; roles are inferred from the names and
// should be verified against the definition):
//   dout         writable float buffer, presumably the output tensor.
//   din          read-only float buffer, presumably the input tensor.
//   weights      read-only float buffer, presumably the 3x3 filter taps.
//   bias         read-only float buffer, presumably the per-channel bias.
//   flag_bias    boolean switch, presumably enables the bias add.
//   flag_relu    boolean switch.
//                NOTE(review): the function is named "no_relu" yet still
//                accepts flag_relu -- confirm whether it is ignored.
//   num          presumably the batch size.
//   ch_in        presumably the number of channels.
//   h_in, w_in   presumably the input height and width.
//   h_out, w_out presumably the output height and width.
//   ctx          pointer to an ARMContext; how it is used is not visible here.
// Returns nothing (void).
void conv_depthwise_3x3s2p0_bias_no_relu(float* dout,
const float* din,
const float* weights,
const float* bias,
bool flag_bias,
bool flag_relu,
const int num,
const int ch_in,
const int h_in,
const int w_in,
const int h_out,
const int w_out,
ARMContext* ctx);
// Declaration only; the definition is not visible from here.
// Judging by the name, this is presumably a depthwise 3x3 convolution kernel
// for stride 2 / padding 0 without a fused ReLU, operating on float buffers.
// The "_s_" infix presumably marks a variant for small inputs -- TODO confirm
// both points against the definition.
//
// Parameters (types are as declared; roles are inferred from the names and
// should be verified against the definition):
//   dout         writable float buffer, presumably the output tensor.
//   din          read-only float buffer, presumably the input tensor.
//   weights      read-only float buffer, presumably the 3x3 filter taps.
//   bias         read-only float buffer, presumably the per-channel bias.
//   flag_bias    boolean switch, presumably enables the bias add.
//   flag_relu    boolean switch.
//                NOTE(review): the function is named "no_relu" yet still
//                accepts flag_relu -- confirm whether it is ignored.
//   num          presumably the batch size.
//   ch_in        presumably the number of channels.
//   h_in, w_in   presumably the input height and width.
//   h_out, w_out presumably the output height and width.
//   ctx          pointer to an ARMContext; how it is used is not visible here.
// Returns nothing (void).
void conv_depthwise_3x3s2p0_bias_s_no_relu(float* dout,
const float* din,
const float* weights,
const float* bias,
bool flag_bias,
bool flag_relu,
const int num,
const int ch_in,
const int h_in,
const int w_in,
const int h_out,
const int w_out,
ARMContext* ctx);
// Declaration only; the definition is not visible from here.
// Judging by the name, this is presumably the depthwise 3x3 convolution
// kernel for stride 2 / padding 1 without a fused ReLU, operating on float
// buffers -- TODO confirm against the definition.
//
// Parameters (types are as declared; roles are inferred from the names and
// should be verified against the definition):
//   dout         writable float buffer, presumably the output tensor.
//   din          read-only float buffer, presumably the input tensor.
//   weights      read-only float buffer, presumably the 3x3 filter taps.
//   bias         read-only float buffer, presumably the per-channel bias.
//   flag_bias    boolean switch, presumably enables the bias add.
//   flag_relu    boolean switch.
//                NOTE(review): the function is named "no_relu" yet still
//                accepts flag_relu -- confirm whether it is ignored.
//   num          presumably the batch size.
//   ch_in        presumably the number of channels.
//   h_in, w_in   presumably the input height and width.
//   h_out, w_out presumably the output height and width.
//   ctx          pointer to an ARMContext; how it is used is not visible here.
// Returns nothing (void).
void conv_depthwise_3x3s2p1_bias_no_relu(float* dout,
const float* din,
const float* weights,
const float* bias,
bool flag_bias,
bool flag_relu,
const int num,
const int ch_in,
const int h_in,
const int w_in,
const int h_out,
const int w_out,
ARMContext* ctx);
// Declaration only; the definition is not visible from here.
// Judging by the name, this is presumably a depthwise 3x3 convolution kernel
// for stride 2 / padding 1 without a fused ReLU, operating on float buffers.
// The "_s_" infix presumably marks a variant for small inputs -- TODO confirm
// both points against the definition.
//
// Parameters (types are as declared; roles are inferred from the names and
// should be verified against the definition):
//   dout         writable float buffer, presumably the output tensor.
//   din          read-only float buffer, presumably the input tensor.
//   weights      read-only float buffer, presumably the 3x3 filter taps.
//   bias         read-only float buffer, presumably the per-channel bias.
//   flag_bias    boolean switch, presumably enables the bias add.
//   flag_relu    boolean switch.
//                NOTE(review): the function is named "no_relu" yet still
//                accepts flag_relu -- confirm whether it is ignored.
//   num          presumably the batch size.
//   ch_in        presumably the number of channels.
//   h_in, w_in   presumably the input height and width.
//   h_out, w_out presumably the output height and width.
//   ctx          pointer to an ARMContext; how it is used is not visible here.
// Returns nothing (void).
void conv_depthwise_3x3s2p1_bias_s_no_relu(float* dout,
const float* din,
const float* weights,
const float* bias,
bool flag_bias,
bool flag_relu,
const int num,
const int ch_in,
const int h_in,
const int w_in,
const int h_out,
const int w_out,
ARMContext* ctx);
} // namespace math } // namespace math
} // namespace arm } // namespace arm
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册