提交 9513ad79 编写于 作者: H hjchen2

Fix a bug in the 3x3 depthwise convolution that occurs when if_bias is false

上级 bb42168c
...@@ -91,6 +91,7 @@ PMStatus PaddleMobile<Device, T>::Load(const PaddleMobileConfig &config) { ...@@ -91,6 +91,7 @@ PMStatus PaddleMobile<Device, T>::Load(const PaddleMobileConfig &config) {
} }
} }
template <typename Device, typename T>
bool PaddleMobile<Device, T>::LoadCombinedMemory( bool PaddleMobile<Device, T>::LoadCombinedMemory(
size_t model_len, const uint8_t *model_buf, size_t combined_params_len, size_t model_len, const uint8_t *model_buf, size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize, bool quantification, uint8_t *combined_params_buf, bool optimize, bool quantification,
......
...@@ -253,7 +253,6 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input, ...@@ -253,7 +253,6 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
framework::Tensor *output, framework::Tensor *bias, framework::Tensor *output, framework::Tensor *bias,
bool if_bias, bool if_relu) { bool if_bias, bool if_relu) {
#if __ARM_NEON #if __ARM_NEON
const float *bias_data = bias->data<float>();
const int batch_size = static_cast<int>(input->dims()[0]); const int batch_size = static_cast<int>(input->dims()[0]);
const int c = static_cast<int>(input->dims()[1]); const int c = static_cast<int>(input->dims()[1]);
const int h = static_cast<int>(input->dims()[2]); const int h = static_cast<int>(input->dims()[2]);
...@@ -267,6 +266,11 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input, ...@@ -267,6 +266,11 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
const int lb = (h - 1) * w; const int lb = (h - 1) * w;
const int rb = h * w - 1; const int rb = h * w - 1;
const float *bias_data;
if (if_bias) {
bias_data = bias->data<float>();
}
float32x4_t zero = vdupq_n_f32(0.0); float32x4_t zero = vdupq_n_f32(0.0);
for (int b = 0; b < batch_size; ++b) { for (int b = 0; b < batch_size; ++b) {
...@@ -1966,7 +1970,6 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input, ...@@ -1966,7 +1970,6 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
framework::Tensor *output, framework::Tensor *bias, framework::Tensor *output, framework::Tensor *bias,
bool if_bias, bool if_relu) { bool if_bias, bool if_relu) {
#if __ARM_NEON #if __ARM_NEON
const int batch_size = static_cast<int>(input->dims()[0]); const int batch_size = static_cast<int>(input->dims()[0]);
const int input_channel = static_cast<int>(input->dims()[1]); const int input_channel = static_cast<int>(input->dims()[1]);
...@@ -1983,7 +1986,12 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input, ...@@ -1983,7 +1986,12 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
for (int c = 0; c < input_channel; c++) { for (int c = 0; c < input_channel; c++) {
const float *filter_data = filter->data<float>() + c * 9; const float *filter_data = filter->data<float>() + c * 9;
const float *input_data = input->data<float>() + c * inhxw; const float *input_data = input->data<float>() + c * inhxw;
const float *bias_data = bias->data<float>() + c; const float *bias_data;
float32x4_t biasv;
if (if_bias) {
bias_data = bias->data<float>() + c;
biasv = vld1q_dup_f32(bias_data);
}
float *output_data = output->data<float>() + c * outhxw; float *output_data = output->data<float>() + c * outhxw;
float w00 = filter_data[0]; float w00 = filter_data[0];
float w01 = filter_data[1]; float w01 = filter_data[1];
...@@ -1994,7 +2002,6 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input, ...@@ -1994,7 +2002,6 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
float w20 = filter_data[6]; float w20 = filter_data[6];
float w21 = filter_data[7]; float w21 = filter_data[7];
float w22 = filter_data[8]; float w22 = filter_data[8];
float32x4_t biasv = vld1q_dup_f32(bias_data);
for (int i = 0; i < output_height; i += 1) { for (int i = 0; i < output_height; i += 1) {
for (int m = 0; m < output_width - 2; m += 3) { for (int m = 0; m < output_width - 2; m += 3) {
float *output_ptr = output_data + i * output_width + m; float *output_ptr = output_data + i * output_width + m;
......
...@@ -643,8 +643,8 @@ void DepthwiseConv5x5S1<int8_t, int32_t>(const framework::Tensor &input, ...@@ -643,8 +643,8 @@ void DepthwiseConv5x5S1<int8_t, int32_t>(const framework::Tensor &input,
for (int w = valid_w_end; w < output_w; ++w) { for (int w = valid_w_end; w < output_w; ++w) {
int padding = w + 5 - (padding_w + input_w); int padding = w + 5 - (padding_w + input_w);
if (padding >= 5) { if (padding >= 5) {
*output_ptr0 = 0.f; *output_ptr0 = 0;
*output_ptr1 = 0.f; *output_ptr1 = 0;
} else { } else {
int iw = w - valid_w_end; int iw = w - valid_w_end;
int32_t sum0 = input_ptr0[iw] * filter_ptr0[0] + int32_t sum0 = input_ptr0[iw] * filter_ptr0[0] +
......
...@@ -5,7 +5,7 @@ TOTAL_ERRORS=0 ...@@ -5,7 +5,7 @@ TOTAL_ERRORS=0
# The trick to remove deleted files: https://stackoverflow.com/a/2413151 # The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \ for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \
grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \ grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \
grep -v "protobuf-c.h" | grep -v "protobuf-c.c" | grep -v "PaddleMobileCPU.h"); do grep -v "protobuf-c.h" | grep -v "protobuf-c.c"); do
cpplint $file; cpplint $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
done done
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册