Fix 3x3 depthwise conv bug when if_bias is false

9513ad79 · hjchen2 · bb42168c · 9513ad79 · 9513ad79 · 9513ad79
4 changed files
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -91,6 +91,7 @@ PMStatus PaddleMobile<Device, T>::Load(const PaddleMobileConfig &config) {
  }
 }
+template <typename Device, typename T>
 bool PaddleMobile<Device, T>::LoadCombinedMemory(
    size_t model_len, const uint8_t *model_buf, size_t combined_params_len,
    uint8_t *combined_params_buf, bool optimize, bool quantification,

--- a/src/operators/math/depthwise_conv3x3.cpp
+++ b/src/operators/math/depthwise_conv3x3.cpp
@@ -253,7 +253,6 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
                          framework::Tensor *output, framework::Tensor *bias,
                          bool if_bias, bool if_relu) {
 #if __ARM_NEON
-  const float *bias_data = bias->data<float>();
  const int batch_size = static_cast<int>(input->dims()[0]);
  const int c = static_cast<int>(input->dims()[1]);
  const int h = static_cast<int>(input->dims()[2]);
@@ -267,6 +266,11 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
  const int lb = (h - 1) * w;
  const int rb = h * w - 1;
+  const float *bias_data;
+  if (if_bias) {
+    bias_data = bias->data<float>();
+  }
  float32x4_t zero = vdupq_n_f32(0.0);
  for (int b = 0; b < batch_size; ++b) {
@@ -1966,7 +1970,6 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
                          framework::Tensor *output, framework::Tensor *bias,
                          bool if_bias, bool if_relu) {
 #if __ARM_NEON
  const int batch_size = static_cast<int>(input->dims()[0]);
  const int input_channel = static_cast<int>(input->dims()[1]);
@@ -1983,7 +1986,12 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
    for (int c = 0; c < input_channel; c++) {
      const float *filter_data = filter->data<float>() + c * 9;
      const float *input_data = input->data<float>() + c * inhxw;
-      const float *bias_data = bias->data<float>() + c;
+      const float *bias_data;
+      float32x4_t biasv;
+      if (if_bias) {
+        bias_data = bias->data<float>() + c;
+        biasv = vld1q_dup_f32(bias_data);
+      }
      float *output_data = output->data<float>() + c * outhxw;
      float w00 = filter_data[0];
      float w01 = filter_data[1];
@@ -1994,7 +2002,6 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
      float w20 = filter_data[6];
      float w21 = filter_data[7];
      float w22 = filter_data[8];
-      float32x4_t biasv = vld1q_dup_f32(bias_data);
      for (int i = 0; i < output_height; i += 1) {
        for (int m = 0; m < output_width - 2; m += 3) {
          float *output_ptr = output_data + i * output_width + m;

--- a/src/operators/math/depthwise_conv5x5_int8.cpp
+++ b/src/operators/math/depthwise_conv5x5_int8.cpp
@@ -643,8 +643,8 @@ void DepthwiseConv5x5S1<int8_t, int32_t>(const framework::Tensor &input,
        for (int w = valid_w_end; w < output_w; ++w) {
          int padding = w + 5 - (padding_w + input_w);
          if (padding >= 5) {
-            *output_ptr0 = 0.f;
+            *output_ptr0 = 0;
-            *output_ptr1 = 0.f;
+            *output_ptr1 = 0;
          } else {
            int iw = w - valid_w_end;
            int32_t sum0 = input_ptr0[iw] * filter_ptr0[0] +

--- a/tools/pre-commit.hooks/cpplint.hook
+++ b/tools/pre-commit.hooks/cpplint.hook
@@ -5,7 +5,7 @@ TOTAL_ERRORS=0
 # The trick to remove deleted files: https://stackoverflow.com/a/2413151
 for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \
        grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \
-        grep -v "protobuf-c.h" | grep -v "protobuf-c.c" | grep -v "PaddleMobileCPU.h"); do
+        grep -v "protobuf-c.h" | grep -v "protobuf-c.c"); do
    cpplint $file;
    TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
 done