提交 9513ad79 编写于 作者: H hjchen2

Fix 3x3 depthwise conv bug when if_bias is false

上级 bb42168c
......@@ -91,6 +91,7 @@ PMStatus PaddleMobile<Device, T>::Load(const PaddleMobileConfig &config) {
}
}
template <typename Device, typename T>
bool PaddleMobile<Device, T>::LoadCombinedMemory(
size_t model_len, const uint8_t *model_buf, size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize, bool quantification,
......
......@@ -253,7 +253,6 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
framework::Tensor *output, framework::Tensor *bias,
bool if_bias, bool if_relu) {
#if __ARM_NEON
const float *bias_data = bias->data<float>();
const int batch_size = static_cast<int>(input->dims()[0]);
const int c = static_cast<int>(input->dims()[1]);
const int h = static_cast<int>(input->dims()[2]);
......@@ -267,6 +266,11 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
const int lb = (h - 1) * w;
const int rb = h * w - 1;
const float *bias_data;
if (if_bias) {
bias_data = bias->data<float>();
}
float32x4_t zero = vdupq_n_f32(0.0);
for (int b = 0; b < batch_size; ++b) {
......@@ -1966,7 +1970,6 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
framework::Tensor *output, framework::Tensor *bias,
bool if_bias, bool if_relu) {
#if __ARM_NEON
const int batch_size = static_cast<int>(input->dims()[0]);
const int input_channel = static_cast<int>(input->dims()[1]);
......@@ -1983,7 +1986,12 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
for (int c = 0; c < input_channel; c++) {
const float *filter_data = filter->data<float>() + c * 9;
const float *input_data = input->data<float>() + c * inhxw;
const float *bias_data = bias->data<float>() + c;
const float *bias_data;
float32x4_t biasv;
if (if_bias) {
bias_data = bias->data<float>() + c;
biasv = vld1q_dup_f32(bias_data);
}
float *output_data = output->data<float>() + c * outhxw;
float w00 = filter_data[0];
float w01 = filter_data[1];
......@@ -1994,7 +2002,6 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
float w20 = filter_data[6];
float w21 = filter_data[7];
float w22 = filter_data[8];
float32x4_t biasv = vld1q_dup_f32(bias_data);
for (int i = 0; i < output_height; i += 1) {
for (int m = 0; m < output_width - 2; m += 3) {
float *output_ptr = output_data + i * output_width + m;
......
......@@ -643,8 +643,8 @@ void DepthwiseConv5x5S1<int8_t, int32_t>(const framework::Tensor &input,
for (int w = valid_w_end; w < output_w; ++w) {
int padding = w + 5 - (padding_w + input_w);
if (padding >= 5) {
*output_ptr0 = 0.f;
*output_ptr1 = 0.f;
*output_ptr0 = 0;
*output_ptr1 = 0;
} else {
int iw = w - valid_w_end;
int32_t sum0 = input_ptr0[iw] * filter_ptr0[0] +
......
......@@ -5,7 +5,7 @@ TOTAL_ERRORS=0
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \
grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \
grep -v "protobuf-c.h" | grep -v "protobuf-c.c" | grep -v "PaddleMobileCPU.h"); do
grep -v "protobuf-c.h" | grep -v "protobuf-c.c"); do
cpplint $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
done
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册