diff --git a/paddle/fluid/lite/arm/math/elementwise.cc b/paddle/fluid/lite/arm/math/elementwise.cc index cf300616245c452fddbf89abfcb346188539edcd..2a74e7ee4ec4be51b420b1fa2d2a1be7c3f148fb 100644 --- a/paddle/fluid/lite/arm/math/elementwise.cc +++ b/paddle/fluid/lite/arm/math/elementwise.cc @@ -41,10 +41,10 @@ void elementwise_add(const float* dinx, const float* diny, float* dout, float32x4_t diny2 = vld1q_f32(diny_ptr + 8); float32x4_t diny3 = vld1q_f32(diny_ptr + 12); - float32x4_t dinx0 = vaddq_f32(dinx0, diny0); - float32x4_t dinx1 = vaddq_f32(dinx1, diny1); - float32x4_t dinx2 = vaddq_f32(dinx2, diny2); - float32x4_t dinx3 = vaddq_f32(dinx3, diny3); + dinx0 = vaddq_f32(dinx0, diny0); + dinx1 = vaddq_f32(dinx1, diny1); + dinx2 = vaddq_f32(dinx2, diny2); + dinx3 = vaddq_f32(dinx3, diny3); vst1q_f32(dout_ptr, dinx0); vst1q_f32(dout_ptr + 4, dinx1); @@ -100,10 +100,10 @@ void elementwise_add_axis(const float* dinx, const float* diny, if (remain >= 8) { float32x4_t din0 = vld1q_f32(din_ptr); float32x4_t din1 = vld1q_f32(din_ptr + 4); - din0 = vaddq_f32(din0, diny_data); - din1 = vaddq_f32(din1, diny_data); - vst1q_f32(dout_ptr, r0); - vst1q_f32(dout_ptr + 4, r1); + din0 = vaddq_f32(din0, rb); + din1 = vaddq_f32(din1, rb); + vst1q_f32(dout_ptr, din0); + vst1q_f32(dout_ptr + 4, din1); din_ptr += 8; dout_ptr += 8; remain -= 8; @@ -111,16 +111,16 @@ void elementwise_add_axis(const float* dinx, const float* diny, if (remain >= 4) { float32x4_t din0 = vld1q_f32(din_ptr); din0 = vaddq_f32(din0, rb); - vst1q_f32(dout_ptr, diny_data); + vst1q_f32(dout_ptr, din0); din_ptr += 4; dout_ptr += 4; remain -= 4; } if (remain > 0) { - for (p = 0; p < remain; p++) { - *dout_ptr = *dinx_ptr + diny_data; + for (int p = 0; p < remain; p++) { + *dout_ptr = *din_ptr + diny_data; dout_ptr++; - dinx_ptr++; + din_ptr++; } } } diff --git a/paddle/fluid/lite/arm/math/elementwise.h b/paddle/fluid/lite/arm/math/elementwise.h index 7e907cd5e04eeaa5f61f426897502278e49ac9ad..ca8f87895fcea80f9a1a178a0bf43b34c44182bb 100644 --- a/paddle/fluid/lite/arm/math/elementwise.h +++ b/paddle/fluid/lite/arm/math/elementwise.h @@ -23,8 +23,8 @@ template void elementwise_add(const T* dinx, const T* diny, T* dout, int num); template -void elementwise_add_axis(const T* dinx, const T* diny, T* dout, - int batch, int channels, int num); +void elementwise_add_axis(const T* dinx, const T* diny, T* dout, int batch, + int channels, int num); } // namespace math } // namespace arm diff --git a/paddle/fluid/lite/kernels/arm/conv_compute.cc b/paddle/fluid/lite/kernels/arm/conv_compute.cc index 0b464a5df0b0c33e76d2a31db183a515fea7a015..a8a2ac790a3c045642277ef75367bbdd878f0d6d 100644 --- a/paddle/fluid/lite/kernels/arm/conv_compute.cc +++ b/paddle/fluid/lite/kernels/arm/conv_compute.cc @@ -100,15 +100,15 @@ void ConvCompute::Run() { REGISTER_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::ConvCompute, def) .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))}) - .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))}) + // .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))}) .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))}) - .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))}) .Finalize(); REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::ConvCompute, def) .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))}) - .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))}) + // .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))}) .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))}) - .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))}) .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc b/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc index 1f06e6285bb73e9116dcb9c0f0cf85751c16fdb7..e9d9f4927b7ee18b3e18efa69a00dcb1c813bf3b 100644 --- a/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc +++ b/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc @@ -31,7 +31,7 @@ void ElementwiseAddCompute::Run() { if (axis < 0) { axis = x_dims.size() - y_dims.size(); } - if (axis == 0) { + if (x_dims.size() == y_dims.size()) { lite::arm::math::elementwise_add(x_data, y_data, out_data, x_dims.production()); } else { diff --git a/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc b/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc index b2bbe2d3ae0f453161f9c7bb03ce852c43b048d5..20b998dc6cfa8a9606fcf0f716470366fdd60338 100644 --- a/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc +++ b/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc @@ -44,6 +44,9 @@ void elementwise_add_compute_ref(const operators::ElementwiseParam& param) { auto x_dims = param.X->dims(); auto y_dims = param.Y->dims(); int axis = param.axis; + if (axis < 0) { + axis = x_dims.size() - y_dims.size(); + } int batch = 1; int channels = 1; int num = 1; @@ -61,9 +64,11 @@ void elementwise_add_compute_ref(const operators::ElementwiseParam& param) { int offset = (i * channels + j) * num; const dtype* din_ptr = x_data + offset; const dtype diny_data = y_data[j]; - dtype* dout_ptr = dout + offset; + dtype* dout_ptr = out_data + offset; for (int k = 0; k < num; ++k) { - dout_ptr[k] = din_ptr[k] + diny_data; + *dout_ptr = *din_ptr + diny_data; + dout_ptr++; + din_ptr++; } } } @@ -79,18 +84,15 @@ TEST(elementwise_add, compute) { for (auto h : {1, 3, 4, 11}) { for (auto w : {1, 3, 4, 11}) { for (auto axis : {-1, 0, 1, 2, 3}) { - for (auto yd{{n}, - {c}, - {h}, - {w}, - {n, c}, - {c, h}, - {h, w}, - {n, c, h}, - {c, h, w}, - {n, c, h, w}}) { + for (auto yd : + {std::vector({n}), std::vector({c}), + std::vector({h}), std::vector({w}), + std::vector({n, c}), std::vector({c, h}), + std::vector({h, w}), std::vector({n, c, h}), + std::vector({c, h, w}), + std::vector({n, c, h, w})}) { auto x_dim = DDim(std::vector({n, c, h, w})); - auto y_dim = DDim(std::vector(yd)); + auto y_dim = DDim(yd); int axis_t = axis < 0 ? x_dim.size() - y_dim.size() : axis; if (axis_t + y_dim.size() > 4) continue; @@ -102,26 +104,27 @@ TEST(elementwise_add, compute) { x.Resize(x_dim); y.Resize(y_dim); - output.Resize(DDim(std::vector({n, c, h, w}))); - output_ref.Resize(DDim(std::vector({n, c, h, w}))); + output.Resize(x_dim); + output_ref.Resize(x_dim); auto* x_data = x.mutable_data(); + auto* y_data = y.mutable_data(); auto* output_data = output.mutable_data(); auto* output_ref_data = output_ref.mutable_data(); - for (int i = 0; i < x.dims().production(); i++) { + for (int i = 0; i < x_dim.production(); i++) { x_data[i] = i; } - for (int i = 0; i < y.dims().production(); i++) { + for (int i = 0; i < y_dim.production(); i++) { y_data[i] = i; } param.X = &x; param.Y = &y; param.axis = axis; param.Out = &output; - softmax.SetParam(param); - softmax.Run(); + elementwise_add.SetParam(param); + elementwise_add.Run(); param.Out = &output_ref; elementwise_add_compute_ref(param); - for (int i = 0; i < out.dims().production(); i++) { + for (int i = 0; i < output.dims().production(); i++) { EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5); } }