diff --git a/paddle/fluid/lite/arm/math/CMakeLists.txt b/paddle/fluid/lite/arm/math/CMakeLists.txt index 3c0fa97afae0f83485004e14ddf60eac97cdef3e..8af2c33943f7e2abe7e539b04e3759e8e2d4da33 100644 --- a/paddle/fluid/lite/arm/math/CMakeLists.txt +++ b/paddle/fluid/lite/arm/math/CMakeLists.txt @@ -6,4 +6,4 @@ if(NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)) return() endif() -cc_library(math_arm SRCS funcs.cc packed_sgemm.cc softmax.cc scale.cc DEPS ${lite_kernel_deps} eigen3) +cc_library(math_arm SRCS funcs.cc packed_sgemm.cc softmax.cc scale.cc elementwise.cc DEPS ${lite_kernel_deps} eigen3) diff --git a/paddle/fluid/lite/arm/math/elementwise.cc b/paddle/fluid/lite/arm/math/elementwise.cc new file mode 100644 index 0000000000000000000000000000000000000000..68140a5d7dbccc9fa0028e9cde3e9d074275a7ee --- /dev/null +++ b/paddle/fluid/lite/arm/math/elementwise.cc @@ -0,0 +1,70 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/arm/math/elementwise.h" +#include "paddle/fluid/lite/arm/math/funcs.h" + +namespace paddle { +namespace lite { +namespace arm { +namespace math { + +template <> +void elementwise_add(const float* dinx, const float* diny, float* dout, + int num) { + int cnt = num >> 4; + int remain = num % 16; +#pragma omp parallel for + for (int i = 0; i < cnt; i++) { + const float* dinx_ptr = dinx + (i << 4); + const float* diny_ptr = diny + (i << 4); + float* dout_ptr = dout + (i << 4); + + float32x4_t dinx0 = vld1q_f32(dinx_ptr); + float32x4_t dinx1 = vld1q_f32(dinx_ptr + 4); + float32x4_t dinx2 = vld1q_f32(dinx_ptr + 8); + float32x4_t dinx3 = vld1q_f32(dinx_ptr + 12); + + float32x4_t diny0 = vld1q_f32(diny_ptr); + float32x4_t diny1 = vld1q_f32(diny_ptr + 4); + float32x4_t diny2 = vld1q_f32(diny_ptr + 8); + float32x4_t diny3 = vld1q_f32(diny_ptr + 12); + + float32x4_t vsum0 = vaddq_f32(dinx0, diny0); + float32x4_t vsum1 = vaddq_f32(dinx1, diny1); + float32x4_t vsum2 = vaddq_f32(dinx2, diny2); + float32x4_t vsum3 = vaddq_f32(dinx3, diny3); + + vst1q_f32(dout_ptr, vsum0); + vst1q_f32(dout_ptr + 4, vsum1); + vst1q_f32(dout_ptr + 8, vsum2); + vst1q_f32(dout_ptr + 12, vsum3); + } + if (remain > 0) { + const float* dinx_ptr = dinx + (cnt << 4); + const float* diny_ptr = diny + (cnt << 4); + float* dout_ptr = dout + (cnt << 4); + for (int i = 0; i < remain; i++) { + *dout_ptr = *dinx_ptr + *diny_ptr; + dout_ptr++; + dinx_ptr++; + diny_ptr++; + } + } +} + +} // namespace math +} // namespace arm +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/arm/math/elementwise.h b/paddle/fluid/lite/arm/math/elementwise.h new file mode 100644 index 0000000000000000000000000000000000000000..cf4c8e46b0703a888bc9ac9a4a395d4e57ba886d --- /dev/null +++ b/paddle/fluid/lite/arm/math/elementwise.h @@ -0,0 +1,28 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +namespace paddle { +namespace lite { +namespace arm { +namespace math { + +template +void elementwise_add(const T* dinx, const T* diny, T* dout, int num); + +} // namespace math +} // namespace arm +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/arm/math/funcs.h b/paddle/fluid/lite/arm/math/funcs.h index b3aefcca6a55321adee9639a6df2b304cfb3b572..e95506c1a968f2dcb336a1ae45854bb0f3c9fbfe 100644 --- a/paddle/fluid/lite/arm/math/funcs.h +++ b/paddle/fluid/lite/arm/math/funcs.h @@ -17,6 +17,7 @@ #include #include +#include "paddle/fluid/lite/arm/math/elementwise.h" #include "paddle/fluid/lite/arm/math/packed_sgemm.h" #include "paddle/fluid/lite/arm/math/scale.h" #include "paddle/fluid/lite/arm/math/softmax.h" diff --git a/paddle/fluid/lite/kernels/arm/CMakeLists.txt b/paddle/fluid/lite/kernels/arm/CMakeLists.txt index 1b5f5b7fdc3399afbddf114e2096fc06810237ed..ff3cab02ee8b7e88783b8c6c18c496bf674c7cfd 100644 --- a/paddle/fluid/lite/kernels/arm/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/arm/CMakeLists.txt @@ -9,16 +9,19 @@ cc_library(relu_compute_arm SRCS relu_compute.cc DEPS ${lite_kernel_deps}) cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} eigen3) cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} math_arm) cc_library(softmax_compute_arm SRCS softmax_compute.cc DEPS ${lite_kernel_deps} math_arm) +cc_library(elementwise_add_compute_arm SRCS elementwise_add_compute.cc DEPS ${lite_kernel_deps} math_arm) lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm) lite_cc_test(test_scale_compute_arm SRCS scale_compute_test.cc DEPS scale_compute_arm) lite_cc_test(test_softmax_compute_arm SRCS softmax_compute_test.cc DEPS softmax_compute_arm) +lite_cc_test(test_elementwise_add_compute_arm SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_arm) set(arm_kernels fc_compute_arm relu_compute_arm mul_compute_arm scale_compute_arm - softmax_compute_arm) + softmax_compute_arm + elementwise_add_compute_arm) set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels") diff --git a/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc b/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..310cde17bbd2f235789250fa02f8e8f82f672ff0 --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/arm/elementwise_add_compute.h" +#include "paddle/fluid/lite/arm/math/funcs.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +void ElementwiseAddCompute::Run() { + auto& param = Param(); + const float* x_data = param.X->data(); + const float* y_data = param.Y->data(); + float* out_data = param.Out->mutable_data(); + int n = param.X->dims().production(); + lite::arm::math::elementwise_add(x_data, y_data, out_data, n); +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, + paddle::lite::kernels::arm::ElementwiseAddCompute, def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/elementwise_add_compute.h b/paddle/fluid/lite/kernels/arm/elementwise_add_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..9939509d0be25eadccdb563e802c98291dea751b --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/elementwise_add_compute.h @@ -0,0 +1,36 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +class ElementwiseAddCompute + : public KernelLite { + public: + void Run() override; + + virtual ~ElementwiseAddCompute() = default; +}; + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc b/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..7156d08ce77df9c93ec46c1c55fb3a11df44a308 --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc @@ -0,0 +1,86 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/arm/elementwise_add_compute.h" +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +TEST(elementwise_add_arm, retrive_op) { + auto elementwise_add = + KernelRegistry::Global().Create( + "elementwise_add"); + ASSERT_FALSE(elementwise_add.empty()); + ASSERT_TRUE(elementwise_add.front()); +} + +TEST(elementwise_add_arm, init) { + ElementwiseAddCompute elementwise_add; + ASSERT_EQ(elementwise_add.precision(), PRECISION(kFloat)); + ASSERT_EQ(elementwise_add.target(), TARGET(kARM)); +} + +template +void elementwise_add_compute_ref(const operators::ElementwiseParam& param) { + const dtype* x_data = param.X->data(); + const dtype* y_data = param.Y->data(); + dtype* out_data = param.Out->mutable_data(); + DDim dim = param.X->dims(); + ASSERT_EQ(dim.data(), param.Out->dims().data()); + for (int i = 0; i < dim.production(); i++) { + out_data[i] = x_data[i] + y_data[i]; + } +} + +TEST(elementwise_add, compute) { + ElementwiseAddCompute elementwise_add; + operators::ElementwiseParam param; + + lite::Tensor x, y, out, out_ref; + x.Resize(DDim(std::vector({2, 3, 4, 5}))); + y.Resize(DDim(std::vector({2, 3, 4, 5}))); + out.Resize(DDim(std::vector({2, 3, 4, 5}))); + out_ref.Resize(DDim(std::vector({2, 3, 4, 5}))); + auto* x_data = x.mutable_data(); + auto* y_data = y.mutable_data(); + auto* out_data = out.mutable_data(); + auto* out_ref_data = out_ref.mutable_data(); + for (int i = 0; i < x.dims().production(); i++) { + x_data[i] = y_data[i] = i; + } + + param.X = &x; + param.Y = &y; + param.Out = &out; + elementwise_add.SetParam(param); + elementwise_add.Run(); + + param.Out = &out_ref; + elementwise_add_compute_ref(param); + for (int i = 0; i < out.dims().production(); i++) { + EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5); + } +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def);