// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/elementwise_compute.h"
#include <gtest/gtest.h>
#include <algorithm>
#include <string>
#include <vector>
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

TEST(elementwise_add_arm, retrive_op) {
  auto elementwise_add = KernelRegistry::Global().Create("elementwise_add");
  ASSERT_FALSE(elementwise_add.empty());
  ASSERT_TRUE(elementwise_add.front());
}

TEST(elementwise_add_arm, init) {
  ElementwiseAddCompute elementwise_add;
  ASSERT_EQ(elementwise_add.precision(), PRECISION(kFloat));
  ASSERT_EQ(elementwise_add.target(), TARGET(kARM));
}

template <typename dtype>
void elementwise_compute_ref(const operators::ElementwiseParam& param,
                             const std::string elt_type,
                             const std::string act_type) {
  const dtype* x_data = param.X->data<dtype>();
  const dtype* y_data = param.Y->data<dtype>();
  dtype* out_data = param.Out->mutable_data<dtype>();
  auto x_dims = param.X->dims();
  auto y_dims = param.Y->dims();
  int axis = param.axis;
  if (axis < 0) {
    axis = x_dims.size() - y_dims.size();
  }
  // Collapse X into [batch, channels, num]: Y covers the `channels`
  // dimensions of X starting at `axis` and is broadcast over the rest.
  int batch = 1;
  int channels = 1;
  int num = 1;
  for (int i = 0; i < axis; ++i) {
    batch *= x_dims[i];
  }
  for (int i = 0; i < y_dims.size(); ++i) {
    channels *= y_dims[i];
  }
  for (int i = y_dims.size() + axis; i < x_dims.size(); ++i) {
    num *= x_dims[i];
  }
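  // Example: with x_dims = {3, 4, 5, 6}, y_dims = {4, 5} and axis = 1,
  // this yields batch = 3, channels = 4 * 5 = 20, num = 6, i.e. each
  // y_data[j] is broadcast across a contiguous run of `num` elements of X.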
  // do elementwise add/sub/max...
  if (elt_type == "add") {
    for (int i = 0; i < batch; ++i) {
      for (int j = 0; j < channels; ++j) {
        int offset = (i * channels + j) * num;
        const dtype* din_ptr = x_data + offset;
        const dtype diny_data = y_data[j];
        dtype* dout_ptr = out_data + offset;
        for (int k = 0; k < num; ++k) {
          *dout_ptr = *din_ptr + diny_data;
          dout_ptr++;
          din_ptr++;
        }
      }
    }
  } else if (elt_type == "sub") {
    for (int i = 0; i < batch; ++i) {
      for (int j = 0; j < channels; ++j) {
        int offset = (i * channels + j) * num;
        const dtype* din_ptr = x_data + offset;
        const dtype diny_data = y_data[j];
        dtype* dout_ptr = out_data + offset;
        for (int k = 0; k < num; ++k) {
          *dout_ptr = *din_ptr - diny_data;
          dout_ptr++;
          din_ptr++;
        }
      }
    }
  } else if (elt_type == "mul") {
    for (int i = 0; i < batch; ++i) {
      for (int j = 0; j < channels; ++j) {
        int offset = (i * channels + j) * num;
        const dtype* din_ptr = x_data + offset;
        const dtype diny_data = y_data[j];
        dtype* dout_ptr = out_data + offset;
        for (int k = 0; k < num; ++k) {
          *dout_ptr = *din_ptr * diny_data;
          dout_ptr++;
          din_ptr++;
        }
      }
    }
  } else if (elt_type == "max") {
    for (int i = 0; i < batch; ++i) {
      for (int j = 0; j < channels; ++j) {
        int offset = (i * channels + j) * num;
        const dtype* din_ptr = x_data + offset;
        const dtype diny_data = y_data[j];
        dtype* dout_ptr = out_data + offset;
        for (int k = 0; k < num; ++k) {
          *dout_ptr = std::max(*din_ptr, diny_data);
          dout_ptr++;
          din_ptr++;
        }
      }
    }
  } else {
    LOG(FATAL) << "unsupported Elementwise type: " << elt_type;
  }
  // do activation relu/sigmoid...
  if (act_type.size() > 0) {
    if (act_type == "relu") {
      for (int i = 0; i < batch; ++i) {
        for (int j = 0; j < channels; ++j) {
          dtype* dout_ptr = out_data + (i * channels + j) * num;
          for (int k = 0; k < num; ++k) {
            *dout_ptr = *dout_ptr > 0.0f ? *dout_ptr : 0.0f;
            dout_ptr++;
          }
        }
      }
    } else {
      LOG(FATAL) << "unsupported Activation type: " << act_type;
    }
  }
}
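// Every compute test below follows the same pattern: sweep NCHW shapes for
// X, broadcast axes, and candidate Y shapes; skip combinations where Y does
// not fit inside X at the resolved axis (axis_t + y_dim.size() > 4 or a
// mismatched dimension); then run the ARM kernel and compare every output
// element against elementwise_compute_ref. The `#if 1` blocks select a
// reduced sweep; the `#else` branches keep the larger shape and axis sets.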
TEST(elementwise_add, compute) {
  ElementwiseAddCompute elementwise_add;
  operators::ElementwiseParam param;
  lite::Tensor x, y, output, output_ref;

#if 1
  for (auto n : {1, 3, 4}) {
    for (auto c : {1, 3, 4}) {
      for (auto h : {1, 3, 4}) {
        for (auto w : {1, 3, 4}) {
          for (auto axis : {-1, 0, 1, 3}) {
            for (auto yd :
                 {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                  std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                  std::vector<int64_t>({n, c}), std::vector<int64_t>({c, h}),
                  std::vector<int64_t>({c, h, w}),
                  std::vector<int64_t>({n, c, h, w})}) {
#else
  for (auto n : {1, 3, 4, 11}) {
    for (auto c : {1, 3, 4, 11}) {
      for (auto h : {1, 3, 4, 11}) {
        for (auto w : {1, 3, 4, 11}) {
          for (auto axis : {-1, 0, 1, 2, 3}) {
            for (auto yd :
                 {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                  std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                  std::vector<int64_t>({n, c}), std::vector<int64_t>({c, h}),
                  std::vector<int64_t>({h, w}), std::vector<int64_t>({n, c, h}),
                  std::vector<int64_t>({c, h, w}),
                  std::vector<int64_t>({n, c, h, w})}) {
#endif
              auto x_dim = DDim(std::vector<int64_t>({n, c, h, w}));
              auto y_dim = DDim(yd);
              int axis_t = axis < 0 ? x_dim.size() - y_dim.size() : axis;
              if (axis_t + y_dim.size() > 4) continue;
              bool flag = false;
              for (int i = 0; i < y_dim.size(); i++) {
                if (x_dim[i + axis_t] != y_dim[i]) flag = true;
              }
              if (flag) continue;

              x.Resize(x_dim);
              y.Resize(y_dim);
              output.Resize(x_dim);
              output_ref.Resize(x_dim);
              auto* x_data = x.mutable_data<float>();
              auto* y_data = y.mutable_data<float>();
              auto* output_data = output.mutable_data<float>();
              auto* output_ref_data = output_ref.mutable_data<float>();
              for (int i = 0; i < x_dim.production(); i++) {
                x_data[i] = i;
              }
              for (int i = 0; i < y_dim.production(); i++) {
                y_data[i] = i;
              }
              param.X = &x;
              param.Y = &y;
              param.axis = axis;
              param.Out = &output;
              elementwise_add.SetParam(param);
              elementwise_add.Run();
              param.Out = &output_ref;
              elementwise_compute_ref<float>(param, "add", "");
              for (int i = 0; i < output.dims().production(); i++) {
                EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5);
              }
            }
          }
        }
      }
    }
  }
}

TEST(fusion_elementwise_add_activation_arm, retrive_op) {
  auto fusion_elementwise_add_activation =
      KernelRegistry::Global().Create("fusion_elementwise_add_activation");
  ASSERT_FALSE(fusion_elementwise_add_activation.empty());
  ASSERT_TRUE(fusion_elementwise_add_activation.front());
}

TEST(fusion_elementwise_add_activation_arm, init) {
  ElementwiseAddActivationCompute fusion_elementwise_add_activation;
  ASSERT_EQ(fusion_elementwise_add_activation.precision(), PRECISION(kFloat));
  ASSERT_EQ(fusion_elementwise_add_activation.target(), TARGET(kARM));
}

TEST(fusion_elementwise_add_activation_arm, compute) {
  ElementwiseAddActivationCompute fusion_elementwise_add_activation;
  operators::FusionElementwiseActivationParam param;
  lite::Tensor x, y, output, output_ref;

#if 1
  for (auto act_type : {"relu"}) {
    for (auto n : {1, 3, 4}) {
      for (auto c : {1, 3, 4}) {
        for (auto h : {1, 3, 4}) {
          for (auto w : {1, 3, 4}) {
            for (auto axis : {-1, 0, 1, 3}) {
              for (auto yd :
                   {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                    std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                    std::vector<int64_t>({n, c}), std::vector<int64_t>({h, w}),
                    std::vector<int64_t>({n, c, h}),
                    std::vector<int64_t>({n, c, h, w})}) {
#else
  for (auto act_type : {"relu"}) {
    for (auto n : {1, 3, 4, 11}) {
      for (auto c : {1, 3, 4, 11}) {
        for (auto h : {1, 3, 4, 11}) {
          for (auto w : {1, 3, 4, 11}) {
            for (auto axis : {-1, 0, 1, 2, 3}) {
              for (auto yd :
                   {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                    std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                    std::vector<int64_t>({n, c}), std::vector<int64_t>({c, h}),
                    std::vector<int64_t>({h, w}),
                    std::vector<int64_t>({n, c, h}),
                    std::vector<int64_t>({c, h, w}),
                    std::vector<int64_t>({n, c, h, w})}) {
#endif
                auto x_dim = DDim(std::vector<int64_t>({n, c, h, w}));
                auto y_dim = DDim(yd);
                int axis_t = axis < 0 ? x_dim.size() - y_dim.size() : axis;
                if (axis_t + y_dim.size() > 4) continue;
                bool flag = false;
                for (int i = 0; i < y_dim.size(); i++) {
                  if (x_dim[i + axis_t] != y_dim[i]) flag = true;
                }
                if (flag) continue;

                x.Resize(x_dim);
                y.Resize(y_dim);
                output.Resize(x_dim);
                output_ref.Resize(x_dim);
                auto* x_data = x.mutable_data<float>();
                auto* y_data = y.mutable_data<float>();
                auto* output_data = output.mutable_data<float>();
                auto* output_ref_data = output_ref.mutable_data<float>();
                for (int i = 0; i < x_dim.production(); i++) {
                  float sign = i % 3 == 0 ? -1.0f : 1.0f;
                  x_data[i] = i * sign;
                }
                for (int i = 0; i < y_dim.production(); i++) {
                  float sign = i % 2 == 0 ? 0.5f : -0.5f;
                  y_data[i] = i * sign;
                }
                param.X = &x;
                param.Y = &y;
                param.axis = axis;
                param.Out = &output;
                param.act_type = act_type;
                fusion_elementwise_add_activation.SetParam(param);
                fusion_elementwise_add_activation.Run();
                param.Out = &output_ref;
                elementwise_compute_ref<float>(param, "add", act_type);
                for (int i = 0; i < output.dims().production(); i++) {
                  EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5);
                }
              }
            }
          }
        }
      }
    }
  }
}

TEST(elementwise_mul_arm, retrive_op) {
  auto elementwise_mul = KernelRegistry::Global().Create("elementwise_mul");
  ASSERT_FALSE(elementwise_mul.empty());
  ASSERT_TRUE(elementwise_mul.front());
}

TEST(elementwise_mul_arm, init) {
  ElementwiseMulCompute elementwise_mul;
  ASSERT_EQ(elementwise_mul.precision(), PRECISION(kFloat));
  ASSERT_EQ(elementwise_mul.target(), TARGET(kARM));
}
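// The mul and max tests below reuse the add test's ramp inputs
// (x_data[i] = i, y_data[i] = i); only the reference operator changes.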
TEST(elementwise_mul, compute) {
  ElementwiseMulCompute elementwise_mul;
  operators::ElementwiseParam param;
  lite::Tensor x, y, output, output_ref;

#if 1
  for (auto n : {1, 3, 4}) {
    for (auto c : {1, 3, 4}) {
      for (auto h : {1, 3, 4}) {
        for (auto w : {1, 3, 4}) {
          for (auto axis : {-1, 0, 1, 3}) {
            for (auto yd :
                 {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                  std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                  std::vector<int64_t>({n, c}), std::vector<int64_t>({c, h}),
                  std::vector<int64_t>({c, h, w}),
                  std::vector<int64_t>({n, c, h, w})}) {
#else
  for (auto n : {1, 3, 4, 11}) {
    for (auto c : {1, 3, 4, 11}) {
      for (auto h : {1, 3, 4, 11}) {
        for (auto w : {1, 3, 4, 11}) {
          for (auto axis : {-1, 0, 1, 2, 3}) {
            for (auto yd :
                 {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                  std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                  std::vector<int64_t>({n, c}), std::vector<int64_t>({c, h}),
                  std::vector<int64_t>({h, w}), std::vector<int64_t>({n, c, h}),
                  std::vector<int64_t>({c, h, w}),
                  std::vector<int64_t>({n, c, h, w})}) {
#endif
              auto x_dim = DDim(std::vector<int64_t>({n, c, h, w}));
              auto y_dim = DDim(yd);
              int axis_t = axis < 0 ? x_dim.size() - y_dim.size() : axis;
              if (axis_t + y_dim.size() > 4) continue;
              bool flag = false;
              for (int i = 0; i < y_dim.size(); i++) {
                if (x_dim[i + axis_t] != y_dim[i]) flag = true;
              }
              if (flag) continue;

              x.Resize(x_dim);
              y.Resize(y_dim);
              output.Resize(x_dim);
              output_ref.Resize(x_dim);
              auto* x_data = x.mutable_data<float>();
              auto* y_data = y.mutable_data<float>();
              auto* output_data = output.mutable_data<float>();
              auto* output_ref_data = output_ref.mutable_data<float>();
              for (int i = 0; i < x_dim.production(); i++) {
                x_data[i] = i;
              }
              for (int i = 0; i < y_dim.production(); i++) {
                y_data[i] = i;
              }
              param.X = &x;
              param.Y = &y;
              param.axis = axis;
              param.Out = &output;
              elementwise_mul.SetParam(param);
              elementwise_mul.Run();
              param.Out = &output_ref;
              elementwise_compute_ref<float>(param, "mul", "");
              for (int i = 0; i < output.dims().production(); i++) {
                EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5);
              }
            }
          }
        }
      }
    }
  }
}

TEST(fusion_elementwise_mul_activation_arm, retrive_op) {
  auto fusion_elementwise_mul_activation =
      KernelRegistry::Global().Create("fusion_elementwise_mul_activation");
  ASSERT_FALSE(fusion_elementwise_mul_activation.empty());
  ASSERT_TRUE(fusion_elementwise_mul_activation.front());
}

TEST(fusion_elementwise_mul_activation_arm, init) {
  ElementwiseMulActivationCompute fusion_elementwise_mul_activation;
  ASSERT_EQ(fusion_elementwise_mul_activation.precision(), PRECISION(kFloat));
  ASSERT_EQ(fusion_elementwise_mul_activation.target(), TARGET(kARM));
}
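// The fused compute tests seed X with sign-flipped ramps (negative when
// i % 3 == 0) and Y with alternating +/-0.5 multiples, so the fused ReLU
// sees both negative results to clamp and positive results to pass through.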
TEST(fusion_elementwise_mul_activation_arm, compute) {
  ElementwiseMulActivationCompute fusion_elementwise_mul_activation;
  operators::FusionElementwiseActivationParam param;
  lite::Tensor x, y, output, output_ref;

#if 1
  for (auto act_type : {"relu"}) {
    for (auto n : {1, 3, 4}) {
      for (auto c : {1, 3, 4}) {
        for (auto h : {1, 3, 4}) {
          for (auto w : {1, 3, 4}) {
            for (auto axis : {-1, 0, 1, 3}) {
              for (auto yd :
                   {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                    std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                    std::vector<int64_t>({n, c}), std::vector<int64_t>({h, w}),
                    std::vector<int64_t>({n, c, h}),
                    std::vector<int64_t>({n, c, h, w})}) {
#else
  for (auto act_type : {"relu"}) {
    for (auto n : {1, 3, 4, 11}) {
      for (auto c : {1, 3, 4, 11}) {
        for (auto h : {1, 3, 4, 11}) {
          for (auto w : {1, 3, 4, 11}) {
            for (auto axis : {-1, 0, 1, 2, 3}) {
              for (auto yd :
                   {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                    std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                    std::vector<int64_t>({n, c}), std::vector<int64_t>({c, h}),
                    std::vector<int64_t>({h, w}),
                    std::vector<int64_t>({n, c, h}),
                    std::vector<int64_t>({c, h, w}),
                    std::vector<int64_t>({n, c, h, w})}) {
#endif
                auto x_dim = DDim(std::vector<int64_t>({n, c, h, w}));
                auto y_dim = DDim(yd);
                int axis_t = axis < 0 ? x_dim.size() - y_dim.size() : axis;
                if (axis_t + y_dim.size() > 4) continue;
                bool flag = false;
                for (int i = 0; i < y_dim.size(); i++) {
                  if (x_dim[i + axis_t] != y_dim[i]) flag = true;
                }
                if (flag) continue;

                x.Resize(x_dim);
                y.Resize(y_dim);
                output.Resize(x_dim);
                output_ref.Resize(x_dim);
                auto* x_data = x.mutable_data<float>();
                auto* y_data = y.mutable_data<float>();
                auto* output_data = output.mutable_data<float>();
                auto* output_ref_data = output_ref.mutable_data<float>();
                for (int i = 0; i < x_dim.production(); i++) {
                  float sign = i % 3 == 0 ? -1.0f : 1.0f;
                  x_data[i] = i * sign;
                }
                for (int i = 0; i < y_dim.production(); i++) {
                  float sign = i % 2 == 0 ? 0.5f : -0.5f;
                  y_data[i] = i * sign;
                }
                param.X = &x;
                param.Y = &y;
                param.axis = axis;
                param.Out = &output;
                param.act_type = act_type;
                fusion_elementwise_mul_activation.SetParam(param);
                fusion_elementwise_mul_activation.Run();
                param.Out = &output_ref;
                elementwise_compute_ref<float>(param, "mul", act_type);
                for (int i = 0; i < output.dims().production(); i++) {
                  EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5);
                }
              }
            }
          }
        }
      }
    }
  }
}

TEST(elementwise_max_arm, retrive_op) {
  auto elementwise_max = KernelRegistry::Global().Create("elementwise_max");
  ASSERT_FALSE(elementwise_max.empty());
  ASSERT_TRUE(elementwise_max.front());
}

TEST(elementwise_max_arm, init) {
  ElementwiseMaxCompute elementwise_max;
  ASSERT_EQ(elementwise_max.precision(), PRECISION(kFloat));
  ASSERT_EQ(elementwise_max.target(), TARGET(kARM));
}
TEST(elementwise_max, compute) {
  ElementwiseMaxCompute elementwise_max;
  operators::ElementwiseParam param;
  lite::Tensor x, y, output, output_ref;

#if 1
  for (auto n : {1, 3, 4}) {
    for (auto c : {1, 3, 4}) {
      for (auto h : {1, 3, 4}) {
        for (auto w : {1, 3, 4}) {
          for (auto axis : {-1, 0, 1, 3}) {
            for (auto yd :
                 {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                  std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                  std::vector<int64_t>({n, c}), std::vector<int64_t>({c, h}),
                  std::vector<int64_t>({c, h, w}),
                  std::vector<int64_t>({n, c, h, w})}) {
#else
  for (auto n : {1, 3, 4, 11}) {
    for (auto c : {1, 3, 4, 11}) {
      for (auto h : {1, 3, 4, 11}) {
        for (auto w : {1, 3, 4, 11}) {
          for (auto axis : {-1, 0, 1, 2, 3}) {
            for (auto yd :
                 {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                  std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                  std::vector<int64_t>({n, c}), std::vector<int64_t>({c, h}),
                  std::vector<int64_t>({h, w}), std::vector<int64_t>({n, c, h}),
                  std::vector<int64_t>({c, h, w}),
                  std::vector<int64_t>({n, c, h, w})}) {
#endif
              auto x_dim = DDim(std::vector<int64_t>({n, c, h, w}));
              auto y_dim = DDim(yd);
              int axis_t = axis < 0 ? x_dim.size() - y_dim.size() : axis;
              if (axis_t + y_dim.size() > 4) continue;
              bool flag = false;
              for (int i = 0; i < y_dim.size(); i++) {
                if (x_dim[i + axis_t] != y_dim[i]) flag = true;
              }
              if (flag) continue;

              x.Resize(x_dim);
              y.Resize(y_dim);
              output.Resize(x_dim);
              output_ref.Resize(x_dim);
              auto* x_data = x.mutable_data<float>();
              auto* y_data = y.mutable_data<float>();
              auto* output_data = output.mutable_data<float>();
              auto* output_ref_data = output_ref.mutable_data<float>();
              for (int i = 0; i < x_dim.production(); i++) {
                x_data[i] = i;
              }
              for (int i = 0; i < y_dim.production(); i++) {
                y_data[i] = i;
              }
              param.X = &x;
              param.Y = &y;
              param.axis = axis;
              param.Out = &output;
              elementwise_max.SetParam(param);
              elementwise_max.Run();
              param.Out = &output_ref;
              elementwise_compute_ref<float>(param, "max", "");
              for (int i = 0; i < output.dims().production(); i++) {
                EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5);
              }
            }
          }
        }
      }
    }
  }
}

TEST(fusion_elementwise_max_activation_arm, retrive_op) {
  auto fusion_elementwise_max_activation =
      KernelRegistry::Global().Create("fusion_elementwise_max_activation");
  ASSERT_FALSE(fusion_elementwise_max_activation.empty());
  ASSERT_TRUE(fusion_elementwise_max_activation.front());
}

TEST(fusion_elementwise_max_activation_arm, init) {
  ElementwiseMaxActivationCompute fusion_elementwise_max_activation;
  ASSERT_EQ(fusion_elementwise_max_activation.precision(), PRECISION(kFloat));
  ASSERT_EQ(fusion_elementwise_max_activation.target(), TARGET(kARM));
}
TEST(fusion_elementwise_max_activation_arm, compute) {
  ElementwiseMaxActivationCompute fusion_elementwise_max_activation;
  operators::FusionElementwiseActivationParam param;
  lite::Tensor x, y, output, output_ref;

#if 1
  for (auto act_type : {"relu"}) {
    for (auto n : {1, 3, 4}) {
      for (auto c : {1, 3, 4}) {
        for (auto h : {1, 3, 4}) {
          for (auto w : {1, 3, 4}) {
            for (auto axis : {-1, 0, 1, 3}) {
              for (auto yd :
                   {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                    std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                    std::vector<int64_t>({n, c}), std::vector<int64_t>({h, w}),
                    std::vector<int64_t>({n, c, h}),
                    std::vector<int64_t>({n, c, h, w})}) {
#else
  for (auto act_type : {"relu"}) {
    for (auto n : {1, 3, 4, 11}) {
      for (auto c : {1, 3, 4, 11}) {
        for (auto h : {1, 3, 4, 11}) {
          for (auto w : {1, 3, 4, 11}) {
            for (auto axis : {-1, 0, 1, 2, 3}) {
              for (auto yd :
                   {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
                    std::vector<int64_t>({h}), std::vector<int64_t>({w}),
                    std::vector<int64_t>({n, c}), std::vector<int64_t>({c, h}),
                    std::vector<int64_t>({h, w}),
                    std::vector<int64_t>({n, c, h}),
                    std::vector<int64_t>({c, h, w}),
                    std::vector<int64_t>({n, c, h, w})}) {
#endif
                auto x_dim = DDim(std::vector<int64_t>({n, c, h, w}));
                auto y_dim = DDim(yd);
                int axis_t = axis < 0 ? x_dim.size() - y_dim.size() : axis;
                if (axis_t + y_dim.size() > 4) continue;
                bool flag = false;
                for (int i = 0; i < y_dim.size(); i++) {
                  if (x_dim[i + axis_t] != y_dim[i]) flag = true;
                }
                if (flag) continue;

                x.Resize(x_dim);
                y.Resize(y_dim);
                output.Resize(x_dim);
                output_ref.Resize(x_dim);
                auto* x_data = x.mutable_data<float>();
                auto* y_data = y.mutable_data<float>();
                auto* output_data = output.mutable_data<float>();
                auto* output_ref_data = output_ref.mutable_data<float>();
                for (int i = 0; i < x_dim.production(); i++) {
                  float sign = i % 3 == 0 ? -1.0f : 1.0f;
                  x_data[i] = i * sign;
                }
                for (int i = 0; i < y_dim.production(); i++) {
                  float sign = i % 2 == 0 ? 0.5f : -0.5f;
                  y_data[i] = i * sign;
                }
                param.X = &x;
                param.Y = &y;
                param.axis = axis;
                param.Out = &output;
                param.act_type = act_type;
                fusion_elementwise_max_activation.SetParam(param);
                fusion_elementwise_max_activation.Run();
                param.Out = &output_ref;
                elementwise_compute_ref<float>(param, "max", act_type);
                for (int i = 0; i < output.dims().production(); i++) {
                  EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5);
                }
              }
            }
          }
        }
      }
    }
  }
}

}  // namespace arm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

// Reference the kernels under test so their registrations are linked in.
USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fusion_elementwise_add_activation, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_mul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fusion_elementwise_mul_activation, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_max, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fusion_elementwise_max_activation, kARM, kFloat, kNCHW, def);