// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "lite/operators/elementwise_ops.h" #include #include #include "lite/core/op_registry.h" #include "lite/kernels/npu/bridges/registry.h" #include "lite/kernels/npu/bridges/test_helper.h" namespace paddle { namespace lite { namespace kernels { namespace npu { namespace bridges { template void elementwise_add_ref(const std::shared_ptr op) { Scope* scope = op->scope(); const OpInfo* op_info = op->op_info(); auto x = scope->FindTensor("x"); auto y = scope->FindTensor("y"); auto out = scope->FindMutableTensor("out_ref"); out->Resize(x->dims()); auto x_data = x->data(); auto y_data = y->data(); auto out_data = out->mutable_data(); auto x_dims = x->dims(); auto y_dims = y->dims(); int axis = op_info->GetAttr("axis"); if (axis < 0) { axis += x_dims.size(); } int batch = x_dims[0] / y_dims[0]; int channels = y->numel(); int num = x->numel() / channels / batch; // do elementwise add/sub/max... std::string op_type = op_info->Type(); if (op_type == "elementwise_add") { for (int i = 0; i < batch; ++i) { for (int j = 0; j < channels; ++j) { int offset = (i * channels + j) * num; const dtype* din_ptr = x_data + offset; const dtype diny_data = y_data[j]; dtype* dout_ptr = out_data + offset; for (int k = 0; k < num; ++k) { *dout_ptr = *din_ptr + diny_data; dout_ptr++; din_ptr++; } } } } else if (op_type == "elementwise_sub") { for (int i = 0; i < batch; ++i) { for (int j = 0; j < channels; ++j) { int offset = (i * channels + j) * num; const dtype* din_ptr = x_data + offset; const dtype diny_data = y_data[j]; dtype* dout_ptr = out_data + offset; for (int k = 0; k < num; ++k) { *dout_ptr = *din_ptr - diny_data; dout_ptr++; din_ptr++; } } } } else if (op_type == "elementwise_mul") { for (int i = 0; i < batch; ++i) { for (int j = 0; j < channels; ++j) { int offset = (i * channels + j) * num; const dtype* din_ptr = x_data + offset; const dtype diny_data = y_data[j]; dtype* dout_ptr = out_data + offset; for (int k = 0; k < num; ++k) { *dout_ptr = *din_ptr * diny_data; dout_ptr++; din_ptr++; } } } } else if (op_type == "elementwise_div") { for (int i = 0; i < batch; ++i) { for (int j = 0; j < channels; ++j) { int offset = (i * channels + j) * num; const dtype* din_ptr = x_data + offset; const dtype diny_data = y_data[j]; dtype* dout_ptr = out_data + offset; for (int k = 0; k < num; ++k) { *dout_ptr = *din_ptr / diny_data; dout_ptr++; din_ptr++; } } } } else if (op_type == "elementwise_max") { for (int i = 0; i < batch; ++i) { for (int j = 0; j < channels; ++j) { int offset = (i * channels + j) * num; const dtype* din_ptr = x_data + offset; const dtype diny_data = y_data[j]; dtype* dout_ptr = out_data + offset; for (int k = 0; k < num; ++k) { *dout_ptr = std::max(*din_ptr, diny_data); dout_ptr++; din_ptr++; } } } } else { LOG(FATAL) << "unsupported Elementwise type: " << op_type; } } void test_elementwise_add(const std::vector& x_shape, const std::vector& y_shape, int axis, std::string elt_type) { // prepare input&output variables Scope scope; std::string x_var_name = "x"; std::string y_var_name = "y"; std::string out_var_name = "out"; std::string out_ref_var_name = "out_ref"; auto* x = scope.Var(x_var_name)->GetMutable(); auto* y = scope.Var(y_var_name)->GetMutable(); auto* out = scope.Var(out_var_name)->GetMutable(); auto* out_ref = scope.Var(out_ref_var_name)->GetMutable(); x->Resize(x_shape); y->Resize(y_shape); // initialize input&output data FillTensor(x, 1, 5); FillTensor(y, 1, 5); // initialize op desc cpp::OpDesc opdesc; opdesc.SetType("elementwise_" + elt_type); opdesc.SetInput("X", {x_var_name}); opdesc.SetInput("Y", {y_var_name}); opdesc.SetOutput("Out", {out_var_name}); opdesc.SetAttr("axis", axis); // create and convert op to NPU model, then run it on NPU auto op = CreateOp(opdesc, &scope); LauchOp(op, {x_var_name}, {out_var_name}); // execute reference implementation and save to output tensor elementwise_add_ref(op); // compare results auto* out_data = out->mutable_data(); auto* out_ref_data = out_ref->mutable_data(); for (int i = 0; i < out->dims().production(); i++) { EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2); } } TEST(NPUBridges, elementwise_add) { for (auto elt_type : {"add", "sub", "mul", "div"}) { test_elementwise_add({1, 2, 3, 4}, {1, 2, 1, 1}, 1, elt_type); test_elementwise_add({1, 2, 3, 4}, {1, 2, 3, 4}, 3, elt_type); } } } // namespace bridges } // namespace npu } // namespace kernels } // namespace lite } // namespace paddle USE_LITE_OP(elementwise_add); USE_NPU_BRIDGE(elementwise_add); USE_LITE_OP(elementwise_sub); USE_NPU_BRIDGE(elementwise_sub); USE_LITE_OP(elementwise_mul); USE_NPU_BRIDGE(elementwise_mul); USE_LITE_OP(elementwise_div); USE_NPU_BRIDGE(elementwise_div);