diff --git a/lite/kernels/npu/bridges/elementwise_ops.cc b/lite/kernels/npu/bridges/elementwise_ops.cc
index 69b77b5def4faf9722fdbd8bd6f2480e67e2c160..c6ff56de67ccb7c257c08db343be2e4767938900 100644
--- a/lite/kernels/npu/bridges/elementwise_ops.cc
+++ b/lite/kernels/npu/bridges/elementwise_ops.cc
@@ -21,28 +21,42 @@ namespace lite {
 namespace subgraph {
 namespace npu {
 
-std::vector<int64_t> CvtYShape(const DDim& x_dims,
-                               const DDim& y_dims,
-                               int axis) {
-  CHECK_EQ(x_dims.size(), 4UL) << "[NPU] Only support 4-dimension x";
-  CHECK_GE(x_dims.size(), y_dims.size());
+void CvtYShape(std::vector<int64_t>* x_shape,
+               std::vector<int64_t>* y_shape,
+               int axis) {
+  CHECK_GE(x_shape->size(), y_shape->size());
 
   if (axis < 0) {
-    axis += x_dims.size();
+    axis = x_shape->size() - y_shape->size();
   }
-  std::vector<int64_t> y_new_shape(y_dims.Vectorize());
-  if (y_new_shape.size() == 4UL) {
-    return y_new_shape;
+
+  // only support:
+  // (n,c,h,w) * (n,c,h,w)
+  // (n,c,h,w) * (1,c,1,1)
+  // (n,c,h,w) * (1,c,h,1)
+  // (n,c,h,w) * (1,c,h,w)
+  int y_shape_size = y_shape->size();
+  if (y_shape_size == 1) {
+    y_shape->insert(y_shape->begin(), 1);
+    y_shape->insert(y_shape->end(), 2, 1);
+  } else if (y_shape_size == 2) {
+    y_shape->insert(y_shape->begin(), 1);
+    y_shape->insert(y_shape->end(), 1);
+  } else if (y_shape_size == 3) {
+    y_shape->insert(y_shape->begin(), 1);
   }
-  for (int i = 0; i < axis; i++) {
-    y_new_shape.insert(y_new_shape.begin(), 1);
+
+  if (y_shape_size < 4) {
+    int n = 1;
+    for (int i = 0; i < axis; i++) {
+      n *= x_shape->at(i);
+    }
+    x_shape->erase(x_shape->begin(), x_shape->begin() + axis);
+    x_shape->insert(x_shape->begin(), n);
+    x_shape->insert(x_shape->end(), 4 - x_shape->size(), 1);
   }
-  while (y_new_shape.size() < 4) {
-    y_new_shape.push_back(1);
-  }
-  CHECK_EQ(y_new_shape.size(), 4UL);
-  return y_new_shape;
+
+  CHECK_EQ(x_shape->size(), 4UL);
+  CHECK_EQ(y_shape->size(), 4UL);
 }
 
 int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
@@ -61,32 +75,58 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(x_type->layout() == DATALAYOUT(kNCHW));
   auto x = scope->FindMutableTensor(x_name);
   auto x_dims = x->dims();
+
   auto y_name = op_info->Input("Y").front();
   auto y_type = kernel->GetInputDeclType("Y");
   CHECK(y_type->precision() == PRECISION(kFloat));
   CHECK(y_type->layout() == DATALAYOUT(kNCHW));
   auto y = scope->FindMutableTensor(y_name);
   auto y_dims = y->dims();
+
   auto out_name = op_info->Output("Out").front();
   auto out_type = kernel->GetOutputDeclType("Out");
   CHECK(out_type->precision() == PRECISION(kFloat));
   CHECK(out_type->layout() == DATALAYOUT(kNCHW));
+  auto out = scope->FindMutableTensor(out_name);
+  auto out_dims = out->dims();
+
   auto axis = op_info->GetAttr<int>("axis");
 
+  auto x_new_shape = x_dims.Vectorize();
+  auto y_new_shape = y_dims.Vectorize();
+  CvtYShape(&x_new_shape, &y_new_shape, axis);
+
   // X node
   std::shared_ptr<Node> x_node = nullptr;
   if (graph->Has(x_name)) {
     x_node = graph->Get(x_name);
+    if (x_dims.Vectorize() != x_new_shape) {
+      auto reshaped_x_node = graph->Add<ge::op::Reshape>(x_name + "/reshape");
+      auto reshaped_x_op = reshaped_x_node->data<ge::op::Reshape>();
+      reshaped_x_op->set_input_tensor(*x_node->data());
+      reshaped_x_op->set_attr_shape(
+          ge::AttrValue::LIST_INT(x_new_shape.begin(), x_new_shape.end()));
+      reshaped_x_op->set_attr_axis(0);
+      x_node = reshaped_x_node;
+    }
   } else {
-    x_node = graph->Add(x_name, *x);
+    x_node = graph->Add(x_name, *x, x_new_shape);
   }
 
   // Y node
   std::shared_ptr<Node> y_node = nullptr;
   if (graph->Has(y_name)) {
     y_node = graph->Get(y_name);
+    if (y_dims.Vectorize() != y_new_shape) {
+      auto reshaped_y_node = graph->Add<ge::op::Reshape>(y_name + "/reshape");
+      auto reshaped_y_op = reshaped_y_node->data<ge::op::Reshape>();
+      reshaped_y_op->set_input_tensor(*y_node->data());
+      reshaped_y_op->set_attr_shape(
+          ge::AttrValue::LIST_INT(y_new_shape.begin(), y_new_shape.end()));
+      reshaped_y_op->set_attr_axis(0);
+      y_node = reshaped_y_node;
+    }
   } else {
-    auto y_new_shape = CvtYShape(x_dims, y_dims, axis);
     y_node = graph->Add(y_name, *y, y_new_shape);
   }
 
@@ -98,17 +138,20 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     auto elt_op = elt_node->data<ge::op::Add>();
     elt_op->set_input_x1(*x_node->data());
     elt_op->set_input_x2(*y_node->data());
-  } else if (op_type == "elementwise_sub") {
+  } else if (op_type == "elementwise_sub" ||
+             op_type == "fusion_elementwise_sub_activation") {
     elt_node = graph->Add<ge::op::Sub>(out_name);
     auto elt_op = elt_node->data<ge::op::Sub>();
     elt_op->set_input_x1(*x_node->data());
     elt_op->set_input_x2(*y_node->data());
-  } else if (op_type == "elementwise_mul") {
+  } else if (op_type == "elementwise_mul" ||
+             op_type == "fusion_elementwise_mul_activation") {
     elt_node = graph->Add<ge::op::Mul>(out_name);
     auto elt_op = elt_node->data<ge::op::Mul>();
     elt_op->set_input_x(*x_node->data());
     elt_op->set_input_y(*y_node->data());
-  } else if (op_type == "elementwise_div") {
+  } else if (op_type == "elementwise_div" ||
+             op_type == "fusion_elementwise_div_activation") {
     elt_node = graph->Add<ge::op::RealDiv>(out_name);
     auto elt_op = elt_node->data<ge::op::RealDiv>();
     elt_op->set_input_x1(*x_node->data());
@@ -118,8 +161,22 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     return FAILED;
   }
 
+  if (out_dims.Vectorize() != x_new_shape) {
+    auto reshaped_elt_node = graph->Add<ge::op::Reshape>(out_name);
+    auto reshaped_elt_op = reshaped_elt_node->data<ge::op::Reshape>();
+    reshaped_elt_op->set_input_tensor(*elt_node->data());
+    auto out_shape = out_dims.Vectorize();
+    reshaped_elt_op->set_attr_shape(
+        ge::AttrValue::LIST_INT(out_shape.begin(), out_shape.end()));
+    reshaped_elt_op->set_attr_axis(0);
+    elt_node = reshaped_elt_node;
+  }
+
   // Act node
-  if (op_type == "fusion_elementwise_add_activation") {
+  if (op_type == "fusion_elementwise_add_activation" ||
+      op_type == "fusion_elementwise_sub_activation" ||
+      op_type == "fusion_elementwise_mul_activation" ||
+      op_type == "fusion_elementwise_div_activation") {
    auto act_type = op_info->GetAttr<std::string>("act_type");
     auto act_node = graph->Add<ge::op::Activation>(out_name);
     auto act_op = act_node->data<ge::op::Activation>();
@@ -128,6 +185,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     // clipped_relu etc.
     act_op->set_attr_mode(CvtActMode(act_type));
   }
+
   return REBUILD_WHEN_SHAPE_CHANGED;
 }
 
@@ -139,9 +197,6 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 REGISTER_SUBGRAPH_BRIDGE(elementwise_add,
                          kNPU,
                          paddle::lite::subgraph::npu::ElementwiseConverter);
-REGISTER_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation,
-                         kNPU,
-                         paddle::lite::subgraph::npu::ElementwiseConverter);
 REGISTER_SUBGRAPH_BRIDGE(elementwise_sub,
                          kNPU,
                          paddle::lite::subgraph::npu::ElementwiseConverter);
@@ -151,3 +206,15 @@ REGISTER_SUBGRAPH_BRIDGE(elementwise_mul,
 REGISTER_SUBGRAPH_BRIDGE(elementwise_div,
                          kNPU,
                          paddle::lite::subgraph::npu::ElementwiseConverter);
+REGISTER_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation,
+                         kNPU,
+                         paddle::lite::subgraph::npu::ElementwiseConverter);
+REGISTER_SUBGRAPH_BRIDGE(fusion_elementwise_sub_activation,
+                         kNPU,
+                         paddle::lite::subgraph::npu::ElementwiseConverter);
+REGISTER_SUBGRAPH_BRIDGE(fusion_elementwise_mul_activation,
+                         kNPU,
+                         paddle::lite::subgraph::npu::ElementwiseConverter);
+REGISTER_SUBGRAPH_BRIDGE(fusion_elementwise_div_activation,
+                         kNPU,
+                         paddle::lite::subgraph::npu::ElementwiseConverter);
diff --git a/lite/kernels/npu/bridges/elementwise_ops_test.cc b/lite/kernels/npu/bridges/elementwise_ops_test.cc
deleted file mode 100644
index 8dd4c851ca89413d3e740bb5bc5d0461938a7f69..0000000000000000000000000000000000000000
--- a/lite/kernels/npu/bridges/elementwise_ops_test.cc
+++ /dev/null
@@ -1,193 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/operators/elementwise_ops.h"
-#include <gtest/gtest.h>
-#include <algorithm>
-#include "lite/core/op_registry.h"
-#include "lite/kernels/npu/bridges/registry.h"
-#include "lite/kernels/npu/bridges/test_helper.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace npu {
-namespace bridges {
-
-template <typename dtype>
-void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
-  Scope* scope = op->scope();
-  const OpInfo* op_info = op->op_info();
-  auto x = scope->FindTensor("x");
-  auto y = scope->FindTensor("y");
-  auto out = scope->FindMutableTensor("out_ref");
-  out->Resize(x->dims());
-
-  auto x_data = x->data<dtype>();
-  auto y_data = y->data<dtype>();
-  auto out_data = out->mutable_data<dtype>();
-
-  auto x_dims = x->dims();
-  auto y_dims = y->dims();
-  int axis = op_info->GetAttr<int>("axis");
-
-  if (axis < 0) {
-    axis += x_dims.size();
-  }
-  int batch = 1;
-  int channels = y->numel();
-  int num = x->numel() / channels / batch;
-  // do elementwise add/sub/max...
-  std::string op_type = op_info->Type();
-  if (op_type == "elementwise_add") {
-    for (int i = 0; i < batch; ++i) {
-      for (int j = 0; j < channels; ++j) {
-        int offset = (i * channels + j) * num;
-        const dtype* din_ptr = x_data + offset;
-        const dtype diny_data = y_data[j];
-        dtype* dout_ptr = out_data + offset;
-        for (int k = 0; k < num; ++k) {
-          *dout_ptr = *din_ptr + diny_data;
-          dout_ptr++;
-          din_ptr++;
-        }
-      }
-    }
-  } else if (op_type == "elementwise_sub") {
-    for (int i = 0; i < batch; ++i) {
-      for (int j = 0; j < channels; ++j) {
-        int offset = (i * channels + j) * num;
-        const dtype* din_ptr = x_data + offset;
-        const dtype diny_data = y_data[j];
-        dtype* dout_ptr = out_data + offset;
-        for (int k = 0; k < num; ++k) {
-          *dout_ptr = *din_ptr - diny_data;
-          dout_ptr++;
-          din_ptr++;
-        }
-      }
-    }
-  } else if (op_type == "elementwise_mul") {
-    for (int i = 0; i < batch; ++i) {
-      for (int j = 0; j < channels; ++j) {
-        int offset = (i * channels + j) * num;
-        const dtype* din_ptr = x_data + offset;
-        const dtype diny_data = y_data[j];
-        dtype* dout_ptr = out_data + offset;
-        for (int k = 0; k < num; ++k) {
-          *dout_ptr = *din_ptr * diny_data;
-          dout_ptr++;
-          din_ptr++;
-        }
-      }
-    }
-  } else if (op_type == "elementwise_div") {
-    for (int i = 0; i < batch; ++i) {
-      for (int j = 0; j < channels; ++j) {
-        int offset = (i * channels + j) * num;
-        const dtype* din_ptr = x_data + offset;
-        const dtype diny_data = y_data[j];
-        dtype* dout_ptr = out_data + offset;
-        for (int k = 0; k < num; ++k) {
-          *dout_ptr = *din_ptr / diny_data;
-          dout_ptr++;
-          din_ptr++;
-        }
-      }
-    }
-  } else if (op_type == "elementwise_max") {
-    for (int i = 0; i < batch; ++i) {
-      for (int j = 0; j < channels; ++j) {
-        int offset = (i * channels + j) * num;
-        const dtype* din_ptr = x_data + offset;
-        const dtype diny_data = y_data[j];
-        dtype* dout_ptr = out_data + offset;
-        for (int k = 0; k < num; ++k) {
-          *dout_ptr = std::max(*din_ptr, diny_data);
-          dout_ptr++;
-          din_ptr++;
-        }
-      }
-    }
-  } else {
-    LOG(FATAL) << "unsupported Elementwise type: " << op_type;
-  }
-}
-
-void test_elementwise_add(const std::vector<int64_t>& x_shape,
-                          const std::vector<int64_t>& y_shape,
-                          int axis,
-                          std::string elt_type) {
-  // prepare input&output variables
-  Scope scope;
-  std::string x_var_name = "x";
-  std::string y_var_name = "y";
-  std::string out_var_name = "out";
-  std::string out_ref_var_name = "out_ref";
-  auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
-  auto* y = scope.Var(y_var_name)->GetMutable<Tensor>();
-  auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
-  auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
-  x->Resize(x_shape);
-  y->Resize(y_shape);
-
-  // initialize input&output data
-  FillTensor<float, int>(x, 1, 3);
-  FillTensor<float, int>(y, 1, 3);
-
-  // initialize op desc
-  cpp::OpDesc opdesc;
-  opdesc.SetType("elementwise_" + elt_type);
-  opdesc.SetInput("X", {x_var_name});
-  opdesc.SetInput("Y", {y_var_name});
-  opdesc.SetOutput("Out", {out_var_name});
-  opdesc.SetAttr("axis", axis);
-
-  // create and convert op to NPU model, then run it on NPU
-  auto op = CreateOp<operators::ElementwiseOp>(opdesc, &scope);
-  LauchOp(op, {x_var_name}, {out_var_name});
-
-  // execute reference implementation and save to output tensor
-  elementwise_add_ref<float>(op);
-
-  // compare results
-  auto* out_data = out->mutable_data<float>();
-  auto* out_ref_data = out_ref->mutable_data<float>();
-  for (int i = 0; i < out->dims().production(); i++) {
-    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2);
-  }
-}
-
-TEST(NPUBridges, elementwise_add) {
-  for (auto elt_type : {"add", "sub", "mul", "div"}) {
-    test_elementwise_add({1, 2, 3, 4}, {2}, 1, elt_type);
-    test_elementwise_add({1, 2, 3, 4}, {1, 2, 1, 1}, 1, elt_type);
-    test_elementwise_add({1, 2, 3, 4}, {1, 2, 3, 4}, 3, elt_type);
-  }
-}
-
-}  // namespace bridges
-}  // namespace npu
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-USE_LITE_OP(elementwise_add);
-USE_NPU_BRIDGE(elementwise_add);
-USE_LITE_OP(elementwise_sub);
-USE_NPU_BRIDGE(elementwise_sub);
-USE_LITE_OP(elementwise_mul);
-USE_NPU_BRIDGE(elementwise_mul);
-USE_LITE_OP(elementwise_div);
-USE_NPU_BRIDGE(elementwise_div);
diff --git a/lite/kernels/npu/bridges/paddle_use_bridges.h b/lite/kernels/npu/bridges/paddle_use_bridges.h
index 3a584115c173d75b139d68220701cc709a8c03d6..3f8effa61928e522e80f7d74b38a8a672235a1f0 100644
--- a/lite/kernels/npu/bridges/paddle_use_bridges.h
+++ b/lite/kernels/npu/bridges/paddle_use_bridges.h
@@ -30,10 +30,13 @@ USE_SUBGRAPH_BRIDGE(conv2d_transpose, kNPU);
 USE_SUBGRAPH_BRIDGE(dropout, kNPU);
 USE_SUBGRAPH_BRIDGE(elementwise_add, kNPU);
-USE_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation, kNPU);
 USE_SUBGRAPH_BRIDGE(elementwise_sub, kNPU);
 USE_SUBGRAPH_BRIDGE(elementwise_mul, kNPU);
 USE_SUBGRAPH_BRIDGE(elementwise_div, kNPU);
+USE_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation, kNPU);
+USE_SUBGRAPH_BRIDGE(fusion_elementwise_sub_activation, kNPU);
+USE_SUBGRAPH_BRIDGE(fusion_elementwise_mul_activation, kNPU);
+USE_SUBGRAPH_BRIDGE(fusion_elementwise_div_activation, kNPU);
 USE_SUBGRAPH_BRIDGE(fc, kNPU);
 USE_SUBGRAPH_BRIDGE(bilinear_interp, kNPU);
diff --git a/lite/tests/kernels/elementwise_compute_test.cc b/lite/tests/kernels/elementwise_compute_test.cc
index 2c62929010f91110bc7afa1d4005496d276a1db6..66b6223dc45c8fa405d67be2f882ab8445644632 100644
--- a/lite/tests/kernels/elementwise_compute_test.cc
+++ b/lite/tests/kernels/elementwise_compute_test.cc
@@ -16,654 +16,228 @@
 #include "lite/api/paddle_use_kernels.h"
 #include "lite/api/paddle_use_ops.h"
 #include "lite/core/arena/framework.h"
+#include "lite/tests/utils/fill_data.h"
 
 namespace paddle {
 namespace lite {
 
+#define ELT(MATHOP)                                                          \
+  for (int n = 0; n < xn; n++) {                                             \
+    for (int c = 0; c < xc; c++) {                                           \
+      for (int h = 0; h < xh; h++) {                                         \
+        for (int w = 0; w < xw; w++) {                                       \
+          int x_offset = n * xc * xh * xw + c * xh * xw + h * xw + w;        \
+          int y_offset = 0;                                                  \
+          if (yn != 1) y_offset += n * yc * yh * yw;                         \
+          if (yc != 1) y_offset += c * yh * yw;                              \
+          if (yh != 1) y_offset += h * yw;                                   \
+          if (yw != 1) y_offset += w;                                        \
+          out_data[x_offset] = out_data[x_offset] MATHOP y_data[y_offset];   \
+        }                                                                    \
+      }                                                                      \
+    }                                                                        \
+  }
+
 class ElementwiseComputeTester : public arena::TestCase {
  protected:
   // common attributes for this op.
-  std::string inputx_ = "x";
-  std::string inputy_ = "y";
-  std::string output_ = "out";
-  int axis_;
-  DDim dims_{{1, 2, 3, 4}};
+  std::string x_ = "x";
+  std::string y_ = "y";
+  std::string out_ = "out";
+  // add, sub, mul, div, max
+  std::string elt_type_ = "";
+  DDim x_dims_{{1, 2, 3, 4}};
+  DDim y_dims_{{1, 2, 3, 4}};
+  int axis_ = 1;
+  std::string act_type_ = "";
 
  public:
   ElementwiseComputeTester(const Place& place,
                            const std::string& alias,
-                           int axis)
-      : TestCase(place, alias), axis_(axis) {}
-
-  void RunBaseline(Scope* scope) override {
-    auto* out = scope->NewTensor(output_);
-    CHECK(out);
-    out->Resize(dims_);
-    auto* out_data = out->mutable_data<float>();
-
-    auto* x = scope->FindTensor(inputx_);
-    const auto* x_data = x->data<float>();
-    const auto* y_data = x->data<float>();
-
-    for (int i = 0; i < dims_.production(); i++) {
-      out_data[i] = x_data[i] + y_data[i];
-    }
-  }
-
-  void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("elementwise_add");
-    op_desc->SetInput("X", {inputx_});
-    op_desc->SetInput("Y", {inputy_});
-    op_desc->SetOutput("Out", {output_});
-    op_desc->SetAttr("axis", axis_);
-  }
-
-  void PrepareData() override {
-    std::vector<float> data(dims_.production());
-
-    for (int i = 0; i < dims_.production(); i++) {
-      data[i] = i * 1.1;
-    }
-
-    SetCommonTensor(inputx_, dims_, data.data());
-    SetCommonTensor(inputy_, dims_, data.data());
-  }
-};
-
-class ElementwiseSubComputeTester : public arena::TestCase {
- protected:
-  // common attributes for this op.
-  std::string inputx_ = "x";
-  std::string inputy_ = "y";
-  std::string output_ = "out";
-  int axis_;
-  DDim dims_{{1, 2, 3, 4}};
-
- public:
-  ElementwiseSubComputeTester(const Place& place,
-                              const std::string& alias,
-                              int axis)
-      : TestCase(place, alias), axis_(axis) {}
-
-  void RunBaseline(Scope* scope) override {
-    auto* out = scope->NewTensor(output_);
-    CHECK(out);
-    out->Resize(dims_);
-    auto* out_data = out->mutable_data<float>();
-
-    auto* x = scope->FindTensor(inputx_);
-    const auto* x_data = x->data<float>();
-    const auto* y_data = x->data<float>();
-
-    for (int i = 0; i < dims_.production(); i++) {
-      out_data[i] = x_data[i] - y_data[i];
-    }
-  }
-
-  void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("elementwise_sub");
-    op_desc->SetInput("X", {inputx_});
-    op_desc->SetInput("Y", {inputy_});
-    op_desc->SetOutput("Out", {output_});
-    op_desc->SetAttr("axis", axis_);
-  }
-
-  void PrepareData() override {
-    std::vector<float> data(dims_.production());
-
-    for (int i = 0; i < dims_.production(); i++) {
-      data[i] = i * 1.1;
-    }
-
-    SetCommonTensor(inputx_, dims_, data.data());
-    SetCommonTensor(inputy_, dims_, data.data());
-  }
-};
-
-class ElementwiseMulComputeTester : public arena::TestCase {
- protected:
-  // common attributes for this op.
-  std::string inputx_ = "x";
-  std::string inputy_ = "y";
-  std::string output_ = "out";
-  int axis_;
-  DDim dims_{{1, 2, 3, 4}};
-
- public:
-  ElementwiseMulComputeTester(const Place& place,
-                              const std::string& alias,
-                              int axis)
-      : TestCase(place, alias), axis_(axis) {}
-
-  void RunBaseline(Scope* scope) override {
-    auto* out = scope->NewTensor(output_);
-    CHECK(out);
-    out->Resize(dims_);
-    auto* out_data = out->mutable_data<float>();
-
-    auto* x = scope->FindTensor(inputx_);
-    const auto* x_data = x->data<float>();
-    const auto* y_data = x->data<float>();
-
-    for (int i = 0; i < dims_.production(); i++) {
-      out_data[i] = x_data[i] * y_data[i];
-    }
-  }
-
-  void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("elementwise_mul");
-    op_desc->SetInput("X", {inputx_});
-    op_desc->SetInput("Y", {inputy_});
-    op_desc->SetOutput("Out", {output_});
-    op_desc->SetAttr("axis", axis_);
-  }
-
-  void PrepareData() override {
-    std::vector<float> data(dims_.production());
-
-    for (int i = 0; i < dims_.production(); i++) {
-      data[i] = i * 1.1;
-    }
-
-    SetCommonTensor(inputx_, dims_, data.data());
-    SetCommonTensor(inputy_, dims_, data.data());
-  }
-};
-
-class ElementwiseMaxComputeTester : public arena::TestCase {
- protected:
-  // common attributes for this op.
-  std::string inputx_ = "x";
-  std::string inputy_ = "y";
-  std::string output_ = "out";
-  int axis_;
-  DDim dims_{{1, 2, 3, 4}};
-
- public:
-  ElementwiseMaxComputeTester(const Place& place,
-                              const std::string& alias,
-                              int axis)
-      : TestCase(place, alias), axis_(axis) {}
-
-  void RunBaseline(Scope* scope) override {
-    auto* out = scope->NewTensor(output_);
-    CHECK(out);
-    out->Resize(dims_);
-    auto* out_data = out->mutable_data<float>();
-
-    auto* x = scope->FindTensor(inputx_);
-    const auto* x_data = x->data<float>();
-    const auto* y_data = x->data<float>();
-
-    for (int i = 0; i < dims_.production(); i++) {
-      out_data[i] = std::max(x_data[i], y_data[i]);
-    }
-  }
-
-  void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("elementwise_max");
-    op_desc->SetInput("X", {inputx_});
-    op_desc->SetInput("Y", {inputy_});
-    op_desc->SetOutput("Out", {output_});
-    op_desc->SetAttr("axis", axis_);
-  }
-
-  void PrepareData() override {
-    std::vector<float> data(dims_.production());
-
-    for (int i = 0; i < dims_.production(); i++) {
-      data[i] = i * 1.1;
-    }
-
-    SetCommonTensor(inputx_, dims_, data.data());
-    SetCommonTensor(inputy_, dims_, data.data());
-  }
-};
-
-class FusionElementwiseAddActivationComputeTester : public arena::TestCase {
- protected:
-  // common attributes for this op.
-  std::string inputx_ = "x";
-  std::string inputy_ = "y";
-  std::string output_ = "out";
-  int axis_;
-  std::string act_type_;
-  DDim dims_{{1, 2, 3, 4}};
-
- public:
-  FusionElementwiseAddActivationComputeTester(const Place& place,
-                                              const std::string& alias,
-                                              int axis,
-                                              std::string act_type)
-      : TestCase(place, alias), axis_(axis), act_type_(act_type) {}
-
-  void RunBaseline(Scope* scope) override {
-    auto* out = scope->NewTensor(output_);
-    CHECK(out);
-    out->Resize(dims_);
-    auto* out_data = out->mutable_data<float>();
-
-    auto* x = scope->FindTensor(inputx_);
-    const auto* x_data = x->data<float>();
-    const auto* y_data = x->data<float>();
-
-    for (int i = 0; i < dims_.production(); i++) {
-      out_data[i] = x_data[i] + y_data[i];
-      if (act_type_ == "relu") {
-        out_data[i] = out_data[i] > 0 ? out_data[i] : 0;
-      } else {
-        LOG(FATAL) << "unsupported Activation type: " << act_type_;
-      }
-    }
-  }
-
-  void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("fusion_elementwise_add_activation");
-    op_desc->SetInput("X", {inputx_});
-    op_desc->SetInput("Y", {inputy_});
-    op_desc->SetOutput("Out", {output_});
-    op_desc->SetAttr("axis", axis_);
-    op_desc->SetAttr("act_type", act_type_);
-  }
-
-  void PrepareData() override {
-    std::vector<float> data(dims_.production());
-
-    for (int i = 0; i < dims_.production(); i++) {
-      data[i] = i * 1.1;
-    }
-
-    SetCommonTensor(inputx_, dims_, data.data());
-    SetCommonTensor(inputy_, dims_, data.data());
-  }
-};
-
-class FusionElementwiseSubActivationComputeTester : public arena::TestCase {
- protected:
-  // common attributes for this op.
-  std::string inputx_ = "x";
-  std::string inputy_ = "y";
-  std::string output_ = "out";
-  int axis_;
-  std::string act_type_;
-  DDim dims_{{1, 2, 3, 4}};
-
- public:
-  FusionElementwiseSubActivationComputeTester(const Place& place,
-                                              const std::string& alias,
-                                              int axis,
-                                              std::string act_type)
-      : TestCase(place, alias), axis_(axis), act_type_(act_type) {}
+                           std::string elt_type = "add",
+                           std::vector<int64_t> x_shape = {1, 2, 3, 4},
+                           std::vector<int64_t> y_shape = {1, 2, 3, 4},
+                           int axis = 1,
+                           std::string act_type = "")
+      : TestCase(place, alias),
+        elt_type_(elt_type),
+        x_dims_(DDim(x_shape)),
+        y_dims_(DDim(y_shape)),
+        axis_(axis),
+        act_type_(act_type) {}
 
   void RunBaseline(Scope* scope) override {
-    auto* out = scope->NewTensor(output_);
-    CHECK(out);
-    out->Resize(dims_);
-    auto* out_data = out->mutable_data<float>();
-
-    auto* x = scope->FindTensor(inputx_);
-    const auto* x_data = x->data<float>();
-    const auto* y_data = x->data<float>();
-
-    for (int i = 0; i < dims_.production(); i++) {
-      out_data[i] = x_data[i] - y_data[i];
-      if (act_type_ == "relu") {
-        out_data[i] = out_data[i] > 0 ? out_data[i] : 0;
-      } else {
-        LOG(FATAL) << "unsupported Activation type: " << act_type_;
+    if (axis_ < 0) {
+      axis_ = x_dims_.size() - y_dims_.size();
+    }
+    auto x_shape = x_dims_.Vectorize();
+    while (x_shape.size() < 4) {
+      x_shape.push_back(1);
+    }
+    auto y_shape = y_dims_.Vectorize();
+    y_shape.insert(y_shape.begin(), axis_, 1);
+    while (y_shape.size() < 4) {
+      y_shape.push_back(1);
+    }
+    CHECK_EQ(x_shape.size(), 4);
+    CHECK_EQ(y_shape.size(), 4);
+
+    auto x = scope->FindTensor(x_);
+    auto y = scope->FindTensor(y_);
+    auto x_data = x->data<float>();
+    auto y_data = y->data<float>();
+    auto out = scope->NewTensor(out_);
+    out->Resize(x_dims_);
+    auto out_data = out->mutable_data<float>();
+    memcpy(out_data, x_data, sizeof(float) * x_dims_.production());
+
+    int xn = x_shape[0];
+    int xc = x_shape[1];
+    int xh = x_shape[2];
+    int xw = x_shape[3];
+
+    int yn = y_shape[0];
+    int yc = y_shape[1];
+    int yh = y_shape[2];
+    int yw = y_shape[3];
+
+    if (elt_type_ == "add") {
+      ELT(+);
+    } else if (elt_type_ == "sub") {
+      ELT(-);
+    } else if (elt_type_ == "mul") {
+      ELT(*);
+    } else if (elt_type_ == "div") {
+      ELT(/);
+    } else if (elt_type_ == "max") {
+      for (int n = 0; n < xn; n++) {
+        for (int c = 0; c < xc; c++) {
+          for (int h = 0; h < xh; h++) {
+            for (int w = 0; w < xw; w++) {
+              int x_offset = n * xc * xh * xw + c * xh * xw + h * xw + w;
+              int y_offset = 0;
+              if (yn != 1) y_offset += n * yc * yh * yw;
+              if (yc != 1) y_offset += c * yh * yw;
+              if (yh != 1) y_offset += h * yw;
+              if (yw != 1) y_offset += w;
+              out_data[x_offset] =
+                  std::max(out_data[x_offset], y_data[y_offset]);
+            }
+          }
+        }
       }
+    } else {
+      LOG(FATAL) << "unsupported";
     }
-  }
-
-  void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("fusion_elementwise_sub_activation");
-    op_desc->SetInput("X", {inputx_});
-    op_desc->SetInput("Y", {inputy_});
-    op_desc->SetOutput("Out", {output_});
-    op_desc->SetAttr("axis", axis_);
-    op_desc->SetAttr("act_type", act_type_);
-  }
-
-  void PrepareData() override {
-    std::vector<float> data(dims_.production());
-
-    for (int i = 0; i < dims_.production(); i++) {
-      data[i] = i * 1.1;
-    }
-
-    SetCommonTensor(inputx_, dims_, data.data());
-    SetCommonTensor(inputy_, dims_, data.data());
-  }
-};
-
-class FusionElementwiseMulActivationComputeTester : public arena::TestCase {
- protected:
-  // common attributes for this op.
-  std::string inputx_ = "x";
-  std::string inputy_ = "y";
-  std::string output_ = "out";
-  int axis_;
-  std::string act_type_;
-  DDim dims_{{1, 2, 3, 4}};
-
- public:
-  FusionElementwiseMulActivationComputeTester(const Place& place,
-                                              const std::string& alias,
-                                              int axis,
-                                              std::string act_type)
-      : TestCase(place, alias), axis_(axis), act_type_(act_type) {}
-
-  void RunBaseline(Scope* scope) override {
-    auto* out = scope->NewTensor(output_);
-    CHECK(out);
-    out->Resize(dims_);
-    auto* out_data = out->mutable_data<float>();
-
-    auto* x = scope->FindTensor(inputx_);
-    const auto* x_data = x->data<float>();
-    const auto* y_data = x->data<float>();
-
-    for (int i = 0; i < dims_.production(); i++) {
-      out_data[i] = x_data[i] * y_data[i];
+    if (!act_type_.empty()) {
       if (act_type_ == "relu") {
-        out_data[i] = out_data[i] > 0 ? out_data[i] : 0;
+        for (int i = 0; i < x_dims_.production(); i++) {
+          out_data[i] = std::max(0.f, out_data[i]);
+        }
       } else {
-        LOG(FATAL) << "unsupported Activation type: " << act_type_;
+        LOG(FATAL) << "unsupported";
       }
     }
   }
 
   void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("fusion_elementwise_mul_activation");
-    op_desc->SetInput("X", {inputx_});
-    op_desc->SetInput("Y", {inputy_});
-    op_desc->SetOutput("Out", {output_});
+    std::string op_type = "elementwise_" + elt_type_;
+    if (!act_type_.empty()) {
+      op_type = "fusion_" + op_type + "_activation";
+    }
+    op_desc->SetType(op_type);
+    op_desc->SetInput("X", {x_});
+    op_desc->SetInput("Y", {y_});
+    op_desc->SetOutput("Out", {out_});
     op_desc->SetAttr("axis", axis_);
-    op_desc->SetAttr("act_type", act_type_);
-  }
-
-  void PrepareData() override {
-    std::vector<float> data(dims_.production());
-
-    for (int i = 0; i < dims_.production(); i++) {
-      data[i] = i * 1.1;
-    }
-
-    SetCommonTensor(inputx_, dims_, data.data());
-    SetCommonTensor(inputy_, dims_, data.data());
-  }
-};
-
-class FusionElementwiseMaxActivationComputeTester : public arena::TestCase {
- protected:
-  // common attributes for this op.
-  std::string inputx_ = "x";
-  std::string inputy_ = "y";
-  std::string output_ = "out";
-  int axis_;
-  std::string act_type_;
-  DDim dims_{{1, 2, 3, 4}};
-
- public:
-  FusionElementwiseMaxActivationComputeTester(const Place& place,
-                                              const std::string& alias,
-                                              int axis,
-                                              std::string act_type)
-      : TestCase(place, alias), axis_(axis), act_type_(act_type) {}
-
-  void RunBaseline(Scope* scope) override {
-    auto* out = scope->NewTensor(output_);
-    CHECK(out);
-    out->Resize(dims_);
-    auto* out_data = out->mutable_data<float>();
-
-    auto* x = scope->FindTensor(inputx_);
-    const auto* x_data = x->data<float>();
-    const auto* y_data = x->data<float>();
-
-    for (int i = 0; i < dims_.production(); i++) {
-      out_data[i] = std::max(x_data[i], y_data[i]);
-      if (act_type_ == "relu") {
-        out_data[i] = out_data[i] > 0 ? out_data[i] : 0;
-      } else {
-        LOG(FATAL) << "unsupported Activation type: " << act_type_;
-      }
-    }
-  }
-
-  void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("fusion_elementwise_max_activation");
-    op_desc->SetInput("X", {inputx_});
-    op_desc->SetInput("Y", {inputy_});
-    op_desc->SetOutput("Out", {output_});
-    op_desc->SetAttr("axis", axis_);
-    op_desc->SetAttr("act_type", act_type_);
-  }
-
-  void PrepareData() override {
-    std::vector<float> data(dims_.production());
-
-    for (int i = 0; i < dims_.production(); i++) {
-      data[i] = i * 1.1;
+    if (!act_type_.empty()) {
+      op_desc->SetAttr("act_type", act_type_);
     }
   }
 
   void PrepareData() override {
+    std::vector<float> dx(x_dims_.production());
+    for (size_t i = 0; i < dx.size(); i++) {
+      dx[i] = (i % 3) * 1.1f;
+      dx[i] = dx[i] == 0 ? 1.f : dx[i];
    }
+    SetCommonTensor(x_, x_dims_, dx.data());
-    SetCommonTensor(inputx_, dims_, data.data());
-    SetCommonTensor(inputy_, dims_, data.data());
-  }
-};
-
-class ElementwiseDivComputeTester : public arena::TestCase {
- protected:
-  // common attributes for this op.
-  std::string inputx_ = "x";
-  std::string inputy_ = "y";
-  std::string output_ = "out";
-  int axis_;
-  DDim dims_{{1, 2, 3, 4}};
-
- public:
-  ElementwiseDivComputeTester(const Place& place,
-                              const std::string& alias,
-                              int axis)
-      : TestCase(place, alias), axis_(axis) {}
-
-  void RunBaseline(Scope* scope) override {
-    auto* out = scope->NewTensor(output_);
-    CHECK(out);
-    out->Resize(dims_);
-    auto* out_data = out->mutable_data<float>();
-
-    auto* x = scope->FindTensor(inputx_);
-    const auto* x_data = x->data<float>();
-    auto* y = scope->FindTensor(inputy_);
-    const auto* y_data = y->data<float>();
-
-    for (int i = 0; i < dims_.production(); i++) {
-      out_data[i] = x_data[i] / y_data[i];
+    std::vector<float> dy(y_dims_.production());
+    for (size_t i = 0; i < dy.size(); i++) {
+      dy[i] = (i % 5) * 1.1f;
+      dy[i] = dy[i] == 0 ? 1.f : dy[i];
     }
-  }
-
-  void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("elementwise_div");
-    op_desc->SetInput("X", {inputx_});
-    op_desc->SetInput("Y", {inputy_});
-    op_desc->SetOutput("Out", {output_});
-    op_desc->SetAttr("axis", axis_);
-  }
-
-  void PrepareData() override {
-    std::vector<float> data(dims_.production());
-
-    for (int i = 0; i < dims_.production(); i++) {
-      data[i] = i * 1.1;
-    }
-
-    std::vector<float> data2(dims_.production());
-    for (int i = 0; i < dims_.production(); i++) {
-      data2[i] = (i + 1) * 1.1;
-    }
-
-    SetCommonTensor(inputx_, dims_, data.data());
-    SetCommonTensor(inputy_, dims_, data2.data());
+    SetCommonTensor(y_, y_dims_, dy.data());
   }
 };
 
-class FusionElementwiseDivActivationComputeTester : public arena::TestCase {
- protected:
-  // common attributes for this op.
-  std::string inputx_ = "x";
-  std::string inputy_ = "y";
-  std::string output_ = "out";
-  int axis_;
-  std::string act_type_;
-  DDim dims_{{1, 2, 3, 4}};
-
- public:
-  FusionElementwiseDivActivationComputeTester(const Place& place,
-                                              const std::string& alias,
-                                              int axis,
-                                              std::string act_type)
-      : TestCase(place, alias), axis_(axis), act_type_(act_type) {}
-
-  void RunBaseline(Scope* scope) override {
-    auto* out = scope->NewTensor(output_);
-    CHECK(out);
-    out->Resize(dims_);
-    auto* out_data = out->mutable_data<float>();
-
-    auto* x = scope->FindTensor(inputx_);
-    const auto* x_data = x->data<float>();
-    auto* y = scope->FindTensor(inputy_);
-    const auto* y_data = y->data<float>();
-
-    for (int i = 0; i < dims_.production(); i++) {
-      out_data[i] = x_data[i] / y_data[i];
-      if (act_type_ == "relu") {
-        out_data[i] = out_data[i] > 0 ? out_data[i] : 0;
-      } else {
-        LOG(FATAL) << "unsupported Activation type: " << act_type_;
-      }
-    }
-  }
-
-  void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("fusion_elementwise_div_activation");
-    op_desc->SetInput("X", {inputx_});
-    op_desc->SetInput("Y", {inputy_});
-    op_desc->SetOutput("Out", {output_});
-    op_desc->SetAttr("axis", axis_);
-    op_desc->SetAttr("act_type", act_type_);
-  }
-
-  void PrepareData() override {
-    std::vector<float> data(dims_.production());
-
-    for (int i = 0; i < dims_.production(); i++) {
-      data[i] = i * 1.1;
-    }
-    std::vector<float> data2(dims_.production());
-    for (int i = 0; i < dims_.production(); i++) {
-      data2[i] = (i + 1) * 1.1;
-    }
-    SetCommonTensor(inputx_, dims_, data.data());
-    SetCommonTensor(inputy_, dims_, data2.data());
-  }
-};
-
-void test_elementwise(Place place) {
-  for (int axis : {-1, 0, 1, 3}) {
-    std::unique_ptr<arena::TestCase> tester(
-        new ElementwiseComputeTester(place, "def", axis));
-    arena::Arena arena(std::move(tester), place, 2e-5);
-    arena.TestPrecision();
-
-    std::unique_ptr<arena::TestCase> tester_sub(
-        new ElementwiseSubComputeTester(place, "def", axis));
-    arena::Arena arena_sub(std::move(tester_sub), place, 2e-5);
-    arena_sub.TestPrecision();
-
-    std::unique_ptr<arena::TestCase> tester_mul(
-        new ElementwiseMulComputeTester(place, "def", axis));
-    arena::Arena arena_mul(std::move(tester_mul), place, 2e-5);
-    arena_mul.TestPrecision();
-
-    std::unique_ptr<arena::TestCase> tester_max(
-        new ElementwiseMaxComputeTester(place, "def", axis));
-    arena::Arena arena_max(std::move(tester_max), place, 2e-5);
-    arena_max.TestPrecision();
-
-    std::unique_ptr<arena::TestCase> tester_div(
-        new ElementwiseDivComputeTester(place, "def", axis));
-    arena::Arena arena_div(std::move(tester_div), place, 2e-5);
-    arena_div.TestPrecision();
-  }
+// add sub mul div max +act
+
+void TestElt(Place place,
+             float abs_error,
+             std::string elt_type,
+             std::vector<int64_t> x_shape,
+             std::vector<int64_t> y_shape,
+             int axis,
+             std::string act_type = "") {
+  std::unique_ptr<arena::TestCase> tester(new ElementwiseComputeTester(
+      place, "def", elt_type, x_shape, y_shape, axis, act_type));
+  arena::Arena arena(std::move(tester), place, abs_error);
+  arena.TestPrecision();
 }
 
-TEST(Elementwise, precision) {
-#ifdef LITE_WITH_X86
-  Place place(TARGET(kX86));
-#endif
-#ifdef LITE_WITH_ARM
-  Place place(TARGET(kARM));
-  test_elementwise(place);
-#endif
+void TestEltDims(Place place, float abs_error) {
+  TestElt(place, abs_error, "add", {2, 3, 4, 5}, {2, 3, 4, 5}, 0);
+  TestElt(place, abs_error, "add", {2, 3, 4}, {2, 3, 4}, 0);
+  TestElt(place, abs_error, "add", {2, 3, 4}, {2, 3}, 0);
+  TestElt(place, abs_error, "add", {2, 3}, {2}, 0);
+  TestElt(place, abs_error, "add", {2, 3, 4, 5}, {3, 4}, 1);
+  TestElt(place, abs_error, "add", {2, 3, 4}, {3, 4}, 1);
+  TestElt(place, abs_error, "add", {2, 3}, {3}, 1);
+  TestElt(place, abs_error, "add", {2, 3, 4, 5}, {4, 5}, 2);
+  TestElt(place, abs_error, "add", {2, 3, 4}, {4}, 2);
+  TestElt(place, abs_error, "add", {2, 3, 4, 5}, {5}, 3);
+  TestElt(place, abs_error, "add", {2, 3, 4, 5}, {3, 4, 5}, -1);
+  TestElt(place, abs_error, "add", {2, 3, 4}, {3, 4}, -1);
 }
 
-void test_fusion_elementwise(Place place) {
-  for (int axis : {-1, 0, 1, 3}) {
-    std::unique_ptr<arena::TestCase> tester_add_act(
-        new FusionElementwiseAddActivationComputeTester(
-            place, "def", axis, "relu"));
-    arena::Arena arena_add_act(std::move(tester_add_act), place, 2e-5);
-    arena_add_act.TestPrecision();
-
-    std::unique_ptr<arena::TestCase> tester_sub_act(
-        new FusionElementwiseSubActivationComputeTester(
-            place, "def", axis, "relu"));
-    arena::Arena arena_sub_act(std::move(tester_sub_act), place, 2e-5);
-    arena_sub_act.TestPrecision();
-
-    std::unique_ptr<arena::TestCase> tester_mul_act(
-        new FusionElementwiseMulActivationComputeTester(
-            place, "def", axis, "relu"));
-    arena::Arena arena_mul_act(std::move(tester_mul_act), place, 2e-5);
-    arena_mul_act.TestPrecision();
-
-    std::unique_ptr<arena::TestCase> tester_max_act(
-        new FusionElementwiseMaxActivationComputeTester(
-            place, "def", axis, "relu"));
-    arena::Arena arena_max_act(std::move(tester_max_act), place, 2e-5);
-    arena_max_act.TestPrecision();
+void TestEltTypes(Place place, float abs_error) {
+  for (auto elt_type :
+       std::vector<std::string>{"add", "sub", "mul", "div", "max"}) {
+    TestElt(place, abs_error, elt_type, {2, 3, 4, 5}, {2, 3, 4, 5}, 0);
+    TestElt(place, abs_error, elt_type, {2, 3, 4, 5}, {3}, 1);
+  }
+}
 
-    std::unique_ptr<arena::TestCase> tester_div_act(
-        new FusionElementwiseDivActivationComputeTester(
-            place, "def", axis, "relu"));
-    arena::Arena arena_div_act(std::move(tester_div_act), place, 2e-5);
-    arena_div_act.TestPrecision();
+void TestEltFuseAct(Place place, float abs_error) {
+  for (auto elt_type :
+       std::vector<std::string>{"add", "sub", "mul", "div", "max"}) {
+    TestElt(place, abs_error, elt_type, {2, 3, 4, 5}, {2, 3, 4, 5}, 0, "relu");
+    TestElt(place, abs_error, elt_type, {2, 3, 4, 5}, {3}, 1, "relu");
   }
 }
 
-TEST(FusionElementwise, precision) {
-#ifdef LITE_WITH_X86
-  Place place(TARGET(kX86));
-#endif
-#ifdef LITE_WITH_ARM
-  Place place(TARGET(kARM));
-  test_fusion_elementwise(place);
+TEST(Elementwise, precision) {
+  Place place;
+  float abs_error = 2e-5;
+
+#if defined(LITE_WITH_NPU)
+  place = TARGET(kNPU);
+  abs_error = 1e-2;  // use fp16 in npu
+#elif defined(LITE_WITH_ARM)
+  place = TARGET(kARM);
+#elif defined(LITE_WITH_XPU)
+  place = TARGET(kXPU);
+#else
+  return;
 #endif
-}
 
-#ifdef LITE_WITH_XPU
-TEST(Elementwise_XPU, precision) {
-  Place place(TARGET(kXPU));
-  for (int axis : {-1, 1}) {
-    std::unique_ptr<arena::TestCase> tester(
-        new ElementwiseComputeTester(place, "def", axis));
-    arena::Arena arena(std::move(tester), place, 2e-5);
-    arena.TestPrecision();
-  }
+  // TestEltDims(place, abs_error);
+  TestEltTypes(place, abs_error);
+  TestEltFuseAct(place, abs_error);
 }
-#endif
 
 }  // namespace lite
 }  // namespace paddle