diff --git a/lite/kernels/npu/bridges/CMakeLists.txt b/lite/kernels/npu/bridges/CMakeLists.txt index 63bdb4f57bb5d196daedc6d4c57737fdcb2ad3a1..1dd5ff60ccddaf1f2f35ae59d84f432a564c9443 100644 --- a/lite/kernels/npu/bridges/CMakeLists.txt +++ b/lite/kernels/npu/bridges/CMakeLists.txt @@ -20,6 +20,7 @@ set(npu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bridge_utility_np lite_cc_library(subgraph_bridge_fc_op_npu SRCS fc_op.cc DEPS ${npu_subgraph_bridge_deps}) lite_cc_library(subgraph_bridge_conv_op_npu SRCS conv_op.cc DEPS ${npu_subgraph_bridge_deps}) +lite_cc_library(subgraph_bridge_matmul_op_npu SRCS matmul_op.cc DEPS ${npu_subgraph_bridge_deps}) lite_cc_library(subgraph_bridge_mul_op_npu SRCS mul_op.cc DEPS ${npu_subgraph_bridge_deps}) lite_cc_library(subgraph_bridge_act_op_npu SRCS act_op.cc DEPS ${npu_subgraph_bridge_deps}) lite_cc_library(subgraph_bridge_scale_op_npu SRCS scale_op.cc DEPS ${npu_subgraph_bridge_deps}) @@ -50,6 +51,7 @@ set(npu_subgraph_bridges subgraph_bridge_graph_npu subgraph_bridge_fc_op_npu subgraph_bridge_conv_op_npu + subgraph_bridge_matmul_op_npu subgraph_bridge_mul_op_npu subgraph_bridge_act_op_npu subgraph_bridge_scale_op_npu diff --git a/lite/kernels/npu/bridges/matmul_op.cc b/lite/kernels/npu/bridges/matmul_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..0e3d5ab2d7f2ada896896abcb2505f14c1d4dc28 --- /dev/null +++ b/lite/kernels/npu/bridges/matmul_op.cc @@ -0,0 +1,138 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/npu/bridges/graph.h" +#include "lite/kernels/npu/bridges/registry.h" +#include "lite/kernels/npu/bridges/utility.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace npu { + +int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[NPU] Converting " + op_type + "..."; + + // Get input and output vars and op attributes + auto x_name = op_info->Input("X").front(); + auto x_type = kernel->GetInputDeclType("X"); + CHECK(x_type->precision() == PRECISION(kFloat)); + CHECK(x_type->layout() == DATALAYOUT(kNCHW)); + auto x = scope->FindMutableTensor(x_name); + auto x_dims = x->dims(); + + auto y_name = op_info->Input("Y").front(); + auto y_type = kernel->GetInputDeclType("Y"); + CHECK(y_type->precision() == PRECISION(kFloat)); + CHECK(y_type->layout() == DATALAYOUT(kNCHW)); + auto y = scope->FindMutableTensor(y_name); + auto y_dims = y->dims(); + + if (x_dims.size() == 1 || x_dims.size() != y_dims.size()) { + LOG(WARNING) + << "[NPU] dims size of x and y must be same and greater than 1."; + return FAILED; + } + if (x_dims.size() > 2 && + x_dims.count(0, x_dims.size() - 2) != + y_dims.count(0, y_dims.size() - 2)) { + LOG(WARNING) << "[NPU] batched matmul only support the same batch size"; + return FAILED; + } + + auto out_name = op_info->Output("Out").front(); + auto out_type = kernel->GetOutputDeclType("Out"); + CHECK(out_type->precision() == PRECISION(kFloat)); + CHECK(out_type->layout() == DATALAYOUT(kNCHW)); + auto out = scope->FindMutableTensor(out_name); + auto out_dims = out->dims(); + + bool transpose_x = op_info->GetAttr("transpose_X"); + if (x_dims.size() > 2 && transpose_x) { + LOG(WARNING) << "[NPU] not support transpose_x == true if x_dims size " + "greater than 2."; + return FAILED; + } + bool transpose_y = op_info->GetAttr("transpose_Y"); + float alpha = op_info->GetAttr("alpha"); + + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); + } else { + x_node = graph->Add(x_name, *x); + } + + // Y node which only supports 2-D persistable tensor + std::shared_ptr y_node = nullptr; + if (graph->Has(y_name)) { + y_node = graph->Get(y_name); + } else { + y_node = graph->Add(y_name, *y); + } + + // Matmul node + std::shared_ptr matmul_node = nullptr; + if (x_dims.size() == 2) { + matmul_node = graph->Add(out_name); + auto matmul_op = matmul_node->data(); + matmul_op->set_input_x1(*x_node->data()); + matmul_op->set_input_x2(*y_node->data()); + matmul_op->set_attr_transpose_x1(transpose_x); + matmul_op->set_attr_transpose_x2(transpose_y); + } else { + matmul_node = graph->Add(out_name); + auto matmul_op = matmul_node->data(); + matmul_op->set_input_x(*x_node->data()); + matmul_op->set_input_y(*y_node->data()); + matmul_op->set_attr_adj_x(transpose_x); + matmul_op->set_attr_adj_y(transpose_y); + } + + if (fabs(alpha - 1.f) > 1e-6f) { + auto scaled_out_node = graph->Add(out_name); + auto scaled_out_op = scaled_out_node->data(); + scaled_out_op->set_input_x(*matmul_node->data()); + scaled_out_op->set_attr_axis(1); + std::vector scale_bias_shape(4, 1); + if (out_dims.size() < 4) { + scale_bias_shape[1] = out_dims[0]; + } else if (out_dims.size() == 4) { + scale_bias_shape[1] = out_dims[1]; + } else { + LOG(WARNING) << "[NPU] not support out dims size greater than 4."; + return FAILED; + } + auto filter_node = + graph->Add(out_name + "/filter", alpha, scale_bias_shape); + scaled_out_op->set_input_filter(*filter_node->data()); + } + + return REBUILD_WHEN_SHAPE_CHANGED; +} + +} // namespace npu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE(matmul, + kNPU, + paddle::lite::subgraph::npu::MatMulConverter); diff --git a/lite/kernels/npu/bridges/paddle_use_bridges.h b/lite/kernels/npu/bridges/paddle_use_bridges.h index 3f8effa61928e522e80f7d74b38a8a672235a1f0..30d7b79c7e03dfb8176c3bdd098f35eef56a9afd 100644 --- a/lite/kernels/npu/bridges/paddle_use_bridges.h +++ b/lite/kernels/npu/bridges/paddle_use_bridges.h @@ -41,6 +41,7 @@ USE_SUBGRAPH_BRIDGE(fusion_elementwise_div_activation, kNPU); USE_SUBGRAPH_BRIDGE(fc, kNPU); USE_SUBGRAPH_BRIDGE(bilinear_interp, kNPU); USE_SUBGRAPH_BRIDGE(nearest_interp, kNPU); +USE_SUBGRAPH_BRIDGE(matmul, kNPU); USE_SUBGRAPH_BRIDGE(mul, kNPU); USE_SUBGRAPH_BRIDGE(pad2d, kNPU); USE_SUBGRAPH_BRIDGE(pool2d, kNPU); diff --git a/lite/kernels/npu/bridges/utility.h b/lite/kernels/npu/bridges/utility.h index 556b5b9fee969d89436620fd8499659d7481f132..6d7dc5891fa6821f926b232633dc40f26efb7a2e 100644 --- a/lite/kernels/npu/bridges/utility.h +++ b/lite/kernels/npu/bridges/utility.h @@ -94,6 +94,30 @@ REG_OP(Pad) .ATTR(epsilon, AttrValue::FLOAT{1e-7f}) .OP_END() + /* + * Multiplies slices of two tensors in batches. + * + * x : The input tensor + * y : The input tensor + * + * z : The output tensor + * + * adj_x : adj_x is true, the input tensor x is transposed, otherwise + * it will not be transposed. Default is false (The current version only + * supports false). + * adj_y : adj_y is true, the input tensor y is transposed, otherwise + * it will not be transposed. Default is false. + * + * 100.320.010.010 + */ + REG_OP(BatchMatMul) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT})) + .OUTPUT(z, TensorType({DT_FLOAT})) + .ATTR(adj_x, AttrValue::BOOL{false}) + .ATTR(adj_y, AttrValue::BOOL{false}) + .OP_END() + } // namespace ge namespace paddle { diff --git a/lite/tests/kernels/matmul_compute_test.cc b/lite/tests/kernels/matmul_compute_test.cc index 5d19e7fe3c023cdeea9f395a84f3ed53454c8c28..59b0fde8fd18b8a2170b6fdbd42444f09843f077 100644 --- a/lite/tests/kernels/matmul_compute_test.cc +++ b/lite/tests/kernels/matmul_compute_test.cc @@ -16,6 +16,7 @@ #include "lite/api/paddle_use_kernels.h" #include "lite/api/paddle_use_ops.h" #include "lite/core/arena/framework.h" +#include "lite/tests/utils/fill_data.h" namespace paddle { namespace lite { @@ -120,27 +121,27 @@ class MatMulComputeTester : public arena::TestCase { // common attributes for this op. std::string x_ = "X"; std::string y_ = "Y"; - bool x_transpose_; - bool y_transpose_; - float alpha_; std::string out_ = "Out"; DDim x_dims_; DDim y_dims_; + bool x_transpose_; + bool y_transpose_; + float alpha_; public: MatMulComputeTester(const Place& place, const std::string& alias, - bool x_transpose, - bool y_transpose, - float alpha, const DDim& x_dims, - const DDim& y_dims) + const DDim& y_dims, + bool x_transpose = false, + bool y_transpose = false, + float alpha = 1.f) : TestCase(place, alias), + x_dims_(x_dims), + y_dims_(y_dims), x_transpose_(x_transpose), y_transpose_(y_transpose), - alpha_(alpha), - x_dims_(x_dims), - y_dims_(y_dims) {} + alpha_(alpha) {} void RunBaseline(Scope* scope) override { auto* x = scope->FindTensor(x_); @@ -295,215 +296,166 @@ class MatMulComputeTester : public arena::TestCase { } void PrepareData() override { - std::vector x_data(x_dims_.production()); - std::vector y_data(y_dims_.production()); + std::vector x(x_dims_.production()); + fill_data_rand(x.data(), -1.f, 1.f, x_dims_.production()); + SetCommonTensor(x_, x_dims_, x.data()); - for (int i = 0; i < x_dims_.production(); ++i) { - x_data[i] = 1; // i * 1.1; - } - for (int i = 0; i < y_dims_.production(); ++i) { - y_data[i] = 1; // i * 0.9; - } - - SetCommonTensor(x_, x_dims_, x_data.data()); - SetCommonTensor(y_, y_dims_, y_data.data()); + std::vector y(y_dims_.production()); + fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production()); + SetCommonTensor(y_, y_dims_, y.data(), {}, true); } }; -void test_matmul2x2_no_transform(Place place) { - for (int m : {1, 2, 4, 8}) { - for (int k : {1, 3, 5}) { - for (int n : {1, 2, 4, 6}) { +void test_matmul_helper(Place place, + float abs_error, + std::vector x_dims, + std::vector y_dims, + bool x_transpose, + bool y_transpose, + float alpha) { + std::unique_ptr tester(new MatMulComputeTester(place, + "def", + DDim(x_dims), + DDim(y_dims), + x_transpose, + y_transpose, + alpha)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); +} + +void test_matmul2x2(Place place, float abs_error) { + for (int64_t m : {1, 2, 8}) { + for (int64_t k : {1, 3, 5}) { + for (int64_t n : {1, 4, 6}) { for (float alpha : {1., 2.}) { - bool x_transform = false; - bool y_transform = false; - std::unique_ptr tester( - new MatMulComputeTester(place, - "def", - x_transform, - y_transform, - alpha, - DDim({m, k}), - DDim({k, n}))); - arena::Arena arena(std::move(tester), place, 5e-4); - arena.TestPrecision(); + test_matmul_helper( + place, abs_error, {m, k}, {k, n}, false, false, alpha); } } } } } -void test_matmul2x2_x_transpose(Place place) { - std::vector x_dims({DDim({3, 4}), DDim({2, 5})}); - std::vector y_dims({DDim({3, 2}), DDim({2, 1})}); - std::vector alphas({1.f, 2.f}); - for (int i = 0; i < x_dims.size(); ++i) { - std::unique_ptr tester(new MatMulComputeTester( - place, "def", true, false, alphas[i], x_dims[i], y_dims[i])); - arena::Arena arena(std::move(tester), place, 2e-5); - arena.TestPrecision(); +void test_matmul2x2_xtranspose(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper(place, abs_error, {3, 4}, {3, 2}, true, false, alpha); + test_matmul_helper(place, abs_error, {2, 5}, {2, 1}, true, false, alpha); } } -void test_matmul2x2_y_transpose(Place place) { - std::vector x_dims({DDim({5, 2}), DDim({2, 5})}); - std::vector y_dims({DDim({3, 2}), DDim({1, 5})}); - std::vector alphas({1.f, 2.f}); - for (int i = 0; i < x_dims.size(); ++i) { - std::unique_ptr tester(new MatMulComputeTester( - place, "def", false, true, alphas[i], x_dims[i], y_dims[i])); - arena::Arena arena(std::move(tester), place, 2e-5); - arena.TestPrecision(); +void test_matmul2x2_ytranspose(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper(place, abs_error, {5, 2}, {3, 2}, false, true, alpha); + test_matmul_helper(place, abs_error, {2, 5}, {1, 5}, false, true, alpha); } } -void test_matmul2x2_transpose(Place place) { - std::vector x_dims({DDim({6, 2}), DDim({5, 3})}); - std::vector y_dims({DDim({3, 6}), DDim({1, 5})}); - std::vector alphas({1.f, 2.f}); - for (int i = 0; i < x_dims.size(); ++i) { - std::unique_ptr tester(new MatMulComputeTester( - place, "def", true, true, alphas[i], x_dims[i], y_dims[i])); - arena::Arena arena(std::move(tester), place, 5e-5); - arena.TestPrecision(); +void test_matmul2x2_xytranspose(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper(place, abs_error, {6, 2}, {3, 6}, true, true, alpha); + test_matmul_helper(place, abs_error, {5, 3}, {1, 5}, true, true, alpha); } } -void test_matmul1x1_no_transpose(Place place) { - DDim x_dim({3}); - DDim y_dim({3}); - float alpha = 1.5f; - std::unique_ptr tester( - new MatMulComputeTester(place, "def", false, false, alpha, x_dim, y_dim)); - arena::Arena arena(std::move(tester), place, 2e-5); - arena.TestPrecision(); -} - -void test_matmul1x1_transpose(Place place) { - DDim x_dim({3}); - DDim y_dim({5}); - float alpha = 1.5f; - std::unique_ptr tester( - new MatMulComputeTester(place, "def", true, true, alpha, x_dim, y_dim)); - arena::Arena arena(std::move(tester), place, 2e-5); - arena.TestPrecision(); +void test_matmul1x1(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper(place, abs_error, {3}, {3}, false, false, alpha); + } } -void test_matmul_nx1(Place place) { - DDim x_dim({3, 4, 2, 5}); - DDim y_dim({5}); - float alpha = 1.5f; - std::unique_ptr tester( - new MatMulComputeTester(place, "def", false, false, alpha, x_dim, y_dim)); - arena::Arena arena(std::move(tester), place, 2e-5); - arena.TestPrecision(); +void test_matmul1x1_xytranspose(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper(place, abs_error, {3}, {5}, true, true, alpha); + } } -void test_matmul_nx2_1(Place place) { - DDim x_dim({1, 2, 2, 3}); - DDim y_dim({3, 1}); - float alpha = 1.f; - std::unique_ptr tester( - new MatMulComputeTester(place, "def", false, false, alpha, x_dim, y_dim)); - arena::Arena arena(std::move(tester), place, 2e-5); - arena.TestPrecision(); +void test_matmulnx1(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper( + place, abs_error, {3, 4, 2, 5}, {5}, false, false, alpha); + } } -void test_matmul_nx2_2(Place place) { - DDim x_dim({1, 2, 2, 3}); - DDim y_dim({3, 3}); - float alpha = 1.5f; - std::unique_ptr tester( - new MatMulComputeTester(place, "def", false, false, alpha, x_dim, y_dim)); - arena::Arena arena(std::move(tester), place, 2e-5); - arena.TestPrecision(); +void test_matmulnx2(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper( + place, abs_error, {1, 2, 2, 3}, {3, 1}, false, false, alpha); + test_matmul_helper( + place, abs_error, {1, 2, 2, 3}, {3, 4}, false, false, alpha); + } } -void test_matmulnx2_x_transpose(Place place) { - std::vector x_dims({DDim({3, 4, 6, 2}), DDim({5, 3, 5, 2})}); - std::vector y_dims({DDim({6, 2}), DDim({5, 1})}); - std::vector alphas({1.f, 2.f}); - for (int i = 0; i < x_dims.size(); ++i) { - std::unique_ptr tester(new MatMulComputeTester( - place, "def", true, false, alphas[i], x_dims[i], y_dims[i])); - arena::Arena arena(std::move(tester), place, 2e-4); - arena.TestPrecision(); +void test_matmulnx2_xtranspose(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper( + place, abs_error, {3, 4, 6, 2}, {6, 2}, true, false, alpha); + test_matmul_helper( + place, abs_error, {5, 3, 5, 2}, {5, 1}, true, false, alpha); } } -void test_matmulnx2_y_transpose(Place place) { - std::vector x_dims({DDim({3, 4, 6, 2}), DDim({5, 3, 5, 2})}); - std::vector y_dims({DDim({6, 2}), DDim({1, 2})}); - std::vector alphas({1.f, 2.f}); - for (int i = 0; i < x_dims.size(); ++i) { - std::unique_ptr tester(new MatMulComputeTester( - place, "def", false, true, alphas[i], x_dims[i], y_dims[i])); - arena::Arena arena(std::move(tester), place, 5e-5); - arena.TestPrecision(); +void test_matmulnx2_ytranspose(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper( + place, abs_error, {3, 4, 6, 2}, {5, 2}, false, true, alpha); + test_matmul_helper( + place, abs_error, {5, 3, 5, 2}, {1, 2}, false, true, alpha); } } -void test_matmulnx2_transpose(Place place) { - std::vector x_dims({DDim({3, 4, 4, 3}), DDim({5, 3, 3, 2})}); - std::vector y_dims({DDim({2, 4}), DDim({1, 3})}); - std::vector alphas({1.f, 2.f}); - for (int i = 0; i < x_dims.size(); ++i) { - std::unique_ptr tester(new MatMulComputeTester( - place, "def", true, true, alphas[i], x_dims[i], y_dims[i])); - arena::Arena arena(std::move(tester), place, 5e-5); - arena.TestPrecision(); +void test_matmulnx2_xytranspose(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper( + place, abs_error, {3, 4, 4, 3}, {2, 4}, true, true, alpha); + test_matmul_helper( + place, abs_error, {5, 3, 3, 2}, {1, 3}, true, true, alpha); } } -void test_matmul_nxn(Place place) { - DDim x_dim({3, 4, 2, 5}); - DDim y_dim({3, 4, 5, 2}); - float alpha = 1.5f; - std::unique_ptr tester( - new MatMulComputeTester(place, "def", false, false, alpha, x_dim, y_dim)); - arena::Arena arena(std::move(tester), place, 1e-3); - arena.TestPrecision(); +void test_matmulnxn(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper( + place, abs_error, {3, 4, 6, 2}, {3, 4, 2, 5}, false, false, alpha); + test_matmul_helper( + place, abs_error, {5, 3, 4}, {5, 4, 6}, false, false, alpha); + } } -void test_matmulnxn_x_transpose(Place place) { - std::vector x_dims({DDim({3, 4, 6, 2}), DDim({5, 3, 5, 2})}); - std::vector y_dims({DDim({3, 4, 6, 2}), DDim({5, 3, 5, 1})}); - std::vector alphas({1.f, 2.f}); - for (int i = 0; i < x_dims.size(); ++i) { - std::unique_ptr tester(new MatMulComputeTester( - place, "def", true, false, alphas[i], x_dims[i], y_dims[i])); - arena::Arena arena(std::move(tester), place, 1e-3); - arena.TestPrecision(); +void test_matmulnxn_xtranspose(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper( + place, abs_error, {3, 4, 2, 6}, {3, 4, 2, 5}, true, false, alpha); + test_matmul_helper( + place, abs_error, {5, 4, 2}, {5, 4, 6}, true, false, alpha); } } -void test_matmulnxn_y_transpose(Place place) { - std::vector x_dims({DDim({3, 4, 6, 2}), DDim({5, 3, 5, 2})}); - std::vector y_dims({DDim({3, 4, 6, 2}), DDim({5, 3, 1, 2})}); - std::vector alphas({1.f, 2.f}); - for (int i = 0; i < x_dims.size(); ++i) { - std::unique_ptr tester(new MatMulComputeTester( - place, "def", false, true, alphas[i], x_dims[i], y_dims[i])); - arena::Arena arena(std::move(tester), place, 1e-3); - arena.TestPrecision(); +void test_matmulnxn_ytranspose(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper( + place, abs_error, {3, 4, 6, 2}, {3, 4, 5, 2}, false, true, alpha); + test_matmul_helper( + place, abs_error, {5, 3, 4}, {5, 6, 4}, false, true, alpha); } } -void test_matmulnxn_transpose(Place place) { - std::vector x_dims({DDim({3, 4, 4, 3}), DDim({5, 3, 3, 2})}); - std::vector y_dims({DDim({3, 4, 2, 4}), DDim({5, 3, 1, 3})}); - std::vector alphas({1.f, 2.f}); - for (int i = 0; i < x_dims.size(); ++i) { - std::unique_ptr tester(new MatMulComputeTester( - place, "def", true, true, alphas[i], x_dims[i], y_dims[i])); - arena::Arena arena(std::move(tester), place, 1e-3); - arena.TestPrecision(); +void test_matmulnxn_xytranspose(Place place, float abs_error) { + for (float alpha : {1.f, 2.f}) { + test_matmul_helper( + place, abs_error, {3, 4, 2, 6}, {3, 4, 5, 2}, true, true, alpha); + test_matmul_helper( + place, abs_error, {5, 4, 3}, {5, 6, 4}, true, true, alpha); } } TEST(Matmul2x2, precision) { Place place; -#if defined(LITE_WITH_ARM) + float abs_error = 2e-5; +#if defined(LITE_WITH_NPU) + place = TARGET(kNPU); + abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_ARM) place = TARGET(kARM); #elif defined(LITE_WITH_XPU) place = TARGET(kXPU); @@ -511,22 +463,31 @@ TEST(Matmul2x2, precision) { return; #endif - test_matmul2x2_no_transform(place); + test_matmul2x2(place, abs_error); } TEST(Matmul2x2_x_transpose, precision) { -#ifdef LITE_WITH_X86 - Place place(TARGET(kX86)); -#endif -#ifdef LITE_WITH_ARM - Place place(TARGET(kARM)); - test_matmul2x2_x_transpose(place); + Place place; + float abs_error = 2e-5; +#if defined(LITE_WITH_NPU) + place = TARGET(kNPU); + abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_ARM) + place = TARGET(kARM); +#else + return; #endif + + test_matmul2x2_xtranspose(place, abs_error); } TEST(Matmul2x2_y_transpose, precision) { Place place; -#if defined(LITE_WITH_ARM) + float abs_error = 2e-5; +#if defined(LITE_WITH_NPU) + place = TARGET(kNPU); + abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_ARM) place = TARGET(kARM); #elif defined(LITE_WITH_XPU) place = TARGET(kXPU); @@ -534,65 +495,80 @@ TEST(Matmul2x2_y_transpose, precision) { return; #endif - test_matmul2x2_y_transpose(place); + test_matmul2x2_ytranspose(place, abs_error); } TEST(Matmul2x2_transpose, precision) { -#ifdef LITE_WITH_X86 - Place place(TARGET(kX86)); -#endif -#ifdef LITE_WITH_ARM - Place place(TARGET(kARM)); - test_matmul2x2_transpose(place); + Place place; + float abs_error = 2e-5; +#if defined(LITE_WITH_NPU) + place = TARGET(kNPU); + abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_ARM) + place = TARGET(kARM); +#else + return; #endif + + test_matmul2x2_xytranspose(place, abs_error); } TEST(Matmul1x1, precision) { -#ifdef LITE_WITH_X86 - Place place(TARGET(kX86)); -#endif -#ifdef LITE_WITH_ARM - Place place(TARGET(kARM)); - test_matmul1x1_transpose(place); - test_matmul1x1_no_transpose(place); + Place place; + float abs_error = 2e-5; +#if defined(LITE_WITH_ARM) + place = TARGET(kARM); +#else + return; #endif + + test_matmul1x1(place, abs_error); + test_matmul1x1_xytranspose(place, abs_error); } TEST(Matmulnx1, precision) { -#ifdef LITE_WITH_X86 - Place place(TARGET(kX86)); -#endif -#ifdef LITE_WITH_ARM - Place place(TARGET(kARM)); - test_matmul_nx1(place); + Place place; + float abs_error = 2e-5; +#if defined(LITE_WITH_ARM) + place = TARGET(kARM); +#else + return; #endif + + test_matmulnx1(place, abs_error); } TEST(Matmulnx2, precision) { -#ifdef LITE_WITH_X86 - Place place(TARGET(kX86)); -#endif -#ifdef LITE_WITH_ARM - Place place(TARGET(kARM)); - test_matmul_nx2_1(place); - test_matmul_nx2_2(place); - test_matmulnx2_x_transpose(place); - test_matmulnx2_y_transpose(place); - test_matmulnx2_transpose(place); + Place place; + float abs_error = 2e-5; +#if defined(LITE_WITH_ARM) + place = TARGET(kARM); +#else + return; #endif + + test_matmulnx2(place, abs_error); + test_matmulnx2_xtranspose(place, abs_error); + test_matmulnx2_ytranspose(place, abs_error); + test_matmulnx2_xytranspose(place, abs_error); } TEST(Matmulnxn, precision) { -#ifdef LITE_WITH_X86 - Place place(TARGET(kX86)); -#endif -#ifdef LITE_WITH_ARM - Place place(TARGET(kARM)); - test_matmul_nxn(place); - test_matmulnxn_x_transpose(place); - test_matmulnxn_y_transpose(place); - test_matmulnxn_transpose(place); + Place place; + float abs_error = 2e-5; +#if defined(LITE_WITH_NPU) + place = TARGET(kNPU); + abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_ARM) + place = TARGET(kARM); +#else + return; #endif + + test_matmulnxn(place, abs_error); + test_matmulnxn_xtranspose(place, abs_error); + test_matmulnxn_ytranspose(place, abs_error); + test_matmulnxn_xytranspose(place, abs_error); } } // namespace lite