Unverified commit 11716a8b, authored by zhupengyang, committed by GitHub

[NPU] matmul op bridge and ut (#2790)

Parent 109b73c8
@@ -20,6 +20,7 @@ set(npu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bridge_utility_np
lite_cc_library(subgraph_bridge_fc_op_npu SRCS fc_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_conv_op_npu SRCS conv_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_matmul_op_npu SRCS matmul_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_mul_op_npu SRCS mul_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_act_op_npu SRCS act_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_scale_op_npu SRCS scale_op.cc DEPS ${npu_subgraph_bridge_deps})
@@ -50,6 +51,7 @@ set(npu_subgraph_bridges
    subgraph_bridge_graph_npu
    subgraph_bridge_fc_op_npu
    subgraph_bridge_conv_op_npu
    subgraph_bridge_matmul_op_npu
    subgraph_bridge_mul_op_npu
    subgraph_bridge_act_op_npu
    subgraph_bridge_scale_op_npu
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace npu {
int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[NPU] Converting " + op_type + "...";

  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
  auto x_type = kernel->GetInputDeclType("X");
  CHECK(x_type->precision() == PRECISION(kFloat));
  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();

  auto y_name = op_info->Input("Y").front();
  auto y_type = kernel->GetInputDeclType("Y");
  CHECK(y_type->precision() == PRECISION(kFloat));
  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  auto y = scope->FindMutableTensor(y_name);
  auto y_dims = y->dims();

  if (x_dims.size() == 1 || x_dims.size() != y_dims.size()) {
    LOG(WARNING)
        << "[NPU] the dims sizes of x and y must be equal and greater than 1.";
    return FAILED;
  }
  if (x_dims.size() > 2 &&
      x_dims.count(0, x_dims.size() - 2) !=
          y_dims.count(0, y_dims.size() - 2)) {
    LOG(WARNING) << "[NPU] batched matmul only supports the same batch size";
    return FAILED;
  }

  auto out_name = op_info->Output("Out").front();
  auto out_type = kernel->GetOutputDeclType("Out");
  CHECK(out_type->precision() == PRECISION(kFloat));
  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
  auto out = scope->FindMutableTensor(out_name);
  auto out_dims = out->dims();

  bool transpose_x = op_info->GetAttr<bool>("transpose_X");
  if (x_dims.size() > 2 && transpose_x) {
    LOG(WARNING) << "[NPU] transpose_x == true is not supported when the "
                    "dims size of x is greater than 2.";
    return FAILED;
  }
  bool transpose_y = op_info->GetAttr<bool>("transpose_Y");
  float alpha = op_info->GetAttr<float>("alpha");
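  // X node: reuse the node if x has already been added to the graph (e.g. as
  // the output of a preceding op); otherwise register it as a new input node.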
  std::shared_ptr<Node> x_node = nullptr;
  if (graph->Has(x_name)) {
    x_node = graph->Get(x_name);
  } else {
    x_node = graph->Add(x_name, *x);
  }

  // Y node which only supports 2-D persistable tensor
  std::shared_ptr<Node> y_node = nullptr;
  if (graph->Has(y_name)) {
    y_node = graph->Get(y_name);
  } else {
    y_node = graph->Add(y_name, *y);
  }

  // Matmul node
  std::shared_ptr<Node> matmul_node = nullptr;
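  // 2-D inputs map to ge::op::MatMul; higher-rank inputs with matching batch
  // dims map to ge::op::BatchMatMul.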
  if (x_dims.size() == 2) {
    matmul_node = graph->Add<ge::op::MatMul>(out_name);
    auto matmul_op = matmul_node->data<ge::op::MatMul>();
    matmul_op->set_input_x1(*x_node->data());
    matmul_op->set_input_x2(*y_node->data());
    matmul_op->set_attr_transpose_x1(transpose_x);
    matmul_op->set_attr_transpose_x2(transpose_y);
  } else {
    matmul_node = graph->Add<ge::op::BatchMatMul>(out_name);
    auto matmul_op = matmul_node->data<ge::op::BatchMatMul>();
    matmul_op->set_input_x(*x_node->data());
    matmul_op->set_input_y(*y_node->data());
    matmul_op->set_attr_adj_x(transpose_x);
    matmul_op->set_attr_adj_y(transpose_y);
  }
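  // HiAI's (Batch)MatMul carries no alpha attribute, so a non-unit alpha is
  // applied by appending a Scale op whose constant filter, of shape
  // {1, C, 1, 1}, is filled with alpha.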
  if (fabs(alpha - 1.f) > 1e-6f) {
    auto scaled_out_node = graph->Add<ge::op::Scale>(out_name);
    auto scaled_out_op = scaled_out_node->data<ge::op::Scale>();
    scaled_out_op->set_input_x(*matmul_node->data());
    scaled_out_op->set_attr_axis(1);
    std::vector<int64_t> scale_bias_shape(4, 1);
    if (out_dims.size() < 4) {
      scale_bias_shape[1] = out_dims[0];
    } else if (out_dims.size() == 4) {
      scale_bias_shape[1] = out_dims[1];
    } else {
      LOG(WARNING) << "[NPU] out dims size greater than 4 is not supported.";
      return FAILED;
    }
    auto filter_node =
        graph->Add(out_name + "/filter", alpha, scale_bias_shape);
    scaled_out_op->set_input_filter(*filter_node->data());
  }
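  // The converted nodes are built from concrete dims, so ask the subgraph
  // engine to rebuild the graph whenever the input shapes change.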
  return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(matmul,
                         kNPU,
                         paddle::lite::subgraph::npu::MatMulConverter);
@@ -41,6 +41,7 @@ USE_SUBGRAPH_BRIDGE(fusion_elementwise_div_activation, kNPU);
USE_SUBGRAPH_BRIDGE(fc, kNPU);
USE_SUBGRAPH_BRIDGE(bilinear_interp, kNPU);
USE_SUBGRAPH_BRIDGE(nearest_interp, kNPU);
USE_SUBGRAPH_BRIDGE(matmul, kNPU);
USE_SUBGRAPH_BRIDGE(mul, kNPU);
USE_SUBGRAPH_BRIDGE(pad2d, kNPU);
USE_SUBGRAPH_BRIDGE(pool2d, kNPU);
......
@@ -94,6 +94,30 @@ REG_OP(Pad)
.ATTR(epsilon, AttrValue::FLOAT{1e-7f})
.OP_END()
/*
* Multiplies slices of two tensors in batches.
* <Input>
* x : The input tensor
* y : The input tensor
* <Output>
* z : The output tensor
* <Attr>
 * adj_x : if true, the input tensor x is transposed; otherwise it is
 * not. Default is false (the current version only supports false).
 * adj_y : if true, the input tensor y is transposed; otherwise it is
 * not. Default is false.
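 * <Example>
 *    with adj_x = false and adj_y = false, x : [B, M, K] and y : [B, K, N]
 *    give z : [B, M, N]; with adj_y = true, y is expected as [B, N, K].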
* <Added in HiAI version>
* 100.320.010.010
*/
REG_OP(BatchMatMul)
.INPUT(x, TensorType({DT_FLOAT}))
.INPUT(y, TensorType({DT_FLOAT}))
.OUTPUT(z, TensorType({DT_FLOAT}))
.ATTR(adj_x, AttrValue::BOOL{false})
.ATTR(adj_y, AttrValue::BOOL{false})
.OP_END()
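// Usage reference: a minimal sketch (not part of this header) of how the NPU
// bridge in matmul_op.cc above wires this op; graph, x_node, y_node, and the
// transpose flags come from that converter's context:
//   auto node = graph->Add<ge::op::BatchMatMul>(out_name);
//   auto op = node->data<ge::op::BatchMatMul>();
//   op->set_input_x(*x_node->data());
//   op->set_input_y(*y_node->data());
//   op->set_attr_adj_x(transpose_x);
//   op->set_attr_adj_y(transpose_y);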
}  // namespace ge

namespace paddle {
......
@@ -16,6 +16,7 @@
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
#include "lite/tests/utils/fill_data.h"

namespace paddle {
namespace lite {
@@ -120,27 +121,27 @@ class MatMulComputeTester : public arena::TestCase {
  // common attributes for this op.
  std::string x_ = "X";
  std::string y_ = "Y";
  std::string out_ = "Out";
  DDim x_dims_;
  DDim y_dims_;
  bool x_transpose_;
  bool y_transpose_;
  float alpha_;

 public:
  MatMulComputeTester(const Place& place,
                      const std::string& alias,
                      const DDim& x_dims,
                      const DDim& y_dims,
                      bool x_transpose = false,
                      bool y_transpose = false,
                      float alpha = 1.f)
      : TestCase(place, alias),
        x_dims_(x_dims),
        y_dims_(y_dims),
        x_transpose_(x_transpose),
        y_transpose_(y_transpose),
        alpha_(alpha) {}
  void RunBaseline(Scope* scope) override {
    auto* x = scope->FindTensor(x_);
@@ -295,215 +296,166 @@ class MatMulComputeTester : public arena::TestCase {
  }

  void PrepareData() override {
    std::vector<float> x(x_dims_.production());
    fill_data_rand(x.data(), -1.f, 1.f, x_dims_.production());
    SetCommonTensor(x_, x_dims_, x.data());

    std::vector<float> y(y_dims_.production());
    fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production());
    SetCommonTensor(y_, y_dims_, y.data(), {}, true);
  }
};
void test_matmul_helper(Place place,
                        float abs_error,
                        std::vector<int64_t> x_dims,
                        std::vector<int64_t> y_dims,
                        bool x_transpose,
                        bool y_transpose,
                        float alpha) {
  std::unique_ptr<arena::TestCase> tester(new MatMulComputeTester(place,
                                                                  "def",
                                                                  DDim(x_dims),
                                                                  DDim(y_dims),
                                                                  x_transpose,
                                                                  y_transpose,
                                                                  alpha));
  arena::Arena arena(std::move(tester), place, abs_error);
  arena.TestPrecision();
}
void test_matmul2x2(Place place, float abs_error) {
  for (int64_t m : {1, 2, 8}) {
    for (int64_t k : {1, 3, 5}) {
      for (int64_t n : {1, 4, 6}) {
        for (float alpha : {1., 2.}) {
          test_matmul_helper(
              place, abs_error, {m, k}, {k, n}, false, false, alpha);
        }
      }
    }
  }
}
void test_matmul2x2_xtranspose(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(place, abs_error, {3, 4}, {3, 2}, true, false, alpha);
    test_matmul_helper(place, abs_error, {2, 5}, {2, 1}, true, false, alpha);
  }
}

void test_matmul2x2_ytranspose(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(place, abs_error, {5, 2}, {3, 2}, false, true, alpha);
    test_matmul_helper(place, abs_error, {2, 5}, {1, 5}, false, true, alpha);
  }
}

void test_matmul2x2_xytranspose(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(place, abs_error, {6, 2}, {3, 6}, true, true, alpha);
    test_matmul_helper(place, abs_error, {5, 3}, {1, 5}, true, true, alpha);
  }
}
void test_matmul1x1(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(place, abs_error, {3}, {3}, false, false, alpha);
  }
}

void test_matmul1x1_xytranspose(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(place, abs_error, {3}, {5}, true, true, alpha);
  }
}
void test_matmulnx1(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(
        place, abs_error, {3, 4, 2, 5}, {5}, false, false, alpha);
  }
}

void test_matmulnx2(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(
        place, abs_error, {1, 2, 2, 3}, {3, 1}, false, false, alpha);
    test_matmul_helper(
        place, abs_error, {1, 2, 2, 3}, {3, 4}, false, false, alpha);
  }
}
void test_matmulnx2_xtranspose(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(
        place, abs_error, {3, 4, 6, 2}, {6, 2}, true, false, alpha);
    test_matmul_helper(
        place, abs_error, {5, 3, 5, 2}, {5, 1}, true, false, alpha);
  }
}

void test_matmulnx2_ytranspose(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(
        place, abs_error, {3, 4, 6, 2}, {5, 2}, false, true, alpha);
    test_matmul_helper(
        place, abs_error, {5, 3, 5, 2}, {1, 2}, false, true, alpha);
  }
}

void test_matmulnx2_xytranspose(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(
        place, abs_error, {3, 4, 4, 3}, {2, 4}, true, true, alpha);
    test_matmul_helper(
        place, abs_error, {5, 3, 3, 2}, {1, 3}, true, true, alpha);
  }
}
void test_matmulnxn(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(
        place, abs_error, {3, 4, 6, 2}, {3, 4, 2, 5}, false, false, alpha);
    test_matmul_helper(
        place, abs_error, {5, 3, 4}, {5, 4, 6}, false, false, alpha);
  }
}

void test_matmulnxn_xtranspose(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(
        place, abs_error, {3, 4, 2, 6}, {3, 4, 2, 5}, true, false, alpha);
    test_matmul_helper(
        place, abs_error, {5, 4, 2}, {5, 4, 6}, true, false, alpha);
  }
}

void test_matmulnxn_ytranspose(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(
        place, abs_error, {3, 4, 6, 2}, {3, 4, 5, 2}, false, true, alpha);
    test_matmul_helper(
        place, abs_error, {5, 3, 4}, {5, 6, 4}, false, true, alpha);
  }
}

void test_matmulnxn_xytranspose(Place place, float abs_error) {
  for (float alpha : {1.f, 2.f}) {
    test_matmul_helper(
        place, abs_error, {3, 4, 2, 6}, {3, 4, 5, 2}, true, true, alpha);
    test_matmul_helper(
        place, abs_error, {5, 4, 3}, {5, 6, 4}, true, true, alpha);
  }
}
TEST(Matmul2x2, precision) {
  Place place;
  float abs_error = 2e-5;
#if defined(LITE_WITH_NPU)
  place = TARGET(kNPU);
  abs_error = 1e-2;  // use fp16 in npu
#elif defined(LITE_WITH_ARM)
  place = TARGET(kARM);
#elif defined(LITE_WITH_XPU)
  place = TARGET(kXPU);
@@ -511,22 +463,31 @@ TEST(Matmul2x2, precision) {
  return;
#endif
  test_matmul2x2(place, abs_error);
}
TEST(Matmul2x2_x_transpose, precision) {
  Place place;
  float abs_error = 2e-5;
#if defined(LITE_WITH_NPU)
  place = TARGET(kNPU);
  abs_error = 1e-2;  // use fp16 in npu
#elif defined(LITE_WITH_ARM)
  place = TARGET(kARM);
#else
  return;
#endif
  test_matmul2x2_xtranspose(place, abs_error);
}
TEST(Matmul2x2_y_transpose, precision) {
  Place place;
  float abs_error = 2e-5;
#if defined(LITE_WITH_NPU)
  place = TARGET(kNPU);
  abs_error = 1e-2;  // use fp16 in npu
#elif defined(LITE_WITH_ARM)
  place = TARGET(kARM);
#elif defined(LITE_WITH_XPU)
  place = TARGET(kXPU);
@@ -534,65 +495,80 @@ TEST(Matmul2x2_y_transpose, precision) {
  return;
#endif
  test_matmul2x2_ytranspose(place, abs_error);
}
TEST(Matmul2x2_transpose, precision) {
  Place place;
  float abs_error = 2e-5;
#if defined(LITE_WITH_NPU)
  place = TARGET(kNPU);
  abs_error = 1e-2;  // use fp16 in npu
#elif defined(LITE_WITH_ARM)
  place = TARGET(kARM);
#else
  return;
#endif
  test_matmul2x2_xytranspose(place, abs_error);
}
TEST(Matmul1x1, precision) {
  Place place;
  float abs_error = 2e-5;
#if defined(LITE_WITH_ARM)
  place = TARGET(kARM);
#else
  return;
#endif
  test_matmul1x1(place, abs_error);
  test_matmul1x1_xytranspose(place, abs_error);
}
TEST(Matmulnx1, precision) {
  Place place;
  float abs_error = 2e-5;
#if defined(LITE_WITH_ARM)
  place = TARGET(kARM);
#else
  return;
#endif
  test_matmulnx1(place, abs_error);
}
TEST(Matmulnx2, precision) {
  Place place;
  float abs_error = 2e-5;
#if defined(LITE_WITH_ARM)
  place = TARGET(kARM);
#else
  return;
#endif
  test_matmulnx2(place, abs_error);
  test_matmulnx2_xtranspose(place, abs_error);
  test_matmulnx2_ytranspose(place, abs_error);
  test_matmulnx2_xytranspose(place, abs_error);
}
TEST(Matmulnxn, precision) {
  Place place;
  float abs_error = 2e-5;
#if defined(LITE_WITH_NPU)
  place = TARGET(kNPU);
  abs_error = 1e-2;  // use fp16 in npu
#elif defined(LITE_WITH_ARM)
  place = TARGET(kARM);
#else
  return;
#endif
  test_matmulnxn(place, abs_error);
  test_matmulnxn_xtranspose(place, abs_error);
  test_matmulnxn_ytranspose(place, abs_error);
  test_matmulnxn_xytranspose(place, abs_error);
}

}  // namespace lite
......