提交 e81b0006 编写于 作者: Z zhupengyang 提交者: GitHub

[NPU] reshape out for mul and enhance ut (#2847)

上级 5750c483
...@@ -35,14 +35,14 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -35,14 +35,14 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto x_type = kernel->GetInputDeclType("X"); auto x_type = kernel->GetInputDeclType("X");
CHECK(x_type->precision() == PRECISION(kFloat)); CHECK(x_type->precision() == PRECISION(kFloat));
CHECK(x_type->layout() == DATALAYOUT(kNCHW)); CHECK(x_type->layout() == DATALAYOUT(kNCHW));
auto x = scope->FindMutableTensor(x_name); auto x = scope->FindTensor(x_name);
auto x_dims = x->dims(); auto x_dims = x->dims();
auto y_name = op_info->Input("Y").front(); auto y_name = op_info->Input("Y").front();
auto y_type = kernel->GetInputDeclType("Y"); auto y_type = kernel->GetInputDeclType("Y");
CHECK(y_type->precision() == PRECISION(kFloat)); CHECK(y_type->precision() == PRECISION(kFloat));
CHECK(y_type->layout() == DATALAYOUT(kNCHW)); CHECK(y_type->layout() == DATALAYOUT(kNCHW));
auto y = scope->FindMutableTensor(y_name); auto y = scope->FindTensor(y_name);
auto y_dims = y->dims(); auto y_dims = y->dims();
if (x_dims.size() == 1 || x_dims.size() != y_dims.size()) { if (x_dims.size() == 1 || x_dims.size() != y_dims.size()) {
...@@ -50,6 +50,10 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -50,6 +50,10 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
<< "[NPU] dims size of x and y must be same and greater than 1."; << "[NPU] dims size of x and y must be same and greater than 1.";
return FAILED; return FAILED;
} }
if (y_dims.size() == 2 && !y->persistable()) {
LOG(WARNING) << "[NPU] y must be const if y is 2-D";
return FAILED;
}
if (x_dims.size() > 2 && if (x_dims.size() > 2 &&
x_dims.count(0, x_dims.size() - 2) != x_dims.count(0, x_dims.size() - 2) !=
y_dims.count(0, y_dims.size() - 2)) { y_dims.count(0, y_dims.size() - 2)) {
...@@ -61,7 +65,7 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -61,7 +65,7 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto out_type = kernel->GetOutputDeclType("Out"); auto out_type = kernel->GetOutputDeclType("Out");
CHECK(out_type->precision() == PRECISION(kFloat)); CHECK(out_type->precision() == PRECISION(kFloat));
CHECK(out_type->layout() == DATALAYOUT(kNCHW)); CHECK(out_type->layout() == DATALAYOUT(kNCHW));
auto out = scope->FindMutableTensor(out_name); auto out = scope->FindTensor(out_name);
auto out_dims = out->dims(); auto out_dims = out->dims();
bool transpose_x = op_info->GetAttr<bool>("transpose_X"); bool transpose_x = op_info->GetAttr<bool>("transpose_X");
...@@ -80,7 +84,6 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -80,7 +84,6 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
x_node = graph->Add(x_name, *x); x_node = graph->Add(x_name, *x);
} }
// Y node which only supports 2-D persistable tensor
std::shared_ptr<Node> y_node = nullptr; std::shared_ptr<Node> y_node = nullptr;
if (graph->Has(y_name)) { if (graph->Has(y_name)) {
y_node = graph->Get(y_name); y_node = graph->Get(y_name);
......
...@@ -36,18 +36,27 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -36,18 +36,27 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto x_type = kernel->GetInputDeclType("X"); auto x_type = kernel->GetInputDeclType("X");
CHECK(x_type->precision() == PRECISION(kFloat)); CHECK(x_type->precision() == PRECISION(kFloat));
CHECK(x_type->layout() == DATALAYOUT(kNCHW)); CHECK(x_type->layout() == DATALAYOUT(kNCHW));
auto x = scope->FindMutableTensor(x_name); auto x = scope->FindTensor(x_name);
auto x_dims = x->dims(); auto x_dims = x->dims();
auto y_name = op_info->Input("Y").front(); auto y_name = op_info->Input("Y").front();
auto y_type = kernel->GetInputDeclType("Y"); auto y_type = kernel->GetInputDeclType("Y");
CHECK(y_type->precision() == PRECISION(kFloat)); CHECK(y_type->precision() == PRECISION(kFloat));
CHECK(y_type->layout() == DATALAYOUT(kNCHW)); CHECK(y_type->layout() == DATALAYOUT(kNCHW));
auto y = scope->FindMutableTensor(y_name); auto y = scope->FindTensor(y_name);
auto y_dims = y->dims(); auto y_dims = y->dims();
auto out_name = op_info->Output("Out").front(); auto out_name = op_info->Output("Out").front();
auto out_type = kernel->GetOutputDeclType("Out"); auto out_type = kernel->GetOutputDeclType("Out");
CHECK(out_type->precision() == PRECISION(kFloat)); CHECK(out_type->precision() == PRECISION(kFloat));
CHECK(out_type->layout() == DATALAYOUT(kNCHW)); CHECK(out_type->layout() == DATALAYOUT(kNCHW));
auto out = scope->FindTensor(out_name);
auto out_dims = out->dims();
if (out_dims.size() > 4) {
LOG(WARNING) << "[NPU] not supported above 4-D.";
return FAILED;
}
int x_num_col_dims = op_info->GetAttr<int>("x_num_col_dims"); int x_num_col_dims = op_info->GetAttr<int>("x_num_col_dims");
int y_num_col_dims = op_info->GetAttr<int>("y_num_col_dims"); int y_num_col_dims = op_info->GetAttr<int>("y_num_col_dims");
int m = x_dims.Slice(0, x_num_col_dims).production(); int m = x_dims.Slice(0, x_num_col_dims).production();
...@@ -58,20 +67,20 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -58,20 +67,20 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "m:" << m << ",n:" << n << ",k:" << k; VLOG(3) << "m:" << m << ",n:" << n << ",k:" << k;
VLOG(3) << "x_name:" << x_name << ", is data: " << graph->Has(x_name); VLOG(3) << "x_name:" << x_name << ", is data: " << graph->Has(x_name);
VLOG(3) << "y_name:" << y_name << ", is data: " << graph->Has(y_name); VLOG(3) << "y_name:" << y_name << ", is data: " << graph->Has(y_name);
CHECK(graph->Has(x_name))
<< "[NPU] MatMul in HiAI DDK only support X is data, Y is const yet.";
// X node which supports persistable and non-persistable tensor, and // X node which supports persistable and non-persistable tensor, and
// reshape to (m, k) // reshape to (m, k)
std::shared_ptr<Node> x_node = nullptr; std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) { if (graph->Has(x_name)) {
x_node = graph->Get(x_name); x_node = graph->Get(x_name);
if (x_dims.size() != 2) {
auto reshaped_x_node = graph->Add<ge::op::Reshape>(x_name + "/reshape"); auto reshaped_x_node = graph->Add<ge::op::Reshape>(x_name + "/reshape");
auto reshaped_x_op = reshaped_x_node->data<ge::op::Reshape>(); auto reshaped_x_op = reshaped_x_node->data<ge::op::Reshape>();
reshaped_x_op->set_input_tensor(*x_node->data()); reshaped_x_op->set_input_tensor(*x_node->data());
reshaped_x_op->set_attr_shape({m, k}); reshaped_x_op->set_attr_shape({m, k});
reshaped_x_op->set_attr_axis(0); reshaped_x_op->set_attr_axis(0);
x_node = reshaped_x_node; x_node = reshaped_x_node;
}
} else { } else {
x_node = graph->Add(x_name, *x, {m, k}); x_node = graph->Add(x_name, *x, {m, k});
} }
...@@ -81,12 +90,14 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -81,12 +90,14 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std::shared_ptr<Node> y_node = nullptr; std::shared_ptr<Node> y_node = nullptr;
if (graph->Has(y_name)) { if (graph->Has(y_name)) {
y_node = graph->Get(y_name); y_node = graph->Get(y_name);
if (y_dims.size() != 2) {
auto reshaped_y_node = graph->Add<ge::op::Reshape>(y_name + "/reshape"); auto reshaped_y_node = graph->Add<ge::op::Reshape>(y_name + "/reshape");
auto reshaped_y_op = reshaped_y_node->data<ge::op::Reshape>(); auto reshaped_y_op = reshaped_y_node->data<ge::op::Reshape>();
reshaped_y_op->set_input_tensor(*y_node->data()); reshaped_y_op->set_input_tensor(*y_node->data());
reshaped_y_op->set_attr_shape({k, n}); reshaped_y_op->set_attr_shape({k, n});
reshaped_y_op->set_attr_axis(0); reshaped_y_op->set_attr_axis(0);
y_node = reshaped_y_node; y_node = reshaped_y_node;
}
} else { } else {
y_node = graph->Add(y_name, *y, {k, n}); y_node = graph->Add(y_name, *y, {k, n});
} }
...@@ -96,6 +107,17 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -96,6 +107,17 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto mul_op = mul_node->data<ge::op::MatMul>(); auto mul_op = mul_node->data<ge::op::MatMul>();
mul_op->set_input_x1(*x_node->data()); mul_op->set_input_x1(*x_node->data());
mul_op->set_input_x2(*y_node->data()); mul_op->set_input_x2(*y_node->data());
if (out_dims.size() != 2) {
auto reshaped_out_node = graph->Add<ge::op::Reshape>(out_name);
auto reshaped_out_op = reshaped_out_node->data<ge::op::Reshape>();
reshaped_out_op->set_input_tensor(*mul_node->data());
auto out_shape = out_dims.Vectorize();
reshaped_out_op->set_attr_shape(
ge::AttrValue::LIST_INT(out_shape.begin(), out_shape.end()));
reshaped_out_op->set_attr_axis(0);
}
return REBUILD_WHEN_SHAPE_CHANGED; return REBUILD_WHEN_SHAPE_CHANGED;
} }
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/mul_op.h"
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace npu {
namespace bridges {
// Reference implementation of the "mul" operator used to validate the bridge.
//
// Reads the op's "X" and "Y" inputs from the op's scope, views them as 2-D
// matrices through DDim::Flatten2D() using the "x_num_col_dims" /
// "y_num_col_dims" attributes, and writes the row-major product
// (M x K) * (K x N) into the op's "Out" tensor. The "Out" tensor is not
// resized here; it is written through mutable_data<float>() as-is.
void mul_ref(const std::shared_ptr<operators::MulOpLite> op) {
  Scope* op_scope = op->scope();
  const OpInfo* info = op->op_info();

  // Resolve the operand tensors from the variable names stored in the op.
  auto* lhs = op_scope->FindVar(info->Input("X").front())->GetMutable<Tensor>();
  auto* rhs = op_scope->FindVar(info->Input("Y").front())->GetMutable<Tensor>();
  auto* dst =
      op_scope->FindVar(info->Output("Out").front())->GetMutable<Tensor>();

  const int32_t lhs_col_dims = info->GetAttr<int32_t>("x_num_col_dims");
  const int32_t rhs_col_dims = info->GetAttr<int32_t>("y_num_col_dims");

  float* lhs_data = lhs->mutable_data<float>();
  float* rhs_data = rhs->mutable_data<float>();
  float* dst_data = dst->mutable_data<float>();

  // Flattened 2-D views; the inner dimensions have to agree.
  auto lhs_mat = lhs->dims().Flatten2D(lhs_col_dims);
  auto rhs_mat = rhs->dims().Flatten2D(rhs_col_dims);
  CHECK_EQ(lhs_mat[1], rhs_mat[0]);

  const int rows = lhs_mat[0];
  const int inner = lhs_mat[1];
  const int cols = rhs_mat[1];

  for (int row = 0; row < rows; ++row) {
    const float* lhs_row = lhs_data + row * inner;
    float* dst_row = dst_data + row * cols;
    for (int col = 0; col < cols; ++col) {
      // Accumulate directly in the output cell, exactly like a naive GEMM.
      float& cell = dst_row[col];
      cell = 0;
      for (int i = 0; i < inner; ++i) {
        cell += lhs_row[i] * rhs_data[i * cols + col];
      }
    }
  }
}
// Runs one "mul" op through LauchOp() and compares its output element-wise
// with the mul_ref() reference implementation.
//
// x_shape, y_shape: dims given to the "X" and "Y" input tensors.
// x_num_col_dims, y_num_col_dims: forwarded unchanged as the op attributes of
//     the same names.
//
// Mismatches are reported through EXPECT_NEAR (non-fatal) with an absolute
// tolerance of 1e-5.
void test_mul(const std::vector<int64_t>& x_shape,
              const std::vector<int64_t>& y_shape,
              int x_num_col_dims,
              int y_num_col_dims) {
  // A bridge must be registered for "mul", otherwise there is nothing to test.
  const auto& bridges = lite::kernels::npu::bridges::Factory::Instance();
  CHECK(bridges.HasType("mul"));

  Scope scope;
  std::string x_var_name("X");
  std::string y_var_name("Y");
  std::string out_var_name("Out");
  std::string out_ref_var_name("out_ref");
  auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
  auto* y = scope.Var(y_var_name)->GetMutable<Tensor>();
  auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
  auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
  x->Resize(x_shape);
  y->Resize(y_shape);

  FillTensor<float, int>(x);
  FillTensor<float, int>(y);

  // create mul op
  cpp::OpDesc mul_op_desc;
  mul_op_desc.SetType("mul");
  mul_op_desc.SetInput("X", {x_var_name});
  mul_op_desc.SetInput("Y", {y_var_name});
  mul_op_desc.SetOutput("Out", {out_var_name});
  mul_op_desc.SetAttr("x_num_col_dims", x_num_col_dims);
  mul_op_desc.SetAttr("y_num_col_dims", y_num_col_dims);

  auto mul_op = CreateOp<operators::MulOpLite>(mul_op_desc, &scope);
  // Only "X" is listed as an input here; "Y" is not passed to LauchOp.
  LauchOp(mul_op, {x_var_name}, {out_var_name});

  // mul_ref() writes into the op's own "Out" tensor, so the launched result
  // has to be saved first. From here on `out_ref` holds the launched result
  // and `out` holds the reference result (assuming CopyDataFrom makes an
  // independent copy -- TODO confirm). The comparison below is symmetric, so
  // the naming does not affect the check.
  out_ref->CopyDataFrom(*out);
  mul_ref(mul_op);

  // compare results
  auto* out_data = out->mutable_data<float>();
  auto* out_ref_data = out_ref->mutable_data<float>();
  // Hoist the element count and index with a 64-bit type to match the
  // int64_t-based shapes instead of comparing an int against it every pass.
  const int64_t num_elements = out->dims().production();
  for (int64_t i = 0; i < num_elements; ++i) {
    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
  }
}
// Exercises the "mul" bridge with 4-D x 4-D inputs and with a 4-D x 2-D input
// pair; the cases run in the order listed.
TEST(NPUBridges, mul) {
  struct MulCase {
    std::vector<int64_t> x_shape;
    std::vector<int64_t> y_shape;
    int x_num_col_dims;
    int y_num_col_dims;
  };

  const MulCase cases[] = {
      {{1, 8, 8, 1}, {1, 8, 2, 2}, 2, 2},
      {{1, 5, 5, 1}, {1, 5, 7, 7}, 2, 2},
      {{1, 4, 1, 1}, {4, 8}, 1, 1},
  };

  for (const auto& c : cases) {
    test_mul(c.x_shape, c.y_shape, c.x_num_col_dims, c.y_num_col_dims);
  }
}
} // namespace bridges
} // namespace npu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Reference the "mul" op and its NPU bridge from this test translation unit.
// NOTE(review): presumably these macros keep the corresponding static
// registrations from being dropped at link time -- confirm against
// lite/core/op_registry.h and lite/kernels/npu/bridges/registry.h.
USE_LITE_OP(mul);
USE_NPU_BRIDGE(mul);
...@@ -32,21 +32,6 @@ bool MulOpLite::CheckShape() const { ...@@ -32,21 +32,6 @@ bool MulOpLite::CheckShape() const {
CHECK_GT_OR_FALSE(x_dims.size(), static_cast<size_t>(param_.x_num_col_dims)); CHECK_GT_OR_FALSE(x_dims.size(), static_cast<size_t>(param_.x_num_col_dims));
CHECK_GT_OR_FALSE(y_dims.size(), static_cast<size_t>(param_.y_num_col_dims)); CHECK_GT_OR_FALSE(y_dims.size(), static_cast<size_t>(param_.y_num_col_dims));
// #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
// auto x_mat_dims =
// framework::flatten_to_2d(x_dims.data(), param_.x_num_col_dims);
// auto y_mat_dims =
// framework::flatten_to_2d(y_dims.data(), param_.y_num_col_dims);
// PADDLE_ENFORCE_EQ(x_mat_dims[1],
// y_mat_dims[0],
// "First matrix's width must be equal with second
// matrix's"
// "height. %s, %s",
// x_mat_dims[1],
// y_mat_dims[0]);
// #endif
return true; return true;
} }
...@@ -73,49 +58,8 @@ bool MulOpLite::InferShape() const { ...@@ -73,49 +58,8 @@ bool MulOpLite::InferShape() const {
return true; return true;
} }
#ifdef LITE_WITH_TRAIN
bool MulGradOpLite::CheckShape() const {
CHECK_OR_FALSE(param_.x);
CHECK_OR_FALSE(param_.y);
CHECK_OR_FALSE(param_.output_grad);
return true;
}
bool MulGradOpLite::InferShape() const {
if (param_.x_grad) param_.x_grad->Resize(param_.x->dims());
if (param_.y_grad) param_.y_grad->Resize(param_.y->dims());
return true;
}
bool MulGradOpLite::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) {
auto X_name = op_desc.Input("X").front();
auto Y_name = op_desc.Input("Y").front();
auto Out_grad_name = op_desc.Input(framework::GradVarName("Out")).front();
if (op_desc.Output(framework::GradVarName("X")).size()) {
auto X_grad_name = op_desc.Output(framework::GradVarName("X")).front();
param_.x_grad = GetMutableVar<lite::Tensor>(scope, X_grad_name);
}
if (op_desc.Output(framework::GradVarName("Y")).size()) {
auto Y_grad_name = op_desc.Output(framework::GradVarName("Y")).front();
param_.y_grad = GetMutableVar<lite::Tensor>(scope, Y_grad_name);
}
param_.x = GetVar<lite::Tensor>(scope, X_name);
param_.y = GetVar<lite::Tensor>(scope, Y_name);
param_.output_grad = GetVar<lite::Tensor>(scope, Out_grad_name);
return true;
}
#endif
} // namespace operators } // namespace operators
} // namespace lite } // namespace lite
} // namespace paddle } // namespace paddle
REGISTER_LITE_OP(mul, paddle::lite::operators::MulOpLite); REGISTER_LITE_OP(mul, paddle::lite::operators::MulOpLite);
#ifdef LITE_WITH_TRAIN
REGISTER_LITE_OP(mul_grad, paddle::lite::operators::MulGradOpLite);
#endif
...@@ -99,7 +99,7 @@ class MulComputeTester : public arena::TestCase { ...@@ -99,7 +99,7 @@ class MulComputeTester : public arena::TestCase {
std::vector<float> y(y_dims_.production()); std::vector<float> y(y_dims_.production());
fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production()); fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production());
SetCommonTensor(y_, y_dims_, y.data()); SetCommonTensor(y_, y_dims_, y.data(), {}, true);
} }
}; };
...@@ -123,7 +123,10 @@ TEST(Mul, precision) { ...@@ -123,7 +123,10 @@ TEST(Mul, precision) {
LOG(INFO) << "test mul op"; LOG(INFO) << "test mul op";
float abs_error = 2e-5; float abs_error = 2e-5;
Place place; Place place;
#if defined(LITE_WITH_XPU) #if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_XPU)
place = TARGET(kXPU); place = TARGET(kXPU);
#else #else
return; return;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册