diff --git a/lite/kernels/npu/bridges/matmul_op.cc b/lite/kernels/npu/bridges/matmul_op.cc
index 0e3d5ab2d7f2ada896896abcb2505f14c1d4dc28..4621f5955a841a0ba1b63381cb956242ce69639a 100644
--- a/lite/kernels/npu/bridges/matmul_op.cc
+++ b/lite/kernels/npu/bridges/matmul_op.cc
@@ -35,14 +35,14 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto x_type = kernel->GetInputDeclType("X");
   CHECK(x_type->precision() == PRECISION(kFloat));
   CHECK(x_type->layout() == DATALAYOUT(kNCHW));
-  auto x = scope->FindMutableTensor(x_name);
+  auto x = scope->FindTensor(x_name);
   auto x_dims = x->dims();
   auto y_name = op_info->Input("Y").front();
   auto y_type = kernel->GetInputDeclType("Y");
   CHECK(y_type->precision() == PRECISION(kFloat));
   CHECK(y_type->layout() == DATALAYOUT(kNCHW));
-  auto y = scope->FindMutableTensor(y_name);
+  auto y = scope->FindTensor(y_name);
   auto y_dims = y->dims();
 
   if (x_dims.size() == 1 || x_dims.size() != y_dims.size()) {
@@ -50,6 +50,10 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
         << "[NPU] dims size of x and y must be same and greater than 1.";
     return FAILED;
   }
+  if (y_dims.size() == 2 && !y->persistable()) {
+    LOG(WARNING) << "[NPU] y must be const if y is 2-D";
+    return FAILED;
+  }
   if (x_dims.size() > 2 &&
       x_dims.count(0, x_dims.size() - 2) !=
           y_dims.count(0, y_dims.size() - 2)) {
@@ -61,7 +65,7 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto out_type = kernel->GetOutputDeclType("Out");
   CHECK(out_type->precision() == PRECISION(kFloat));
   CHECK(out_type->layout() == DATALAYOUT(kNCHW));
-  auto out = scope->FindMutableTensor(out_name);
+  auto out = scope->FindTensor(out_name);
   auto out_dims = out->dims();
 
   bool transpose_x = op_info->GetAttr<bool>("transpose_X");
@@ -80,7 +84,6 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     x_node = graph->Add(x_name, *x);
   }
 
-  // Y node which only supports 2-D persistable tensor
   std::shared_ptr<Node> y_node = nullptr;
   if (graph->Has(y_name)) {
     y_node = graph->Get(y_name);
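[Reviewer note, not part of the patch] The new early-out encodes a HiAI DDK restriction: a 2-D y is lowered as constant MatMul weights, so it must be a persistable tensor; a non-persistable (runtime) 2-D y makes the converter return FAILED and the op falls back to the CPU kernel. Below is a minimal standalone sketch of that guard; TensorStub and YIsConvertible are hypothetical names invented for illustration, not PaddleLite APIs.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Stand-in for lite::Tensor, reduced to the two fields the guard reads.
    struct TensorStub {
      std::vector<int64_t> dims;
      bool persistable = false;
    };

    // Mirrors the check added to MatMulConverter above.
    bool YIsConvertible(const TensorStub& y) {
      if (y.dims.size() == 2 && !y.persistable) {
        std::cerr << "[NPU] y must be const if y is 2-D\n";
        return false;  // converter returns FAILED -> CPU fallback
      }
      return true;
    }

    int main() {
      TensorStub weight{{16, 32}, true};      // const 2-D weight: convertible
      TensorStub runtime_y{{16, 32}, false};  // runtime 2-D input: rejected
      std::cout << YIsConvertible(weight) << YIsConvertible(runtime_y) << "\n";
      return 0;
    }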
",n:" << n << ",k:" << k; VLOG(3) << "x_name:" << x_name << ", is data: " << graph->Has(x_name); VLOG(3) << "y_name:" << y_name << ", is data: " << graph->Has(y_name); - CHECK(graph->Has(x_name)) - << "[NPU] MatMul in HiAI DDK only support X is data, Y is const yet."; // X node which supports persistable and non-persistable tensor, and // reshape to (m, k) std::shared_ptr x_node = nullptr; if (graph->Has(x_name)) { x_node = graph->Get(x_name); - auto reshaped_x_node = graph->Add(x_name + "/reshape"); - auto reshaped_x_op = reshaped_x_node->data(); - reshaped_x_op->set_input_tensor(*x_node->data()); - reshaped_x_op->set_attr_shape({m, k}); - reshaped_x_op->set_attr_axis(0); - x_node = reshaped_x_node; + if (x_dims.size() != 2) { + auto reshaped_x_node = graph->Add(x_name + "/reshape"); + auto reshaped_x_op = reshaped_x_node->data(); + reshaped_x_op->set_input_tensor(*x_node->data()); + reshaped_x_op->set_attr_shape({m, k}); + reshaped_x_op->set_attr_axis(0); + x_node = reshaped_x_node; + } } else { x_node = graph->Add(x_name, *x, {m, k}); } @@ -81,12 +90,14 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) { std::shared_ptr y_node = nullptr; if (graph->Has(y_name)) { y_node = graph->Get(y_name); - auto reshaped_y_node = graph->Add(y_name + "/reshape"); - auto reshaped_y_op = reshaped_y_node->data(); - reshaped_y_op->set_input_tensor(*y_node->data()); - reshaped_y_op->set_attr_shape({k, n}); - reshaped_y_op->set_attr_axis(0); - y_node = reshaped_y_node; + if (y_dims.size() != 2) { + auto reshaped_y_node = graph->Add(y_name + "/reshape"); + auto reshaped_y_op = reshaped_y_node->data(); + reshaped_y_op->set_input_tensor(*y_node->data()); + reshaped_y_op->set_attr_shape({k, n}); + reshaped_y_op->set_attr_axis(0); + y_node = reshaped_y_node; + } } else { y_node = graph->Add(y_name, *y, {k, n}); } @@ -96,6 +107,17 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto mul_op = mul_node->data(); mul_op->set_input_x1(*x_node->data()); mul_op->set_input_x2(*y_node->data()); + + if (out_dims.size() != 2) { + auto reshaped_out_node = graph->Add(out_name); + auto reshaped_out_op = reshaped_out_node->data(); + reshaped_out_op->set_input_tensor(*mul_node->data()); + auto out_shape = out_dims.Vectorize(); + reshaped_out_op->set_attr_shape( + ge::AttrValue::LIST_INT(out_shape.begin(), out_shape.end())); + reshaped_out_op->set_attr_axis(0); + } + return REBUILD_WHEN_SHAPE_CHANGED; } diff --git a/lite/kernels/npu/bridges/mul_op_test.cc b/lite/kernels/npu/bridges/mul_op_test.cc deleted file mode 100644 index 9bcd72bb35b7bf5de19d880f4ad535fec8e480fa..0000000000000000000000000000000000000000 --- a/lite/kernels/npu/bridges/mul_op_test.cc +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "lite/operators/mul_op.h" -#include -#include "lite/core/op_registry.h" -#include "lite/kernels/npu/bridges/registry.h" -#include "lite/kernels/npu/bridges/test_helper.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace npu { -namespace bridges { - -void mul_ref(const std::shared_ptr op) { - Scope* scope = op->scope(); - const OpInfo* op_info = op->op_info(); - auto x = scope->FindVar(op_info->Input("X").front())->GetMutable(); - auto y = scope->FindVar(op_info->Input("Y").front())->GetMutable(); - auto out = - scope->FindVar(op_info->Output("Out").front())->GetMutable(); - int32_t x_num_col_dims = op_info->GetAttr("x_num_col_dims"); - int32_t y_num_col_dims = op_info->GetAttr("y_num_col_dims"); - auto x_data = x->mutable_data(); - auto y_data = y->mutable_data(); - auto out_data = out->mutable_data(); - auto x_mat_dims = x->dims().Flatten2D(x_num_col_dims); - auto y_mat_dims = y->dims().Flatten2D(y_num_col_dims); - CHECK_EQ(x_mat_dims[1], y_mat_dims[0]); - const int M = x_mat_dims[0]; - const int K = x_mat_dims[1]; - const int N = y_mat_dims[1]; - for (int m = 0; m < M; ++m) { - for (int n = 0; n < N; ++n) { - out_data[m * N + n] = 0; - for (int k = 0; k < K; ++k) { - out_data[m * N + n] += x_data[m * K + k] * y_data[k * N + n]; - } - } - } -} - -void test_mul(const std::vector& x_shape, - const std::vector& y_shape, - int x_num_col_dims, - int y_num_col_dims) { - const auto& bridges = lite::kernels::npu::bridges::Factory::Instance(); - const auto& supported_lists = bridges.AllFunctions(); - CHECK(bridges.HasType("mul")); - - Scope scope; - std::string x_var_name("X"); - std::string y_var_name("Y"); - std::string out_var_name("Out"); - std::string out_ref_var_name("out_ref"); - auto* x = scope.Var(x_var_name)->GetMutable(); - auto* y = scope.Var(y_var_name)->GetMutable(); - auto* out = scope.Var(out_var_name)->GetMutable(); - auto* out_ref = scope.Var(out_ref_var_name)->GetMutable(); - x->Resize(x_shape); - y->Resize(y_shape); - - FillTensor(x); - FillTensor(y); - - // create mul op - cpp::OpDesc mul_op_desc; - mul_op_desc.SetType("mul"); - mul_op_desc.SetInput("X", {x_var_name}); - mul_op_desc.SetInput("Y", {y_var_name}); - mul_op_desc.SetOutput("Out", {out_var_name}); - mul_op_desc.SetAttr("x_num_col_dims", static_cast(x_num_col_dims)); - mul_op_desc.SetAttr("y_num_col_dims", static_cast(y_num_col_dims)); - - auto mul_op = CreateOp(mul_op_desc, &scope); - LauchOp(mul_op, {x_var_name}, {out_var_name}); - out_ref->CopyDataFrom(*out); - - mul_ref(mul_op); - - // compare results - auto* out_data = out->mutable_data(); - auto* out_ref_data = out_ref->mutable_data(); - for (int i = 0; i < out->dims().production(); i++) { - EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5); - } -} - -TEST(NPUBridges, mul) { - test_mul({1, 8, 8, 1}, {1, 8, 2, 2}, 2, 2); - test_mul({1, 5, 5, 1}, {1, 5, 7, 7}, 2, 2); - test_mul({1, 4, 1, 1}, {4, 8}, 1, 1); -} - -} // namespace bridges -} // namespace npu -} // namespace kernels -} // namespace lite -} // namespace paddle - -USE_LITE_OP(mul); -USE_NPU_BRIDGE(mul); diff --git a/lite/operators/mul_op.cc b/lite/operators/mul_op.cc index 6067be5315220ec8b2f75265982e55f874e4b23a..c870abdc8989b48d8aa2f14f989ad475c027995e 100644 --- a/lite/operators/mul_op.cc +++ b/lite/operators/mul_op.cc @@ -32,21 +32,6 @@ bool MulOpLite::CheckShape() const { CHECK_GT_OR_FALSE(x_dims.size(), static_cast(param_.x_num_col_dims)); CHECK_GT_OR_FALSE(y_dims.size(), static_cast(param_.y_num_col_dims)); - // #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK - // auto 
diff --git a/lite/tests/kernels/mul_compute_test.cc b/lite/tests/kernels/mul_compute_test.cc
index e70f443985536cb6493558cc6e9aee4584d969f5..d9bbfaa8d049cf2bbcdea9b9c5e58d201e156a67 100644
--- a/lite/tests/kernels/mul_compute_test.cc
+++ b/lite/tests/kernels/mul_compute_test.cc
@@ -99,7 +99,7 @@ class MulComputeTester : public arena::TestCase {
     std::vector<float> y(y_dims_.production());
     fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production());
 
-    SetCommonTensor(y_, y_dims_, y.data());
+    SetCommonTensor(y_, y_dims_, y.data(), {}, true);
   }
 };
 
@@ -123,7 +123,10 @@ TEST(Mul, precision) {
   LOG(INFO) << "test mul op";
   float abs_error = 2e-5;
   Place place;
-#if defined(LITE_WITH_XPU)
+#if defined(LITE_WITH_NPU)
+  place = TARGET(kNPU);
+  abs_error = 1e-2;  // use fp16 in npu
+#elif defined(LITE_WITH_XPU)
   place = TARGET(kXPU);
 #else
   return;
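[Reviewer note, not part of the patch] Two things worth calling out in the test change: judging from the call site, the extra SetCommonTensor arguments pass an empty LoD and mark Y persistable, which is what lets the NPU bridge treat it as const weights; and abs_error is relaxed to 1e-2 because HiAI executes MatMul in fp16. A back-of-the-envelope sketch of why the looser bound is plausible (the numbers are an estimate, not a statement about HiAI internals):

    #include <cmath>
    #include <cstdio>

    int main() {
      // fp16 has a 10-bit mantissa, so the spacing (ulp) between
      // representable values near 1.0 is 2^-10 ~= 9.77e-4 -- already well
      // above the fp32 tolerance of 2e-5. A length-k dot product can
      // accumulate on the order of k ulps of rounding error, which is why
      // an absolute bound around 1e-2 is used for the shapes tested here.
      const double fp16_ulp_near_1 = std::pow(2.0, -10);
      const int k = 64;  // illustrative reduction length
      std::printf("fp16 ulp near 1.0: %.3e, ~k ulps: %.3e\n",
                  fp16_ulp_near_1, k * fp16_ulp_near_1);
      return 0;
    }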