// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "lite/kernels/npu/bridges/graph.h" #include "lite/kernels/npu/bridges/registry.h" #include "lite/kernels/npu/bridges/utility.h" namespace paddle { namespace lite { namespace subgraph { namespace npu { int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK(ctx != nullptr); CHECK(op != nullptr); auto graph = static_cast(ctx); auto op_info = op->op_info(); auto op_type = op_info->Type(); auto scope = op->scope(); VLOG(3) << "[NPU] Converting " + op_type + "..."; // Get input and output vars and op attributes auto x_name = op_info->Input("X").front(); auto x_type = kernel->GetInputDeclType("X"); CHECK(x_type->precision() == PRECISION(kFloat)); CHECK(x_type->layout() == DATALAYOUT(kNCHW)); auto x = scope->FindMutableTensor(x_name); auto x_dims = x->dims(); auto y_name = op_info->Input("Y").front(); auto y_type = kernel->GetInputDeclType("Y"); CHECK(y_type->precision() == PRECISION(kFloat)); CHECK(y_type->layout() == DATALAYOUT(kNCHW)); auto y = scope->FindMutableTensor(y_name); auto y_dims = y->dims(); if (x_dims.size() == 1 || x_dims.size() != y_dims.size()) { LOG(WARNING) << "[NPU] dims size of x and y must be same and greater than 1."; return FAILED; } if (x_dims.size() > 2 && x_dims.count(0, x_dims.size() - 2) != y_dims.count(0, y_dims.size() - 2)) { LOG(WARNING) << "[NPU] batched matmul only support the same batch size"; return FAILED; } auto out_name = op_info->Output("Out").front(); auto out_type = kernel->GetOutputDeclType("Out"); CHECK(out_type->precision() == PRECISION(kFloat)); CHECK(out_type->layout() == DATALAYOUT(kNCHW)); auto out = scope->FindMutableTensor(out_name); auto out_dims = out->dims(); bool transpose_x = op_info->GetAttr("transpose_X"); if (x_dims.size() > 2 && transpose_x) { LOG(WARNING) << "[NPU] not support transpose_x == true if x_dims size " "greater than 2."; return FAILED; } bool transpose_y = op_info->GetAttr("transpose_Y"); float alpha = op_info->GetAttr("alpha"); std::shared_ptr x_node = nullptr; if (graph->Has(x_name)) { x_node = graph->Get(x_name); } else { x_node = graph->Add(x_name, *x); } // Y node which only supports 2-D persistable tensor std::shared_ptr y_node = nullptr; if (graph->Has(y_name)) { y_node = graph->Get(y_name); } else { y_node = graph->Add(y_name, *y); } // Matmul node std::shared_ptr matmul_node = nullptr; if (x_dims.size() == 2) { matmul_node = graph->Add(out_name); auto matmul_op = matmul_node->data(); matmul_op->set_input_x1(*x_node->data()); matmul_op->set_input_x2(*y_node->data()); matmul_op->set_attr_transpose_x1(transpose_x); matmul_op->set_attr_transpose_x2(transpose_y); } else { matmul_node = graph->Add(out_name); auto matmul_op = matmul_node->data(); matmul_op->set_input_x(*x_node->data()); matmul_op->set_input_y(*y_node->data()); matmul_op->set_attr_adj_x(transpose_x); matmul_op->set_attr_adj_y(transpose_y); } if (fabs(alpha - 1.f) > 1e-6f) { auto scaled_out_node = graph->Add(out_name); auto scaled_out_op = scaled_out_node->data(); scaled_out_op->set_input_x(*matmul_node->data()); scaled_out_op->set_attr_axis(1); std::vector scale_bias_shape(4, 1); if (out_dims.size() < 4) { scale_bias_shape[1] = out_dims[0]; } else if (out_dims.size() == 4) { scale_bias_shape[1] = out_dims[1]; } else { LOG(WARNING) << "[NPU] not support out dims size greater than 4."; return FAILED; } auto filter_node = graph->Add(out_name + "/filter", alpha, scale_bias_shape); scaled_out_op->set_input_filter(*filter_node->data()); } return REBUILD_WHEN_SHAPE_CHANGED; } } // namespace npu } // namespace subgraph } // namespace lite } // namespace paddle REGISTER_SUBGRAPH_BRIDGE(matmul, kNPU, paddle::lite::subgraph::npu::MatMulConverter);