diff --git a/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt b/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt index 86705e95d4b4c7b6c8aa4c1965d2d989a9137fee..3d085f9c807bc16c0dee6a7db07f121b3004f7c5 100644 --- a/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt +++ b/lite/kernels/huawei_ascend_npu/bridges/CMakeLists.txt @@ -20,6 +20,12 @@ lite_cc_library(subgraph_bridge_fc_op_huawei_ascend_npu SRCS fc_op.cc DEPS ${hua lite_cc_library(subgraph_bridge_reshape_op_huawei_ascend_npu SRCS reshape_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) lite_cc_library(subgraph_bridge_transpose_op_huawei_ascend_npu SRCS transpose_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) lite_cc_library(subgraph_bridge_flatten_op_huawei_ascend_npu SRCS flatten_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) +lite_cc_library(subgraph_bridge_layer_norm_op_huawei_ascend_npu SRCS layer_norm_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) +lite_cc_library(subgraph_bridge_matmul_op_huawei_ascend_npu SRCS matmul_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) +lite_cc_library(subgraph_bridge_cast_op_huawei_ascend_npu SRCS cast_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) +lite_cc_library(subgraph_bridge_scale_op_huawei_ascend_npu SRCS scale_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) +lite_cc_library(subgraph_bridge_slice_op_huawei_ascend_npu SRCS slice_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) +lite_cc_library(subgraph_bridge_gather_op_huawei_ascend_npu SRCS gather_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps}) set(huawei_ascend_npu_subgraph_bridges subgraph_bridge_registry @@ -38,4 +44,10 @@ set(huawei_ascend_npu_subgraph_bridges subgraph_bridge_reshape_op_huawei_ascend_npu subgraph_bridge_transpose_op_huawei_ascend_npu subgraph_bridge_flatten_op_huawei_ascend_npu + subgraph_bridge_layer_norm_op_huawei_ascend_npu + subgraph_bridge_matmul_op_huawei_ascend_npu + subgraph_bridge_cast_op_huawei_ascend_npu + 
subgraph_bridge_scale_op_huawei_ascend_npu + subgraph_bridge_slice_op_huawei_ascend_npu + subgraph_bridge_gather_op_huawei_ascend_npu CACHE INTERNAL "huawei_ascend_npu_subgraph_bridges") diff --git a/lite/kernels/huawei_ascend_npu/bridges/cast_op.cc b/lite/kernels/huawei_ascend_npu/bridges/cast_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..f948cfa2c76dc60b3d440759846ea947c3ba4718 --- /dev/null +++ b/lite/kernels/huawei_ascend_npu/bridges/cast_op.cc @@ -0,0 +1,108 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/kernels/huawei_ascend_npu/bridges/graph.h" +#include "lite/kernels/huawei_ascend_npu/bridges/utility.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace huawei_ascend_npu { + +int CastConverter(void* ctx, OpLite* op, KernelBase* kernel) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "..."; + + // Get input, output and op attributes + auto x_name = op_info->Input("X").front(); + auto x = scope->FindMutableTensor(x_name); + auto x_dims = x->dims(); + + auto out_name = op_info->Output("Out").front(); + + // BOOL = 0;INT16 = 1;INT32 = 2;INT64 = 3;FP16 = 4;FP32 = 5;FP64 = 6; + // SIZE_T = 19;UINT8 = 20;INT8 = 21; + // auto in_dtype = op_info->GetAttr("in_dtype"); + auto out_dtype = op_info->GetAttr("out_dtype"); + + // X node + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); + } else { + x_node = graph->Add(x_name, *x); + } + + PrecisionType ptype = PRECISION(kFloat); + ge::DataType otype = ge::DT_FLOAT; + switch (out_dtype) { + case 0: // BOOL = 0; + ptype = PRECISION(kBool); + otype = ge::DT_BOOL; + break; + case 1: // INT16 = 1 + ptype = PRECISION(kInt16); + otype = ge::DT_INT16; + break; + case 2: // INT32 = 2 + ptype = PRECISION(kInt32); + otype = ge::DT_INT32; + break; + case 3: // INT64 = 3 + ptype = PRECISION(kInt64); + otype = ge::DT_INT64; + break; + case 4: // FP16 = 4 + ptype = PRECISION(kFP16); + otype = ge::DT_FLOAT16; + break; + case 5: // FP32 = 5 + ptype = PRECISION(kFloat); + otype = ge::DT_FLOAT; + break; + case 21: // INT8 = 21 + ptype = PRECISION(kInt8); + otype = ge::DT_INT8; + break; + default: + LOG(FATAL) << "unsupported data type: " << out_dtype; + break; + } + + // Cast node + auto cast_node = graph->Add(out_name, ptype); + 
auto cast_op = cast_node->data(); + cast_op->set_input_x(*x_node->data()); + cast_op->set_attr_dst_type(otype); + INPUT_UPDATE(cast_op, x, x_node); + OUTPUT_UPDATE(cast_op, y, cast_node); + + return REBUILD_WHEN_SHAPE_CHANGED; +} + +} // namespace huawei_ascend_npu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE( + cast, + kHuaweiAscendNPU, + paddle::lite::subgraph::huawei_ascend_npu::CastConverter); diff --git a/lite/kernels/huawei_ascend_npu/bridges/elementwise_ops.cc b/lite/kernels/huawei_ascend_npu/bridges/elementwise_ops.cc index 84b221c2ae7489931e6db339597ab7a51fa82e09..91e64ea034e300695cfc8d2aab57824294bada2e 100644 --- a/lite/kernels/huawei_ascend_npu/bridges/elementwise_ops.cc +++ b/lite/kernels/huawei_ascend_npu/bridges/elementwise_ops.cc @@ -138,7 +138,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { std::shared_ptr x_node = nullptr; if (graph->Has(x_name)) { x_node = graph->Get(x_name); - auto shape_node = graph->Add(x_name + "/shape", x_new_shape); + auto shape_node = graph->Add(x_name + "/x_shape", x_new_shape); auto reshaped_x_node = graph->Add(x_name + "/reshape"); auto reshaped_x_op = reshaped_x_node->data(); reshaped_x_op->set_input_x(*x_node->data()); @@ -156,7 +156,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { std::shared_ptr y_node = nullptr; if (graph->Has(y_name)) { y_node = graph->Get(y_name); - auto shape_node = graph->Add(y_name + "/shape", y_new_shape); + auto shape_node = graph->Add(y_name + "/y_shape", y_new_shape); auto reshaped_y_node = graph->Add(y_name + "/reshape"); auto reshaped_y_op = reshaped_y_node->data(); reshaped_y_op->set_input_x(*y_node->data()); @@ -224,7 +224,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto out_shape = out_dims.Vectorize(); if (out_shape != x_new_shape) { - auto shape_node = graph->Add(out_name + "/shape", out_shape); + auto shape_node = graph->Add(out_name + 
"/out_shape", out_shape); auto reshaped_elt_node = graph->Add(out_name); auto reshaped_elt_op = reshaped_elt_node->data(); reshaped_elt_op->set_input_x(*elt_node->data()); diff --git a/lite/kernels/huawei_ascend_npu/bridges/gather_op.cc b/lite/kernels/huawei_ascend_npu/bridges/gather_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..bf1ae05aa42a64e5537387208fef5eae48cfb827 --- /dev/null +++ b/lite/kernels/huawei_ascend_npu/bridges/gather_op.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/kernels/huawei_ascend_npu/bridges/graph.h" +#include "lite/kernels/huawei_ascend_npu/bridges/utility.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace huawei_ascend_npu { + +int GatherConverter(void* ctx, OpLite* op, KernelBase* kernel) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "..."; + + // Get input, output and op attributes + auto x_name = op_info->Input("X").front(); + auto x = scope->FindTensor(x_name); + + auto index_name = op_info->Input("Index").front(); + auto index = scope->FindTensor(index_name); + auto index_dims = index->dims(); + CHECK(index_dims.size() == 1 || + (index_dims.size() == 2 && index_dims[1] == 1)) + << "index dims unmatch"; + + auto out_name = op_info->Output("Out").front(); + + // X node + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); + } else { + x_node = graph->Add(x_name, *x); + } + + // Index node + std::shared_ptr index_node = nullptr; + if (graph->Has(index_name)) { + index_node = graph->Get(index_name); + } else { + index_node = graph->Add(index_name, *index); + } + + // Gather node + auto gather_node = graph->Add(out_name); + auto gather_op = gather_node->data(); + gather_op->set_input_x(*x_node->data()); + gather_op->set_input_indices(*index_node->data()); + INPUT_UPDATE(gather_op, x, x_node); + INPUT_UPDATE(gather_op, indices, index_node); + OUTPUT_UPDATE(gather_op, y, gather_node); + + return REBUILD_WHEN_SHAPE_CHANGED; +} + +} // namespace huawei_ascend_npu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE( + gather, + kHuaweiAscendNPU, + paddle::lite::subgraph::huawei_ascend_npu::GatherConverter); diff --git 
a/lite/kernels/huawei_ascend_npu/bridges/layer_norm_op.cc b/lite/kernels/huawei_ascend_npu/bridges/layer_norm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..72649eb6853443225675b77dce5d38175c7d54e5 --- /dev/null +++ b/lite/kernels/huawei_ascend_npu/bridges/layer_norm_op.cc @@ -0,0 +1,158 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/huawei_ascend_npu/bridges/graph.h" +#include "lite/kernels/huawei_ascend_npu/bridges/utility.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace huawei_ascend_npu { + +int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "..."; + + // Get input and output vars + auto x_name = op_info->Input("X").front(); + auto x = scope->FindMutableTensor(x_name); + auto x_dims = x->dims(); + auto x_rank = static_cast(x_dims.size()); + CHECK(x_rank >= 2 && x_rank <= 4); + + bool has_bias = op_info->HasInput("Bias"); + bool has_scale = op_info->HasInput("Scale"); + + auto y_name = op_info->Output("Y").front(); + auto y = scope->FindMutableTensor(y_name); + auto y_dims = y->dims(); + + auto mean_name = 
op_info->Output("Mean").front(); + auto mean = scope->FindMutableTensor(mean_name); + auto mean_dims = mean->dims(); + CHECK_EQ(mean_dims.size(), 1); + + auto var_name = op_info->Output("Variance").front(); + auto var = scope->FindMutableTensor(var_name); + auto var_dims = var->dims(); + CHECK_EQ(var_dims.size(), 1); + + // Get op attributes + auto epsilon = op_info->GetAttr("epsilon"); + auto begin_norm_axis = op_info->GetAttr("begin_norm_axis"); + if (begin_norm_axis < 0) { + begin_norm_axis += x_rank; + } + CHECK_GT(begin_norm_axis, 0); + CHECK_LT(begin_norm_axis, x_rank); + CHECK(begin_norm_axis >= 1 && begin_norm_axis < x_rank); + auto matrix_dim = x_dims.Flatten2D(begin_norm_axis); + int batch_size = matrix_dim[0]; + int feature_size = matrix_dim[1]; + CHECK_EQ(mean_dims.production(), batch_size); + CHECK_EQ(var_dims.production(), batch_size); + + // X node + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); + } else { + x_node = graph->Add(x_name, *x); + } + + // Get shape of bias and scale + DDim scale_bias_dims = x_dims.Slice(begin_norm_axis, x_dims.size()); + CHECK_EQ(scale_bias_dims.production(), feature_size); + // auto scale_bias_dims = DDim({x_dims[x_dims.size()-1]}); + // Bias node + std::shared_ptr bias_node = nullptr; + if (has_bias) { + auto bias_name = op_info->Input("Bias").front(); + auto bias = scope->FindMutableTensor(bias_name); + auto bias_dims = bias->dims(); + CHECK_EQ(bias_dims.size(), 1); + CHECK_EQ(bias_dims.production(), feature_size); + bias_node = graph->Add(bias_name, *bias, scale_bias_dims); + } else { + bias_node = graph->Add(y_name + "/bias", 0.f, scale_bias_dims); + } + + // Scale node + std::shared_ptr scale_node = nullptr; + if (has_scale) { + auto scale_name = op_info->Input("Scale").front(); + auto scale = scope->FindMutableTensor(scale_name); + auto scale_dims = scale->dims(); + CHECK_EQ(scale_dims.size(), 1); + CHECK_EQ(scale_dims.production(), feature_size); + scale_node = 
graph->Add(scale_name, *scale, scale_bias_dims); + } else { + scale_node = graph->Add(y_name + "/scale", 1.f, scale_bias_dims); + } + + // LayerNorm node + auto layer_norm_node = graph->Add(y_name + "/layer_norm"); + auto layer_norm_op = layer_norm_node->data(); + layer_norm_op->set_input_x(*x_node->data()); + layer_norm_op->set_input_gamma(*scale_node->data()); + layer_norm_op->set_input_beta(*bias_node->data()); + layer_norm_op->set_attr_begin_norm_axis(begin_norm_axis); + layer_norm_op->set_attr_begin_params_axis(begin_norm_axis); + layer_norm_op->set_attr_epsilon(epsilon); + INPUT_UPDATE(layer_norm_op, x, x_node); + INPUT_UPDATE(layer_norm_op, gamma, scale_node); + INPUT_UPDATE(layer_norm_op, beta, bias_node); + OUTPUT_UPDATE(layer_norm_op, y, layer_norm_node); + OUTPUT_UPDATE(layer_norm_op, mean, layer_norm_node); + OUTPUT_UPDATE(layer_norm_op, variance, layer_norm_node); + + // Get output of Y + auto out_y_node = graph->Add(y_name); + auto out_y_op = out_y_node->data(); + out_y_op->set_input_x(*layer_norm_node->data(), "y"); + INPUT_UPDATE(out_y_op, x, layer_norm_node); + OUTPUT_UPDATE(out_y_op, y, out_y_node); + + // Get output of Mean + auto out_mean_node = graph->Add(mean_name); + auto out_mean_op = out_mean_node->data(); + out_mean_op->set_input_x(*layer_norm_node->data(), "mean"); + INPUT_UPDATE(out_mean_op, x, layer_norm_node); + OUTPUT_UPDATE(out_mean_op, y, out_mean_node); + + // Get output of Variance + auto out_var_node = graph->Add(var_name); + auto out_var_op = out_var_node->data(); + out_var_op->set_input_x(*layer_norm_node->data(), "variance"); + INPUT_UPDATE(out_var_op, x, layer_norm_node); + OUTPUT_UPDATE(out_var_op, y, out_var_node); + + return REBUILD_WHEN_SHAPE_CHANGED; +} + +} // namespace huawei_ascend_npu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE( + layer_norm, + kHuaweiAscendNPU, + paddle::lite::subgraph::huawei_ascend_npu::LayerNormConverter); diff --git 
a/lite/kernels/huawei_ascend_npu/bridges/matmul_op.cc b/lite/kernels/huawei_ascend_npu/bridges/matmul_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..561769de29acc438f3775271f6bd060c4dba1393 --- /dev/null +++ b/lite/kernels/huawei_ascend_npu/bridges/matmul_op.cc @@ -0,0 +1,126 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/huawei_ascend_npu/bridges/graph.h" +#include "lite/kernels/huawei_ascend_npu/bridges/utility.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace huawei_ascend_npu { + +int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "..."; + + // Get input and output vars and op attributes + auto x_name = op_info->Input("X").front(); + auto x = scope->FindTensor(x_name); + auto x_dims = x->dims(); + + if (x_dims.size() < 2) { + LOG(WARNING) << "[HUAWEI_ASCEND_NPU] Input dims should be equal or large " + "than 2 in Huawei Ascend NPU DDK."; + return FAILED; + } + + auto y_name = op_info->Input("Y").front(); + auto y = scope->FindTensor(y_name); + auto y_dims = y->dims(); + + if (y_dims.size() < 2) { + LOG(WARNING) << 
"[HUAWEI_ASCEND_NPU] Input dims should be equal or large " + "than 2 in Huawei Ascend NPU DDK."; + return FAILED; + } + + if (x_dims.size() != y_dims.size()) { + LOG(WARNING) << "[HUAWEI_ASCEND_NPU] dims size of input x1 and x2 must be " + "same in Huawei Ascend NPU DDK."; + return FAILED; + } + + auto out_name = op_info->Output("Out").front(); + auto out = scope->FindTensor(out_name); + auto out_dims = out->dims(); + + bool transpose_x = op_info->GetAttr("transpose_X"); + bool transpose_y = op_info->GetAttr("transpose_Y"); + float alpha = op_info->GetAttr("alpha"); + + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); + } else { + x_node = graph->Add(x_name, *x); + } + + std::shared_ptr y_node = nullptr; + if (graph->Has(y_name)) { + y_node = graph->Get(y_name); + } else { + y_node = graph->Add(y_name, *y); + } + + // Matmul node + std::shared_ptr matmul_node = nullptr; + if (x_dims.size() == 2) { + matmul_node = graph->Add(out_name); + auto matmul_op = matmul_node->data(); + matmul_op->set_input_x1(*x_node->data()); + matmul_op->set_input_x2(*y_node->data()); + matmul_op->set_attr_transpose_x1(transpose_x); + matmul_op->set_attr_transpose_x2(transpose_y); + INPUT_UPDATE(matmul_op, x1, x_node); + INPUT_UPDATE(matmul_op, x2, y_node); + OUTPUT_UPDATE(matmul_op, y, matmul_node); + } else { + matmul_node = graph->Add(out_name); + auto matmul_op = matmul_node->data(); + matmul_op->set_input_x1(*x_node->data()); + matmul_op->set_input_x2(*y_node->data()); + matmul_op->set_attr_adj_x1(transpose_x); + matmul_op->set_attr_adj_x2(transpose_y); + INPUT_UPDATE(matmul_op, x1, x_node); + INPUT_UPDATE(matmul_op, x2, y_node); + OUTPUT_UPDATE(matmul_op, y, matmul_node); + } + + if (fabs(alpha - 1.f) > 1e-6f) { + auto scale_node = graph->Add(out_name); + auto scale_op = scale_node->data(); + scale_op->set_input_x(*matmul_node->data()); + scale_op->set_attr_value(alpha); + INPUT_UPDATE(scale_op, x, matmul_node); + OUTPUT_UPDATE(scale_op, y, 
scale_node); + } + + return REBUILD_WHEN_SHAPE_CHANGED; +} + +} // namespace huawei_ascend_npu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE( + matmul, + kHuaweiAscendNPU, + paddle::lite::subgraph::huawei_ascend_npu::MatMulConverter); diff --git a/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h b/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h index f7cfe39468bc34c6d93e3d97d4b270e77cc29a33..5d59248e7fc91925dabc02298967f9ce6df1e8ef 100644 --- a/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h +++ b/lite/kernels/huawei_ascend_npu/bridges/paddle_use_bridges.h @@ -48,3 +48,9 @@ USE_SUBGRAPH_BRIDGE(transpose, kHuaweiAscendNPU); USE_SUBGRAPH_BRIDGE(transpose2, kHuaweiAscendNPU); USE_SUBGRAPH_BRIDGE(flatten, kHuaweiAscendNPU); USE_SUBGRAPH_BRIDGE(flatten2, kHuaweiAscendNPU); +USE_SUBGRAPH_BRIDGE(layer_norm, kHuaweiAscendNPU); +USE_SUBGRAPH_BRIDGE(matmul, kHuaweiAscendNPU); +USE_SUBGRAPH_BRIDGE(cast, kHuaweiAscendNPU); +USE_SUBGRAPH_BRIDGE(scale, kHuaweiAscendNPU); +USE_SUBGRAPH_BRIDGE(slice, kHuaweiAscendNPU); +USE_SUBGRAPH_BRIDGE(gather, kHuaweiAscendNPU); diff --git a/lite/kernels/huawei_ascend_npu/bridges/scale_op.cc b/lite/kernels/huawei_ascend_npu/bridges/scale_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..fb55822cff05afa9607a4b5b61adf64e35ddec60 --- /dev/null +++ b/lite/kernels/huawei_ascend_npu/bridges/scale_op.cc @@ -0,0 +1,87 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/huawei_ascend_npu/bridges/graph.h" +#include "lite/kernels/huawei_ascend_npu/bridges/utility.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace huawei_ascend_npu { + +int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "..."; + + // Get input, output and op attributes + auto x_name = op_info->Input("X").front(); + auto x = scope->FindMutableTensor(x_name); + auto x_dims = x->dims(); + auto out_name = op_info->Output("Out").front(); + float scale = op_info->GetAttr("scale"); + float bias = op_info->GetAttr("bias"); + bool bias_after_scale = op_info->GetAttr("bias_after_scale"); + if (!bias_after_scale) { + bias *= scale; + } + + // X node + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); + } else { + x_node = graph->Add(x_name, *x); + } + + // const node + auto input_scale_node = + graph->Add(out_name + "/scale", scale, x_dims.Vectorize()); + + // scale node + auto scale_node = graph->Add(out_name); + auto scale_op = scale_node->data(); + scale_op->set_input_x(*x_node->data()); + scale_op->set_input_scale(*input_scale_node->data()); + scale_op->set_attr_axis(0); + scale_op->set_attr_num_axes(-1); + scale_op->set_attr_scale_from_blob(true); + INPUT_UPDATE(scale_op, x, x_node); + INPUT_UPDATE(scale_op, scale, input_scale_node); + OUTPUT_UPDATE(scale_op, y, scale_node); + + // Add bias node(fill with bias) + if (fabs(bias) > 1e-6f) { + auto bias_node = graph->Add(out_name + "/bias", bias, x_dims.Vectorize()); + scale_op->set_input_bias(*bias_node->data()); + INPUT_UPDATE(scale_op, 
bias, bias_node); + } + + return REBUILD_WHEN_SHAPE_CHANGED; +} + +} // namespace huawei_ascend_npu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE( + scale, + kHuaweiAscendNPU, + paddle::lite::subgraph::huawei_ascend_npu::ScaleConverter); diff --git a/lite/kernels/huawei_ascend_npu/bridges/slice_op.cc b/lite/kernels/huawei_ascend_npu/bridges/slice_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..6f9e7f8b1817f0b5ebbce0af18b009473bd6e059 --- /dev/null +++ b/lite/kernels/huawei_ascend_npu/bridges/slice_op.cc @@ -0,0 +1,90 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "lite/kernels/huawei_ascend_npu/bridges/graph.h" +#include "lite/kernels/huawei_ascend_npu/bridges/utility.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace huawei_ascend_npu { + +int SliceConverter(void* ctx, OpLite* op, KernelBase* kernel) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "..."; + + // Get input, output and op attributes + auto input_name = op_info->Input("Input").front(); + auto input = scope->FindMutableTensor(input_name); + auto input_dims = input->dims(); + auto input_rank = static_cast(input_dims.size()); + std::vector input_shape = input_dims.Vectorize(); + + auto out_name = op_info->Output("Out").front(); + + auto axes = op_info->GetAttr>("axes"); + auto starts = op_info->GetAttr>("starts"); + auto ends = op_info->GetAttr>("ends"); + CHECK_EQ(axes.size(), starts.size()); + CHECK_EQ(axes.size(), ends.size()); + + // X node + std::shared_ptr input_node = nullptr; + if (graph->Has(input_name)) { + input_node = graph->Get(input_name); + } else { + input_node = graph->Add(input_name, *input); + } + + // Get begin/offset based on axes and starts + std::vector offset_vec(input_rank, 0); + std::vector size_vec(input_shape.begin(), input_shape.end()); + // Get size based on axes, starts and ends + for (int i = 0; i < axes.size(); i++) { + auto axis = axes[i]; + CHECK_LT(axis, input_rank) + << "[HUAWEI_ASCEND_NPU] axes value should be less than input rank."; + offset_vec[axis] = starts[i]; + size_vec[axis] = ends[i] - starts[i]; + } + + // Slice node + auto slice_node = graph->Add(out_name); + auto slice_op = slice_node->data(); + slice_op->set_input_x(*input_node->data()); + slice_op->set_attr_offsets( + ge::Operator::OpListInt(offset_vec.begin(), offset_vec.end())); + 
slice_op->set_attr_size( + ge::Operator::OpListInt(size_vec.begin(), size_vec.end())); + INPUT_UPDATE(slice_op, x, input_node); + OUTPUT_UPDATE(slice_op, y, slice_node); + + return REBUILD_WHEN_SHAPE_CHANGED; +} + +} // namespace huawei_ascend_npu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE( + slice, + kHuaweiAscendNPU, + paddle::lite::subgraph::huawei_ascend_npu::SliceConverter); diff --git a/lite/kernels/huawei_ascend_npu/bridges/utility.h b/lite/kernels/huawei_ascend_npu/bridges/utility.h index 40cfd971b81c4e54eb7aa690ff5ae98bb52192ac..d1dfa8aa0ce9f37e7d89d9f5653b5b97e00d1c1c 100644 --- a/lite/kernels/huawei_ascend_npu/bridges/utility.h +++ b/lite/kernels/huawei_ascend_npu/bridges/utility.h @@ -66,7 +66,7 @@ ge::DataType CvtPrecisionType(PrecisionType itype); ge::Format CvtDataLayoutType(DataLayoutType itype); -// Padding the shape to 4-dimensions(NCHW) for HiAI +// Padding the shape to 4-dimensions(NCHW) for Huawei Ascend NPU std::vector CvtShape(const std::vector& in_shape); std::vector CvtShape(const DDim& in_dims); diff --git a/lite/tests/kernels/cast_compute_test.cc b/lite/tests/kernels/cast_compute_test.cc index 34038dfdc797d0e5ee618b575ad532fd64809276..e0edb3c54e38b2e4387a5886ae6f74facd5752ba 100644 --- a/lite/tests/kernels/cast_compute_test.cc +++ b/lite/tests/kernels/cast_compute_test.cc @@ -137,17 +137,20 @@ TEST(Cast, precision) { place = TARGET(kARM); #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) place = TARGET(kXPU); +#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) + place = TARGET(kHuaweiAscendNPU); + abs_error = 1e-2; // precision_mode default is force_fp16 #else return; #endif // BOOL = 0;INT16 = 1;INT32 = 2;INT64 = 3;FP16 = 4;FP32 = 5;FP64 = 6; // SIZE_T = 19;UINT8 = 20;INT8 = 21; -#ifndef LITE_WITH_XPU +#if !defined(LITE_WITH_XPU) && !defined(LITE_WITH_HUAWEI_ASCEND_NPU) TestCast(place, abs_error, 20, 5); #endif TestCast(place, abs_error, 2, 5); -#ifdef LITE_WITH_XPU +#if 
defined(LITE_WITH_XPU) || defined(LITE_WITH_HUAWEI_ASCEND_NPU) TestCast(place, abs_error, 3, 5); TestCast(place, abs_error, 5, 3); #endif diff --git a/lite/tests/kernels/gather_compute_test.cc b/lite/tests/kernels/gather_compute_test.cc index c023a12b0fb4e3118976d854114c554ca6bf6462..59be5b973a46f17f924b4fb533eabe33534af93e 100644 --- a/lite/tests/kernels/gather_compute_test.cc +++ b/lite/tests/kernels/gather_compute_test.cc @@ -96,6 +96,9 @@ TEST(Gather, precision) { #if defined(LITE_WITH_NPU) place = TARGET(kNPU); abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) + place = TARGET(kHuaweiAscendNPU); + abs_error = 1e-2; // precision_mode default is force_fp16 #elif defined(LITE_WITH_ARM) place = TARGET(kARM); #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) diff --git a/lite/tests/kernels/layer_norm_compute_test.cc b/lite/tests/kernels/layer_norm_compute_test.cc index bd4480b6127a318286b3172f53fc8a5bceb8c328..26234f1c49e8dced75ce7a8534c995724101c78f 100644 --- a/lite/tests/kernels/layer_norm_compute_test.cc +++ b/lite/tests/kernels/layer_norm_compute_test.cc @@ -152,6 +152,9 @@ TEST(LayerNorm, precision) { #elif defined(LITE_WITH_NPU) place = TARGET(kNPU); abs_error = 1e-2; +#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) + place = TARGET(kHuaweiAscendNPU); + abs_error = 1e-2; // precision_mode default is force_fp16 #elif defined(LITE_WITH_ARM) place = TARGET(kARM); abs_error = 6e-5; diff --git a/lite/tests/kernels/lookup_table_compute_test.cc b/lite/tests/kernels/lookup_table_compute_test.cc index ae39abf1dbaf206fe0a68dd492a48a2452c8094e..9563a7809198a1d7e3317c7ad2a7effafc3b3f97 100644 --- a/lite/tests/kernels/lookup_table_compute_test.cc +++ b/lite/tests/kernels/lookup_table_compute_test.cc @@ -118,6 +118,9 @@ TEST(LookupTable, precision) { place = TARGET(kARM); #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) place = TARGET(kXPU); +#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) + place = TARGET(kHuaweiAscendNPU); + abs_error = 
1e-2; // precision_mode default is force_fp16 #else return; #endif diff --git a/lite/tests/kernels/matmul_compute_test.cc b/lite/tests/kernels/matmul_compute_test.cc index 9799c15622b07a8d126654c79738d29b176c2cf4..abd836af256e8fe8c29b25c10ab39a1666f42e11 100644 --- a/lite/tests/kernels/matmul_compute_test.cc +++ b/lite/tests/kernels/matmul_compute_test.cc @@ -455,6 +455,9 @@ TEST(Matmul2x2, precision) { #if defined(LITE_WITH_NPU) place = TARGET(kNPU); abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) + place = TARGET(kHuaweiAscendNPU); + abs_error = 1e-2; // precision_mode default is force_fp16 #elif defined(LITE_WITH_ARM) place = TARGET(kARM); #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) @@ -472,6 +475,9 @@ TEST(Matmul2x2_x_transpose, precision) { #if defined(LITE_WITH_NPU) place = TARGET(kNPU); abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) + place = TARGET(kHuaweiAscendNPU); + abs_error = 1e-2; // precision_mode default is force_fp16 #elif defined(LITE_WITH_ARM) place = TARGET(kARM); #else @@ -487,6 +493,9 @@ TEST(Matmul2x2_y_transpose, precision) { #if defined(LITE_WITH_NPU) place = TARGET(kNPU); abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) + place = TARGET(kHuaweiAscendNPU); + abs_error = 1e-2; // precision_mode default is force_fp16 #elif defined(LITE_WITH_ARM) place = TARGET(kARM); #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) @@ -559,6 +568,9 @@ TEST(Matmulnxn, precision) { #if defined(LITE_WITH_NPU) place = TARGET(kNPU); abs_error = 1e-2; // use fp16 in npu +#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) + place = TARGET(kHuaweiAscendNPU); + abs_error = 1e-2; // precision_mode default is force_fp16 #elif defined(LITE_WITH_ARM) place = TARGET(kARM); #else diff --git a/lite/tests/kernels/scale_compute_test.cc b/lite/tests/kernels/scale_compute_test.cc index 9d1f4403dc1a82e58d8c764933ba01c0e0b5c082..b08b42e7f19281133d81c1386db2cce84b596605 
100644 --- a/lite/tests/kernels/scale_compute_test.cc +++ b/lite/tests/kernels/scale_compute_test.cc @@ -168,6 +168,9 @@ TEST(Scale, precision) { #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) place = TARGET(kXPU); abs_error = 3e-4; // Some operations use fp16 in XPU +#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) + place = TARGET(kHuaweiAscendNPU); + abs_error = 1e-2; // precision_mode default is force_fp16 #elif defined(LITE_WITH_X86) place = TARGET(kX86); #else diff --git a/lite/tests/kernels/slice_compute_test.cc b/lite/tests/kernels/slice_compute_test.cc index b566bfa3e86cf6067f9914b5fc3932458a6ee186..9ca9f6190c80dfbddcf0a8e1339dd5d8885ca6f5 100644 --- a/lite/tests/kernels/slice_compute_test.cc +++ b/lite/tests/kernels/slice_compute_test.cc @@ -271,6 +271,9 @@ TEST(Slice, precision) { #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) Place place(TARGET(kXPU)); test_slice(place); +#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) + Place place = TARGET(kHuaweiAscendNPU); + test_slice(place); #endif }