Unverified commit 64eaaeca, authored by Qi Li, committed by GitHub

[Ascend] add layer_norm, matmul, cast, scale, slice, gather op for ernie, test=develop (#4126)

* add new ops for ernie, test=develop

* drop lookuptable, test=develop

* update cast compute, test=develop

* add slice gather op, test=develop
Parent d14e57f7
@@ -20,6 +20,12 @@ lite_cc_library(subgraph_bridge_fc_op_huawei_ascend_npu SRCS fc_op.cc DEPS ${hua
lite_cc_library(subgraph_bridge_reshape_op_huawei_ascend_npu SRCS reshape_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_transpose_op_huawei_ascend_npu SRCS transpose_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_flatten_op_huawei_ascend_npu SRCS flatten_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_layer_norm_op_huawei_ascend_npu SRCS layer_norm_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_matmul_op_huawei_ascend_npu SRCS matmul_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_cast_op_huawei_ascend_npu SRCS cast_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_scale_op_huawei_ascend_npu SRCS scale_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_slice_op_huawei_ascend_npu SRCS slice_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_gather_op_huawei_ascend_npu SRCS gather_op.cc DEPS ${huawei_ascend_npu_subgraph_bridge_deps})
set(huawei_ascend_npu_subgraph_bridges
subgraph_bridge_registry
@@ -38,4 +44,10 @@ set(huawei_ascend_npu_subgraph_bridges
subgraph_bridge_reshape_op_huawei_ascend_npu
subgraph_bridge_transpose_op_huawei_ascend_npu
subgraph_bridge_flatten_op_huawei_ascend_npu
subgraph_bridge_layer_norm_op_huawei_ascend_npu
subgraph_bridge_matmul_op_huawei_ascend_npu
subgraph_bridge_cast_op_huawei_ascend_npu
subgraph_bridge_scale_op_huawei_ascend_npu
subgraph_bridge_slice_op_huawei_ascend_npu
subgraph_bridge_gather_op_huawei_ascend_npu
CACHE INTERNAL "huawei_ascend_npu_subgraph_bridges")
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/huawei_ascend_npu/bridges/graph.h"
#include "lite/kernels/huawei_ascend_npu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace huawei_ascend_npu {
int CastConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "...";
// Get input, output and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto out_name = op_info->Output("Out").front();
// BOOL = 0;INT16 = 1;INT32 = 2;INT64 = 3;FP16 = 4;FP32 = 5;FP64 = 6;
// SIZE_T = 19;UINT8 = 20;INT8 = 21;
// auto in_dtype = op_info->GetAttr<int>("in_dtype");
auto out_dtype = op_info->GetAttr<int>("out_dtype");
// X node
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
} else {
x_node = graph->Add(x_name, *x);
}
PrecisionType ptype = PRECISION(kFloat);
ge::DataType otype = ge::DT_FLOAT;
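// Map the Paddle dtype code in out_dtype to a Lite precision and a GE data
// type; any code not handled below (e.g. FP64, SIZE_T, UINT8) hits LOG(FATAL).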
switch (out_dtype) {
case 0: // BOOL = 0;
ptype = PRECISION(kBool);
otype = ge::DT_BOOL;
break;
case 1: // INT16 = 1
ptype = PRECISION(kInt16);
otype = ge::DT_INT16;
break;
case 2: // INT32 = 2
ptype = PRECISION(kInt32);
otype = ge::DT_INT32;
break;
case 3: // INT64 = 3
ptype = PRECISION(kInt64);
otype = ge::DT_INT64;
break;
case 4: // FP16 = 4
ptype = PRECISION(kFP16);
otype = ge::DT_FLOAT16;
break;
case 5: // FP32 = 5
ptype = PRECISION(kFloat);
otype = ge::DT_FLOAT;
break;
case 21: // INT8 = 21
ptype = PRECISION(kInt8);
otype = ge::DT_INT8;
break;
default:
LOG(FATAL) << "unsupported data type: " << out_dtype;
break;
}
// Cast node
auto cast_node = graph->Add<ge::op::Cast>(out_name, ptype);
auto cast_op = cast_node->data<ge::op::Cast>();
cast_op->set_input_x(*x_node->data());
cast_op->set_attr_dst_type(otype);
INPUT_UPDATE(cast_op, x, x_node);
OUTPUT_UPDATE(cast_op, y, cast_node);
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace huawei_ascend_npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(
cast,
kHuaweiAscendNPU,
paddle::lite::subgraph::huawei_ascend_npu::CastConverter);
@@ -138,7 +138,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
- auto shape_node = graph->Add<int64_t>(x_name + "/shape", x_new_shape);
+ auto shape_node = graph->Add<int64_t>(x_name + "/x_shape", x_new_shape);
auto reshaped_x_node = graph->Add<ge::op::Reshape>(x_name + "/reshape");
auto reshaped_x_op = reshaped_x_node->data<ge::op::Reshape>();
reshaped_x_op->set_input_x(*x_node->data());
@@ -156,7 +156,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std::shared_ptr<Node> y_node = nullptr;
if (graph->Has(y_name)) {
y_node = graph->Get(y_name);
- auto shape_node = graph->Add<int64_t>(y_name + "/shape", y_new_shape);
+ auto shape_node = graph->Add<int64_t>(y_name + "/y_shape", y_new_shape);
auto reshaped_y_node = graph->Add<ge::op::Reshape>(y_name + "/reshape");
auto reshaped_y_op = reshaped_y_node->data<ge::op::Reshape>();
reshaped_y_op->set_input_x(*y_node->data());
@@ -224,7 +224,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto out_shape = out_dims.Vectorize();
if (out_shape != x_new_shape) {
- auto shape_node = graph->Add<int64_t>(out_name + "/shape", out_shape);
+ auto shape_node = graph->Add<int64_t>(out_name + "/out_shape", out_shape);
auto reshaped_elt_node = graph->Add<ge::op::Reshape>(out_name);
auto reshaped_elt_op = reshaped_elt_node->data<ge::op::Reshape>();
reshaped_elt_op->set_input_x(*elt_node->data());
...
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/huawei_ascend_npu/bridges/graph.h"
#include "lite/kernels/huawei_ascend_npu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace huawei_ascend_npu {
int GatherConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "...";
// Get input, output and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindTensor(x_name);
auto index_name = op_info->Input("Index").front();
auto index = scope->FindTensor(index_name);
auto index_dims = index->dims();
CHECK(index_dims.size() == 1 ||
(index_dims.size() == 2 && index_dims[1] == 1))
<< "index dims unmatch";
auto out_name = op_info->Output("Out").front();
// X node
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
} else {
x_node = graph->Add(x_name, *x);
}
// Index node
std::shared_ptr<Node> index_node = nullptr;
if (graph->Has(index_name)) {
index_node = graph->Get(index_name);
} else {
index_node = graph->Add(index_name, *index);
}
// Gather node
auto gather_node = graph->Add<ge::op::Gather>(out_name);
auto gather_op = gather_node->data<ge::op::Gather>();
gather_op->set_input_x(*x_node->data());
gather_op->set_input_indices(*index_node->data());
INPUT_UPDATE(gather_op, x, x_node);
INPUT_UPDATE(gather_op, indices, index_node);
OUTPUT_UPDATE(gather_op, y, gather_node);
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace huawei_ascend_npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(
gather,
kHuaweiAscendNPU,
paddle::lite::subgraph::huawei_ascend_npu::GatherConverter);
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/huawei_ascend_npu/bridges/graph.h"
#include "lite/kernels/huawei_ascend_npu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace huawei_ascend_npu {
int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "...";
// Get input and output vars
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto x_rank = static_cast<int>(x_dims.size());
CHECK(x_rank >= 2 && x_rank <= 4);
bool has_bias = op_info->HasInput("Bias");
bool has_scale = op_info->HasInput("Scale");
auto y_name = op_info->Output("Y").front();
auto y = scope->FindMutableTensor(y_name);
auto y_dims = y->dims();
auto mean_name = op_info->Output("Mean").front();
auto mean = scope->FindMutableTensor(mean_name);
auto mean_dims = mean->dims();
CHECK_EQ(mean_dims.size(), 1);
auto var_name = op_info->Output("Variance").front();
auto var = scope->FindMutableTensor(var_name);
auto var_dims = var->dims();
CHECK_EQ(var_dims.size(), 1);
// Get op attributes
auto epsilon = op_info->GetAttr<float>("epsilon");
auto begin_norm_axis = op_info->GetAttr<int>("begin_norm_axis");
if (begin_norm_axis < 0) {
begin_norm_axis += x_rank;
}
CHECK_GT(begin_norm_axis, 0);
CHECK_LT(begin_norm_axis, x_rank);
CHECK(begin_norm_axis >= 1 && begin_norm_axis < x_rank);
auto matrix_dim = x_dims.Flatten2D(begin_norm_axis);
int batch_size = matrix_dim[0];
int feature_size = matrix_dim[1];
CHECK_EQ(mean_dims.production(), batch_size);
CHECK_EQ(var_dims.production(), batch_size);
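// x is viewed as a [batch_size, feature_size] matrix split at
// begin_norm_axis; Mean and Variance hold one value per row.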
// X node
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
} else {
x_node = graph->Add(x_name, *x);
}
// Get shape of bias and scale
DDim scale_bias_dims = x_dims.Slice(begin_norm_axis, x_dims.size());
CHECK_EQ(scale_bias_dims.production(), feature_size);
// auto scale_bias_dims = DDim({x_dims[x_dims.size()-1]});
// Bias node
std::shared_ptr<Node> bias_node = nullptr;
if (has_bias) {
auto bias_name = op_info->Input("Bias").front();
auto bias = scope->FindMutableTensor(bias_name);
auto bias_dims = bias->dims();
CHECK_EQ(bias_dims.size(), 1);
CHECK_EQ(bias_dims.production(), feature_size);
bias_node = graph->Add(bias_name, *bias, scale_bias_dims);
} else {
bias_node = graph->Add<float>(y_name + "/bias", 0.f, scale_bias_dims);
}
// Scale node
std::shared_ptr<Node> scale_node = nullptr;
if (has_scale) {
auto scale_name = op_info->Input("Scale").front();
auto scale = scope->FindMutableTensor(scale_name);
auto scale_dims = scale->dims();
CHECK_EQ(scale_dims.size(), 1);
CHECK_EQ(scale_dims.production(), feature_size);
scale_node = graph->Add(scale_name, *scale, scale_bias_dims);
} else {
scale_node = graph->Add<float>(y_name + "/scale", 1.f, scale_bias_dims);
}
// LayerNorm node
auto layer_norm_node = graph->Add<ge::op::LayerNorm>(y_name + "/layer_norm");
auto layer_norm_op = layer_norm_node->data<ge::op::LayerNorm>();
layer_norm_op->set_input_x(*x_node->data());
layer_norm_op->set_input_gamma(*scale_node->data());
layer_norm_op->set_input_beta(*bias_node->data());
layer_norm_op->set_attr_begin_norm_axis(begin_norm_axis);
layer_norm_op->set_attr_begin_params_axis(begin_norm_axis);
layer_norm_op->set_attr_epsilon(epsilon);
INPUT_UPDATE(layer_norm_op, x, x_node);
INPUT_UPDATE(layer_norm_op, gamma, scale_node);
INPUT_UPDATE(layer_norm_op, beta, bias_node);
OUTPUT_UPDATE(layer_norm_op, y, layer_norm_node);
OUTPUT_UPDATE(layer_norm_op, mean, layer_norm_node);
OUTPUT_UPDATE(layer_norm_op, variance, layer_norm_node);
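// The GE LayerNorm op exposes three named outputs (y, mean, variance); an
// Identity node is attached to each so that they map back to the Paddle
// output variables Y, Mean and Variance.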
// Get output of Y
auto out_y_node = graph->Add<ge::op::Identity>(y_name);
auto out_y_op = out_y_node->data<ge::op::Identity>();
out_y_op->set_input_x(*layer_norm_node->data(), "y");
INPUT_UPDATE(out_y_op, x, layer_norm_node);
OUTPUT_UPDATE(out_y_op, y, out_y_node);
// Get output of Mean
auto out_mean_node = graph->Add<ge::op::Identity>(mean_name);
auto out_mean_op = out_mean_node->data<ge::op::Identity>();
out_mean_op->set_input_x(*layer_norm_node->data(), "mean");
INPUT_UPDATE(out_mean_op, x, layer_norm_node);
OUTPUT_UPDATE(out_mean_op, y, out_mean_node);
// Get output of Variance
auto out_var_node = graph->Add<ge::op::Identity>(var_name);
auto out_var_op = out_var_node->data<ge::op::Identity>();
out_var_op->set_input_x(*layer_norm_node->data(), "variance");
INPUT_UPDATE(out_var_op, x, layer_norm_node);
OUTPUT_UPDATE(out_var_op, y, out_var_node);
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace huawei_ascend_npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(
layer_norm,
kHuaweiAscendNPU,
paddle::lite::subgraph::huawei_ascend_npu::LayerNormConverter);
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/huawei_ascend_npu/bridges/graph.h"
#include "lite/kernels/huawei_ascend_npu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace huawei_ascend_npu {
int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "...";
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindTensor(x_name);
auto x_dims = x->dims();
if (x_dims.size() < 2) {
LOG(WARNING) << "[HUAWEI_ASCEND_NPU] Input dims should be equal or large "
"than 2 in Huawei Ascend NPU DDK.";
return FAILED;
}
auto y_name = op_info->Input("Y").front();
auto y = scope->FindTensor(y_name);
auto y_dims = y->dims();
if (y_dims.size() < 2) {
LOG(WARNING) << "[HUAWEI_ASCEND_NPU] Input dims should be equal or large "
"than 2 in Huawei Ascend NPU DDK.";
return FAILED;
}
if (x_dims.size() != y_dims.size()) {
LOG(WARNING) << "[HUAWEI_ASCEND_NPU] dims size of input x1 and x2 must be "
"same in Huawei Ascend NPU DDK.";
return FAILED;
}
auto out_name = op_info->Output("Out").front();
auto out = scope->FindTensor(out_name);
auto out_dims = out->dims();
bool transpose_x = op_info->GetAttr<bool>("transpose_X");
bool transpose_y = op_info->GetAttr<bool>("transpose_Y");
float alpha = op_info->GetAttr<float>("alpha");
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
} else {
x_node = graph->Add(x_name, *x);
}
std::shared_ptr<Node> y_node = nullptr;
if (graph->Has(y_name)) {
y_node = graph->Get(y_name);
} else {
y_node = graph->Add(y_name, *y);
}
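// Rank-2 inputs map to ge::op::MatMul with transpose attributes; higher
// ranks map to ge::op::BatchMatMul, where the transpose flags become the
// adjoint attributes adj_x1/adj_x2.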
// Matmul node
std::shared_ptr<Node> matmul_node = nullptr;
if (x_dims.size() == 2) {
matmul_node = graph->Add<ge::op::MatMul>(out_name);
auto matmul_op = matmul_node->data<ge::op::MatMul>();
matmul_op->set_input_x1(*x_node->data());
matmul_op->set_input_x2(*y_node->data());
matmul_op->set_attr_transpose_x1(transpose_x);
matmul_op->set_attr_transpose_x2(transpose_y);
INPUT_UPDATE(matmul_op, x1, x_node);
INPUT_UPDATE(matmul_op, x2, y_node);
OUTPUT_UPDATE(matmul_op, y, matmul_node);
} else {
matmul_node = graph->Add<ge::op::BatchMatMul>(out_name);
auto matmul_op = matmul_node->data<ge::op::BatchMatMul>();
matmul_op->set_input_x1(*x_node->data());
matmul_op->set_input_x2(*y_node->data());
matmul_op->set_attr_adj_x1(transpose_x);
matmul_op->set_attr_adj_x2(transpose_y);
INPUT_UPDATE(matmul_op, x1, x_node);
INPUT_UPDATE(matmul_op, x2, y_node);
OUTPUT_UPDATE(matmul_op, y, matmul_node);
}
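// When alpha != 1, a separate Muls node is appended to scale the matmul
// result by alpha.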
if (fabs(alpha - 1.f) > 1e-6f) {
auto scale_node = graph->Add<ge::op::Muls>(out_name);
auto scale_op = scale_node->data<ge::op::Muls>();
scale_op->set_input_x(*matmul_node->data());
scale_op->set_attr_value(alpha);
INPUT_UPDATE(scale_op, x, matmul_node);
OUTPUT_UPDATE(scale_op, y, scale_node);
}
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace huawei_ascend_npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(
matmul,
kHuaweiAscendNPU,
paddle::lite::subgraph::huawei_ascend_npu::MatMulConverter);
@@ -48,3 +48,9 @@ USE_SUBGRAPH_BRIDGE(transpose, kHuaweiAscendNPU);
USE_SUBGRAPH_BRIDGE(transpose2, kHuaweiAscendNPU);
USE_SUBGRAPH_BRIDGE(flatten, kHuaweiAscendNPU);
USE_SUBGRAPH_BRIDGE(flatten2, kHuaweiAscendNPU);
USE_SUBGRAPH_BRIDGE(layer_norm, kHuaweiAscendNPU);
USE_SUBGRAPH_BRIDGE(matmul, kHuaweiAscendNPU);
USE_SUBGRAPH_BRIDGE(cast, kHuaweiAscendNPU);
USE_SUBGRAPH_BRIDGE(scale, kHuaweiAscendNPU);
USE_SUBGRAPH_BRIDGE(slice, kHuaweiAscendNPU);
USE_SUBGRAPH_BRIDGE(gather, kHuaweiAscendNPU);
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/huawei_ascend_npu/bridges/graph.h"
#include "lite/kernels/huawei_ascend_npu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace huawei_ascend_npu {
int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "...";
// Get input, output and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto out_name = op_info->Output("Out").front();
float scale = op_info->GetAttr<float>("scale");
float bias = op_info->GetAttr<float>("bias");
bool bias_after_scale = op_info->GetAttr<bool>("bias_after_scale");
if (!bias_after_scale) {
bias *= scale;
}
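// Paddle's scale op computes y = scale * x + bias when bias_after_scale is
// true and y = scale * (x + bias) otherwise; folding bias *= scale reduces
// the second form to the first (e.g. scale=2, bias=3 -> y = 2 * x + 6).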
// X node
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
} else {
x_node = graph->Add(x_name, *x);
}
// Const node holding the scale value, broadcast to the shape of x
auto input_scale_node =
graph->Add<float>(out_name + "/scale", scale, x_dims.Vectorize());
// Scale node
auto scale_node = graph->Add<ge::op::Scale>(out_name);
auto scale_op = scale_node->data<ge::op::Scale>();
scale_op->set_input_x(*x_node->data());
scale_op->set_input_scale(*input_scale_node->data());
scale_op->set_attr_axis(0);
scale_op->set_attr_num_axes(-1);
scale_op->set_attr_scale_from_blob(true);
INPUT_UPDATE(scale_op, x, x_node);
INPUT_UPDATE(scale_op, scale, input_scale_node);
OUTPUT_UPDATE(scale_op, y, scale_node);
// Bias node (constant tensor filled with the bias value)
if (fabs(bias) > 1e-6f) {
auto bias_node = graph->Add(out_name + "/bias", bias, x_dims.Vectorize());
scale_op->set_input_bias(*bias_node->data());
INPUT_UPDATE(scale_op, bias, bias_node);
}
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace huawei_ascend_npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(
scale,
kHuaweiAscendNPU,
paddle::lite::subgraph::huawei_ascend_npu::ScaleConverter);
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/huawei_ascend_npu/bridges/graph.h"
#include "lite/kernels/huawei_ascend_npu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace huawei_ascend_npu {
int SliceConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[HUAWEI_ASCEND_NPU] Converting " + op_type + "...";
// Get input, output and op attributes
auto input_name = op_info->Input("Input").front();
auto input = scope->FindMutableTensor(input_name);
auto input_dims = input->dims();
auto input_rank = static_cast<int>(input_dims.size());
std::vector<int64_t> input_shape = input_dims.Vectorize();
auto out_name = op_info->Output("Out").front();
auto axes = op_info->GetAttr<std::vector<int>>("axes");
auto starts = op_info->GetAttr<std::vector<int>>("starts");
auto ends = op_info->GetAttr<std::vector<int>>("ends");
CHECK_EQ(axes.size(), starts.size());
CHECK_EQ(axes.size(), ends.size());
// X node
std::shared_ptr<Node> input_node = nullptr;
if (graph->Has(input_name)) {
input_node = graph->Get(input_name);
} else {
input_node = graph->Add(input_name, *input);
}
// Initialize offsets to 0 and sizes to the full input shape
std::vector<int> offset_vec(input_rank, 0);
std::vector<int> size_vec(input_shape.begin(), input_shape.end());
// Update offsets and sizes along the sliced axes based on starts and ends
for (int i = 0; i < axes.size(); i++) {
auto axis = axes[i];
CHECK_LT(axis, input_rank)
<< "[HUAWEI_ASCEND_NPU] axes value should be less than input rank.";
offset_vec[axis] = starts[i];
size_vec[axis] = ends[i] - starts[i];
}
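// Example: input shape [4, 10], axes=[1], starts=[2], ends=[5] gives
// offsets=[0, 2] and sizes=[4, 3].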
// Slice node
auto slice_node = graph->Add<ge::op::SliceD>(out_name);
auto slice_op = slice_node->data<ge::op::SliceD>();
slice_op->set_input_x(*input_node->data());
slice_op->set_attr_offsets(
ge::Operator::OpListInt(offset_vec.begin(), offset_vec.end()));
slice_op->set_attr_size(
ge::Operator::OpListInt(size_vec.begin(), size_vec.end()));
INPUT_UPDATE(slice_op, x, input_node);
OUTPUT_UPDATE(slice_op, y, slice_node);
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace huawei_ascend_npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(
slice,
kHuaweiAscendNPU,
paddle::lite::subgraph::huawei_ascend_npu::SliceConverter);
@@ -66,7 +66,7 @@ ge::DataType CvtPrecisionType(PrecisionType itype);
ge::Format CvtDataLayoutType(DataLayoutType itype);
- // Padding the shape to 4-dimensions(NCHW) for HiAI
+ // Padding the shape to 4-dimensions(NCHW) for Huawei Ascend NPU
std::vector<int64_t> CvtShape(const std::vector<int64_t>& in_shape);
std::vector<int64_t> CvtShape(const DDim& in_dims);
...
@@ -137,17 +137,20 @@ TEST(Cast, precision) {
place = TARGET(kARM);
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
place = TARGET(kXPU);
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place = TARGET(kHuaweiAscendNPU);
abs_error = 1e-2; // precision_mode default is force_fp16
#else
return;
#endif
// BOOL = 0;INT16 = 1;INT32 = 2;INT64 = 3;FP16 = 4;FP32 = 5;FP64 = 6;
// SIZE_T = 19;UINT8 = 20;INT8 = 21;
- #ifndef LITE_WITH_XPU
+ #if !defined(LITE_WITH_XPU) && !defined(LITE_WITH_HUAWEI_ASCEND_NPU)
TestCast(place, abs_error, 20, 5);
#endif
TestCast(place, abs_error, 2, 5);
- #ifdef LITE_WITH_XPU
+ #if defined(LITE_WITH_XPU) || defined(LITE_WITH_HUAWEI_ASCEND_NPU)
TestCast(place, abs_error, 3, 5);
TestCast(place, abs_error, 5, 3);
#endif
...
@@ -96,6 +96,9 @@ TEST(Gather, precision) {
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place = TARGET(kHuaweiAscendNPU);
abs_error = 1e-2; // precision_mode default is force_fp16
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
...
@@ -152,6 +152,9 @@ TEST(LayerNorm, precision) {
#elif defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2;
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place = TARGET(kHuaweiAscendNPU);
abs_error = 1e-2; // precision_mode default is force_fp16
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
abs_error = 6e-5;
...
@@ -118,6 +118,9 @@ TEST(LookupTable, precision) {
place = TARGET(kARM);
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
place = TARGET(kXPU);
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place = TARGET(kHuaweiAscendNPU);
abs_error = 1e-2; // precision_mode default is force_fp16
#else
return;
#endif
...
@@ -455,6 +455,9 @@ TEST(Matmul2x2, precision) {
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place = TARGET(kHuaweiAscendNPU);
abs_error = 1e-2; // precision_mode default is force_fp16
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
@@ -472,6 +475,9 @@ TEST(Matmul2x2_x_transpose, precision) {
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place = TARGET(kHuaweiAscendNPU);
abs_error = 1e-2; // precision_mode default is force_fp16
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#else
@@ -487,6 +493,9 @@ TEST(Matmul2x2_y_transpose, precision) {
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place = TARGET(kHuaweiAscendNPU);
abs_error = 1e-2; // precision_mode default is force_fp16
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
@@ -559,6 +568,9 @@ TEST(Matmulnxn, precision) {
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place = TARGET(kHuaweiAscendNPU);
abs_error = 1e-2; // precision_mode default is force_fp16
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#else
...
@@ -168,6 +168,9 @@ TEST(Scale, precision) {
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
place = TARGET(kXPU);
abs_error = 3e-4; // Some operations use fp16 in XPU
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place = TARGET(kHuaweiAscendNPU);
abs_error = 1e-2; // precision_mode default is force_fp16
#elif defined(LITE_WITH_X86)
place = TARGET(kX86);
#else
...
@@ -271,6 +271,9 @@ TEST(Slice, precision) {
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
Place place(TARGET(kXPU));
test_slice(place);
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
Place place = TARGET(kHuaweiAscendNPU);
test_slice(place);
#endif
}
...