提交 974c50db 编写于 作者: H hong19860320 提交者: GitHub

[LITE][NPU] Add layer_norm op bridge (#2767)

上级 789accae
...@@ -42,6 +42,7 @@ lite_cc_library(subgraph_bridge_unsqueeze_op_npu SRCS unsqueeze_op.cc DEPS ${npu ...@@ -42,6 +42,7 @@ lite_cc_library(subgraph_bridge_unsqueeze_op_npu SRCS unsqueeze_op.cc DEPS ${npu
lite_cc_library(subgraph_bridge_argmax_op_npu SRCS argmax_op.cc DEPS ${npu_subgraph_bridge_deps}) lite_cc_library(subgraph_bridge_argmax_op_npu SRCS argmax_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_instance_norm_op_npu SRCS instance_norm_op.cc DEPS ${npu_subgraph_bridge_deps}) lite_cc_library(subgraph_bridge_instance_norm_op_npu SRCS instance_norm_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_dropout_op_npu SRCS dropout_op.cc DEPS ${npu_subgraph_bridge_deps}) lite_cc_library(subgraph_bridge_dropout_op_npu SRCS dropout_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_layer_norm_op_npu SRCS layer_norm_op.cc DEPS ${npu_subgraph_bridge_deps})
set(npu_subgraph_bridges set(npu_subgraph_bridges
subgraph_bridge_registry subgraph_bridge_registry
...@@ -71,6 +72,7 @@ set(npu_subgraph_bridges ...@@ -71,6 +72,7 @@ set(npu_subgraph_bridges
subgraph_bridge_argmax_op_npu subgraph_bridge_argmax_op_npu
subgraph_bridge_instance_norm_op_npu subgraph_bridge_instance_norm_op_npu
subgraph_bridge_dropout_op_npu subgraph_bridge_dropout_op_npu
subgraph_bridge_layer_norm_op_npu
CACHE INTERNAL "npu_subgraph_bridges") CACHE INTERNAL "npu_subgraph_bridges")
message(STATUS "+++++ npu_subgraph_bridges: ${npu_subgraph_bridges}") message(STATUS "+++++ npu_subgraph_bridges: ${npu_subgraph_bridges}")
...@@ -82,7 +82,7 @@ int InstanceNormConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -82,7 +82,7 @@ int InstanceNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
} else { } else {
if (!bias->persistable()) { if (!bias->persistable()) {
LOG(WARNING) << "[NPU] Only supporting persistable bias tensor."; LOG(WARNING) << "[NPU] Only supporting persistable bias tensor.";
bias->set_persistable(true); return FAILED;
} }
bias_node = graph->Add(bias_name, *bias, scale_bias_dims); bias_node = graph->Add(bias_name, *bias, scale_bias_dims);
} }
...@@ -108,7 +108,7 @@ int InstanceNormConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -108,7 +108,7 @@ int InstanceNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK_EQ(channel_size, scale_dims.production()); CHECK_EQ(channel_size, scale_dims.production());
if (!scale->persistable()) { if (!scale->persistable()) {
LOG(WARNING) << "[NPU] Only supporting persistable scale tensor."; LOG(WARNING) << "[NPU] Only supporting persistable scale tensor.";
scale->set_persistable(true); return FAILED;
} }
scale_node = graph->Add(scale_name, *scale, scale_bias_dims); scale_node = graph->Add(scale_name, *scale, scale_bias_dims);
} else { } else {
...@@ -121,8 +121,7 @@ int InstanceNormConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -121,8 +121,7 @@ int InstanceNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
instance_norm_op->set_input_x(*x_node->data()); instance_norm_op->set_input_x(*x_node->data());
instance_norm_op->set_input_scale(*scale_node->data()); instance_norm_op->set_input_scale(*scale_node->data());
instance_norm_op->set_input_bias(*bias_node->data()); instance_norm_op->set_input_bias(*bias_node->data());
instance_norm_op->set_attr_reduction_indices( instance_norm_op->set_attr_reduction_indices(ge::AttrValue::LIST_INT({2}));
ge::AttrValue::LIST_INT({0, 1, 2}));
instance_norm_op->set_attr_epsilon(epsilon); instance_norm_op->set_attr_epsilon(epsilon);
return SUCCESS; return SUCCESS;
} }
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace npu {
// Converts a `layer_norm` op into NPU graph nodes.
//
// The op is emitted as a `ge::op::InstanceNorm` node whose reduction index is
// {3}. When the input is not already 4-D with `begin_norm_axis == 1`, the
// input is first reshaped to a 4-D tensor (see the table below) and the
// output is reshaped back to the shape of "Y".
//
// Params:
//   ctx    - the NPU `Graph` being built, passed as an opaque pointer.
//   op     - the op to convert; provides the op info (inputs, outputs,
//            attributes) and the scope holding the tensors.
//   kernel - the kernel bound to the op; used to query the declared
//            precision/layout of "X", "Y", "Bias" and "Scale".
//
// Returns:
//   FAILED if a provided "Bias" or "Scale" tensor is not persistable,
//   REBUILD_WHEN_SHAPE_CHANGED otherwise.
//
// Aborts through CHECK when a pointer argument is null, when a tensor is not
// declared as float/NCHW, when the rank of "X" is outside [2, 4], when
// `begin_norm_axis` (after wrapping negative values) is outside [1, rank), or
// when "Bias"/"Scale" is not a 1-D tensor whose size equals the flattened
// normalized size.
int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  // `kernel` is dereferenced below to look up the declared tensor types, so
  // reject a null pointer up front like the other two arguments.
  CHECK(kernel != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[NPU] Converting " + op_type + "...";

  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
  auto x_type = kernel->GetInputDeclType("X");
  CHECK(x_type->precision() == PRECISION(kFloat));
  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto padded_x_shape = CvtShape(x_dims);
  auto x_rank = static_cast<int>(x_dims.size());
  CHECK(x_rank >= 2 && x_rank <= 4);
  auto y_name = op_info->Output("Y").front();
  auto y_type = kernel->GetOutputDeclType("Y");
  CHECK(y_type->precision() == PRECISION(kFloat));
  CHECK(y_type->layout() == DATALAYOUT(kNCHW));
  auto y = scope->FindMutableTensor(y_name);
  auto y_dims = y->dims();
  auto padded_y_shape = CvtShape(y_dims);
  auto epsilon = op_info->GetAttr<float>("epsilon");
  auto begin_norm_axis = op_info->GetAttr<int>("begin_norm_axis");
  // A negative axis counts from the last dimension.
  if (begin_norm_axis < 0) {
    begin_norm_axis += x_rank;
  }
  CHECK(begin_norm_axis >= 1 && begin_norm_axis < x_rank);
  // Collapse X to 2-D around begin_norm_axis: `left` is the product of the
  // leading dims, `right` is the number of elements that are normalized.
  auto x_mat_dims = x_dims.Flatten2D(begin_norm_axis);
  auto left = x_mat_dims[0];
  auto right = x_mat_dims[1];

  // X node
  std::shared_ptr<Node> x_node = nullptr;
  if (graph->Has(x_name)) {
    x_node = graph->Get(x_name);
  } else {
    x_node = graph->Add(x_name, *x, padded_x_shape);
  }

  // Reshaped X node if needs
  bool reshape = false;
  if (!(x_rank == 4 && begin_norm_axis == 1)) {
    reshape = true;
    // Only the input shape 4-D(n, c, h, w) and axis=1 is supported
    // by HiAI DDK, So the input shape need to be padded to 4-D if it is less
    // than 4 or axis!=1. For example:
    // (1) (n, c, h, w), axis=1 -> no need
    // (2) (n, c, h, w), axis=2 -> (n * c, h, w, 1)
    // (3) (n, c, h, w), axis=3 -> (n * c * h, w, 1, 1)
    // (4) (n, h, w), axis=1 -> (n, h, w, 1)
    // (5) (n, h, w), axis=2 -> (n * h, w, 1, 1)
    // (6) (h, w), axis=1 -> (h, w, 1, 1)
    padded_x_shape = {left};
    for (int i = begin_norm_axis; i < x_rank; i++) {
      padded_x_shape.push_back(x_dims[i]);
    }
    // Pad with trailing 1s up to rank 4. Driving the loop by the current size
    // avoids the unsigned `4 - size()` subtraction, which would wrap around
    // if the shape ever held more than 4 entries.
    while (padded_x_shape.size() < 4) {
      padded_x_shape.push_back(1);
    }
    auto reshaped_x_node = graph->Add<ge::op::Reshape>(
        x_name + "/reshape", x_node->precision(), x_node->layout());
    auto reshaped_x_op = reshaped_x_node->data<ge::op::Reshape>();
    reshaped_x_op->set_input_tensor(*x_node->data());
    reshaped_x_op->set_attr_shape(padded_x_shape);
    x_node = reshaped_x_node;
  }

  // Bias node
  // Scale and bias are added with shape (1, d1, d2, d3), where d1..d3 are the
  // trailing dims of the (possibly reshaped) 4-D input.
  auto scale_bias_dims =
      DDim({1, padded_x_shape[1], padded_x_shape[2], padded_x_shape[3]});
  std::shared_ptr<Node> bias_node = nullptr;
  if (HasInputArg(op_info, scope, "Bias")) {
    auto bias_name = op_info->Input("Bias").front();
    auto bias_type = kernel->GetInputDeclType("Bias");
    CHECK(bias_type->precision() == PRECISION(kFloat));
    CHECK(bias_type->layout() == DATALAYOUT(kNCHW));
    auto bias = scope->FindMutableTensor(bias_name);
    auto bias_dims = bias->dims();
    CHECK_EQ(bias_dims.size(), 1);
    CHECK_EQ(bias_dims.production(), right);
    if (!bias->persistable()) {
      LOG(WARNING) << "[NPU] Only supporting persistable bias tensor.";
      return FAILED;
    }
    bias_node = graph->Add(bias_name, *bias, scale_bias_dims);
  } else {
    // No bias input: use a constant node filled with 0.
    bias_node = graph->Add(y_name + "/bias", 0.0f, scale_bias_dims);
  }

  // Scale node
  std::shared_ptr<Node> scale_node = nullptr;
  if (HasInputArg(op_info, scope, "Scale")) {
    auto scale_name = op_info->Input("Scale").front();
    auto scale_type = kernel->GetInputDeclType("Scale");
    CHECK(scale_type->precision() == PRECISION(kFloat));
    CHECK(scale_type->layout() == DATALAYOUT(kNCHW));
    auto scale = scope->FindMutableTensor(scale_name);
    auto scale_dims = scale->dims();
    CHECK_EQ(scale_dims.size(), 1);
    CHECK_EQ(scale_dims.production(), right);
    if (!scale->persistable()) {
      LOG(WARNING) << "[NPU] Only supporting persistable scale tensor.";
      return FAILED;
    }
    scale_node = graph->Add(scale_name, *scale, scale_bias_dims);
  } else {
    // No scale input: use a constant node filled with 1.
    scale_node = graph->Add(y_name + "/scale", 1.0f, scale_bias_dims);
  }

  // LayerNorm node
  // Layer norm is expressed with the InstanceNorm IR op.
  auto layer_norm_node = graph->Add<ge::op::InstanceNorm>(y_name);
  auto layer_norm_op = layer_norm_node->data<ge::op::InstanceNorm>();
  layer_norm_op->set_input_x(*x_node->data());
  layer_norm_op->set_input_scale(*scale_node->data());
  layer_norm_op->set_input_bias(*bias_node->data());
  layer_norm_op->set_attr_reduction_indices(ge::AttrValue::LIST_INT({3}));
  layer_norm_op->set_attr_epsilon(epsilon);

  // Reshaped Y node if needs
  if (reshape) {
    // Restore the shape of Y after normalizing the padded 4-D tensor.
    auto reshaped_y_node = graph->Add<ge::op::Reshape>(
        y_name, layer_norm_node->precision(), layer_norm_node->layout());
    auto reshaped_y_op = reshaped_y_node->data<ge::op::Reshape>();
    reshaped_y_op->set_input_tensor(*layer_norm_node->data());
    reshaped_y_op->set_attr_shape(padded_y_shape);
  }
  return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Registers LayerNormConverter as the subgraph bridge for the `layer_norm` op
// type on the kNPU target.
REGISTER_SUBGRAPH_BRIDGE(layer_norm,
kNPU,
paddle::lite::subgraph::npu::LayerNormConverter);
...@@ -55,3 +55,4 @@ USE_SUBGRAPH_BRIDGE(transpose2, kNPU); ...@@ -55,3 +55,4 @@ USE_SUBGRAPH_BRIDGE(transpose2, kNPU);
USE_SUBGRAPH_BRIDGE(unsqueeze, kNPU); USE_SUBGRAPH_BRIDGE(unsqueeze, kNPU);
USE_SUBGRAPH_BRIDGE(unsqueeze2, kNPU); USE_SUBGRAPH_BRIDGE(unsqueeze2, kNPU);
USE_SUBGRAPH_BRIDGE(instance_norm, kNPU); USE_SUBGRAPH_BRIDGE(instance_norm, kNPU);
USE_SUBGRAPH_BRIDGE(layer_norm, kNPU);
...@@ -26,7 +26,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_ ...@@ -26,7 +26,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_
lite_cc_test(test_kernel_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
......
...@@ -122,8 +122,8 @@ class InstanceNormComputeTest : public arena::TestCase { ...@@ -122,8 +122,8 @@ class InstanceNormComputeTest : public arena::TestCase {
fill_data_rand(bias.data(), -1.f, 1.f, scale_bias_dims.production()); fill_data_rand(bias.data(), -1.f, 1.f, scale_bias_dims.production());
SetCommonTensor(x_, dims_, x.data()); SetCommonTensor(x_, dims_, x.data());
SetCommonTensor(scale_, scale_bias_dims, scale.data()); SetCommonTensor(scale_, scale_bias_dims, scale.data(), {}, true);
SetCommonTensor(bias_, scale_bias_dims, bias.data()); SetCommonTensor(bias_, scale_bias_dims, bias.data(), {}, true);
} }
}; };
......
...@@ -132,13 +132,13 @@ class LayerNormComputeTest : public arena::TestCase { ...@@ -132,13 +132,13 @@ class LayerNormComputeTest : public arena::TestCase {
DDim scale_dims({scale_bias_size}); DDim scale_dims({scale_bias_size});
std::vector<float> scale(scale_bias_size); std::vector<float> scale(scale_bias_size);
fill_data_rand(scale.data(), -1.f, 1.f, scale_bias_size); fill_data_rand(scale.data(), -1.f, 1.f, scale_bias_size);
SetCommonTensor(scale_, scale_dims, scale.data()); SetCommonTensor(scale_, scale_dims, scale.data(), {}, true);
} }
if (has_bias_) { if (has_bias_) {
DDim bias_dims({scale_bias_size}); DDim bias_dims({scale_bias_size});
std::vector<float> bias(scale_bias_size); std::vector<float> bias(scale_bias_size);
fill_data_rand(bias.data(), -1.f, 1.f, scale_bias_size); fill_data_rand(bias.data(), -1.f, 1.f, scale_bias_size);
SetCommonTensor(bias_, bias_dims, bias.data()); SetCommonTensor(bias_, bias_dims, bias.data(), {}, true);
} }
} }
}; };
...@@ -149,6 +149,9 @@ TEST(LayerNorm, precision) { ...@@ -149,6 +149,9 @@ TEST(LayerNorm, precision) {
Place place; Place place;
#if defined(LITE_WITH_XPU) #if defined(LITE_WITH_XPU)
place = TARGET(kXPU); place = TARGET(kXPU);
#elif defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2;
#elif defined(LITE_WITH_ARM) #elif defined(LITE_WITH_ARM)
place = TARGET(kARM); place = TARGET(kARM);
abs_error = 6e-5; abs_error = 6e-5;
...@@ -157,7 +160,7 @@ TEST(LayerNorm, precision) { ...@@ -157,7 +160,7 @@ TEST(LayerNorm, precision) {
#endif #endif
for (auto dims : for (auto dims :
std::vector<std::vector<int64_t>>{{1, 2, 3, 4}, {2, 3, 4}, {3, 4}}) { std::vector<std::vector<int64_t>>{{2, 3, 4, 5}, {3, 4, 5}, {4, 5}}) {
for (auto epsilon : {1e-5f}) { for (auto epsilon : {1e-5f}) {
for (auto axis : {1, 2, 3}) { for (auto axis : {1, 2, 3}) {
for (bool has_bias : {true, false}) { for (bool has_bias : {true, false}) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册