diff --git a/lite/kernels/xpu/bridges/CMakeLists.txt b/lite/kernels/xpu/bridges/CMakeLists.txt
index 339eb5976f30ca5dfced09e19815b0f7a014b5c1..84984d6c498e6cec69627b97ee07a2060bd2f361 100644
--- a/lite/kernels/xpu/bridges/CMakeLists.txt
+++ b/lite/kernels/xpu/bridges/CMakeLists.txt
@@ -16,6 +16,7 @@ lite_cc_library(subgraph_bridge_mul_op_xpu SRCS mul_op.cc DEPS ${xpu_subgraph_br
 lite_cc_library(subgraph_bridge_batch_norm_op_xpu SRCS batch_norm_op.cc DEPS ${xpu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_transpose_op_xpu SRCS transpose_op.cc DEPS ${xpu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_reshape_op_xpu SRCS reshape_op.cc DEPS ${xpu_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_layer_norm_op_xpu SRCS layer_norm_op.cc DEPS ${xpu_subgraph_bridge_deps})
 
 set(xpu_subgraph_bridges
         subgraph_bridge_registry
@@ -30,6 +31,7 @@ set(xpu_subgraph_bridges
         subgraph_bridge_batch_norm_op_xpu
         subgraph_bridge_transpose_op_xpu
         subgraph_bridge_reshape_op_xpu
+        subgraph_bridge_layer_norm_op_xpu
         CACHE INTERNAL "xpu_subgraph_bridges")
 
 message(STATUS "+++++ xpu_subgraph_bridges: ${xpu_subgraph_bridges}")
diff --git a/lite/kernels/xpu/bridges/layer_norm_op.cc b/lite/kernels/xpu/bridges/layer_norm_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..68dcab188868363393ee7f81d3e6977e3363cafc
--- /dev/null
+++ b/lite/kernels/xpu/bridges/layer_norm_op.cc
@@ -0,0 +1,69 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/npu/bridges/registry.h"
+#include "lite/kernels/xpu/bridges/graph.h"
+#include "lite/kernels/xpu/bridges/utility.h"
+
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace xpu {
+
+int LayerNormConverter(void* ctx, OpLite* op) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto scope = op->scope();
+  VLOG(3) << "[XPU] Converting " + op_type + "...";
+
+  // Get input vars and op attributes
+  auto x_var_name = op_info->Input("X").front();
+
+  auto scale_var_name = op_info->Input("Scale").front();
+  auto* scale = scope->FindMutableTensor(scale_var_name);
+  auto bias_var_name = op_info->Input("Bias").front();
+  auto* bias = scope->FindMutableTensor(bias_var_name);
+
+  auto y_var_name = op_info->Output("Y").front();
+  auto epsilon = op_info->GetAttr<float>("epsilon");
+  auto axis = op_info->GetAttr<int>("begin_norm_axis");
+
+  // Create scale, bias nodes
+  auto scale_const_node = graph->AddNode(scale_var_name, *scale);
+  auto bias_const_node = graph->AddNode(bias_var_name, *bias);
+
+  // Create node and set params from op
+  auto layer_norm_node =
+      graph->builder_.CreateLayerNorm(*graph->GetNode(x_var_name),
+                                      *scale_const_node,
+                                      *bias_const_node,
+                                      axis,
+                                      epsilon,
+                                      true,
+                                      true);
+  graph->AddNode(y_var_name, graph->builder_.GetField(layer_norm_node, 0));
+  return SUCCESS;
+}
+
+}  // namespace xpu
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_SUBGRAPH_BRIDGE(XPU,
+                         layer_norm,
+                         paddle::lite::subgraph::xpu::LayerNormConverter);
diff --git a/lite/operators/layer_norm_op.cc b/lite/operators/layer_norm_op.cc
index 061355733c9a6722fcca4ba01af81981d2b5c9ac..18ea6cbf281846600273d6e7d462ed43f2e45637 100644
--- a/lite/operators/layer_norm_op.cc
+++ b/lite/operators/layer_norm_op.cc
@@ -30,7 +30,7 @@ bool LayerNormOp::CheckShape() const {
 bool LayerNormOp::InferShape() const {
   auto out_dims = param_.X->dims();
   param_.Y->Resize(out_dims);
-  auto inner_size = out_dims.Flatten2D(param_.begin_norm_axis)[1];
+  auto inner_size = out_dims.Flatten2D(param_.begin_norm_axis)[0];
   param_.Mean->Resize(std::vector<int64_t>({inner_size}));
   param_.Variance->Resize(std::vector<int64_t>({inner_size}));
diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt
index f8fbb732674a37256b959b182df4a3a859b3999f..dbe85f17f71c52127b0feffe79167f3d979ed2dd 100644
--- a/lite/tests/kernels/CMakeLists.txt
+++ b/lite/tests/kernels/CMakeLists.txt
@@ -26,6 +26,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
   lite_cc_test(test_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
   lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
   lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 
   if(LITE_BUILD_EXTRA)
     lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
diff --git a/lite/tests/kernels/layer_norm_compute_test.cc b/lite/tests/kernels/layer_norm_compute_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5bb122e7b6c762f47d89ddc2bfbd1663f430298c
--- /dev/null
+++ b/lite/tests/kernels/layer_norm_compute_test.cc
@@ -0,0 +1,183 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/core/arena/framework.h"
+#include "lite/tests/utils/fill_data.h"
+
+namespace paddle {
+namespace lite {
+
+class LayerNormComputeTest : public arena::TestCase {
+ protected:
+  // common attributes for this op.
+  std::string op_type_ = "layer_norm";
+  std::string input_ = "x";
+  std::string scale_ = "scale";
+  std::string bias_ = "bias";
+  std::string output_ = "y";
+  std::string mean_ = "mean";
+  std::string variance_ = "variance";
+  DDim dims_{{4, 5, 19, 19}};
+  float epsilon_ = 1e-5f;
+  int begin_norm_axis_ = 1;
+  bool has_bias_ = true;
+  bool has_scale_ = true;
+
+ public:
+  LayerNormComputeTest(const Place& place,
+                       const std::string& alias,
+                       DDim dims,
+                       float epsilon,
+                       int begin_norm_axis,
+                       bool has_bias,
+                       bool has_scale)
+      : TestCase(place, alias),
+        dims_(dims),
+        epsilon_(epsilon),
+        begin_norm_axis_(begin_norm_axis),
+        has_bias_(has_bias),
+        has_scale_(has_scale) {}
+
+  void RunBaseline(Scope* scope) override {
+    auto x = scope->FindTensor(input_);
+    auto scale = scope->FindTensor(scale_);
+    auto bias = scope->FindTensor(bias_);
+
+    auto y = scope->NewTensor(output_);
+    auto mean = scope->NewTensor(mean_);
+    auto variance = scope->NewTensor(variance_);
+    CHECK(y);
+    CHECK(mean);
+    CHECK(variance);
+    y->Resize(dims_);
+
+    auto matrix_dim = dims_.Flatten2D(begin_norm_axis_);
+    int batch_size = matrix_dim[0];
+    int feature_size = matrix_dim[1];
+    mean->Resize(std::vector<int64_t>{batch_size});
+    variance->Resize(std::vector<int64_t>{batch_size});
+
+    auto* x_data = x->data<float>();
+    auto* scale_data = (scale == nullptr ? nullptr : scale->data<float>());
+    auto* bias_data = (bias == nullptr ? nullptr : bias->data<float>());
+    auto* out_data = y->mutable_data<float>();
+    auto* mean_data = mean->mutable_data<float>();
+    auto* variance_data = variance->mutable_data<float>();
+
+    for (int i = 0; i < batch_size; ++i) {
+      int start = i * feature_size;
+      int end = start + feature_size;
+
+      float mean_t = 0;
+      float variance_t = 0;
+      for (int j = start; j < end; ++j) {
+        mean_t += x_data[j];
+        variance_t += x_data[j] * x_data[j];
+      }
+      mean_t /= feature_size;
+      variance_t = variance_t / feature_size - mean_t * mean_t;
+      mean_data[i] = mean_t;
+      variance_data[i] = variance_t;
+      variance_t = sqrt(variance_t + epsilon_);
+      for (int j = start; j < end; ++j) {
+        out_data[j] = (x_data[j] - mean_t) / variance_t;
+        if (scale_data) {
+          out_data[j] *= scale_data[j - start];
+        }
+        if (bias_data) {
+          out_data[j] += bias_data[j - start];
+        }
+      }
+    }
+  }
+
+  void PrepareOpDesc(cpp::OpDesc* op_desc) {
+    op_desc->SetType(op_type_);
+    op_desc->SetInput("X", {input_});
+    op_desc->SetInput("Bias", {bias_});
+    op_desc->SetInput("Scale", {scale_});
+    op_desc->SetOutput("Y", {output_});
+    op_desc->SetOutput("Mean", {mean_});
+    op_desc->SetOutput("Variance", {variance_});
+    op_desc->SetAttr("epsilon", epsilon_);
+    op_desc->SetAttr("begin_norm_axis", begin_norm_axis_);
+  }
+
+  void PrepareData() override {
+    std::vector<float> din(dims_.production());
+    fill_data_rand(din.data(), -1.f, 1.f, dims_.production());
+
+    std::vector<int64_t> scale_v;
+    for (size_t i = begin_norm_axis_; i < dims_.size(); i++) {
+      scale_v.push_back(dims_[i]);
+    }
+    DDim scale_dim(scale_v);
+    std::vector<float> scale(scale_dim.production());
+    fill_data_rand(scale.data(), -1.f, 1.f, scale_dim.production());
+
+    std::vector<float> bias(scale_dim.production());
+    fill_data_rand(bias.data(), -1.f, 1.f, scale_dim.production());
+
+    SetCommonTensor(input_, dims_, din.data());
+    SetCommonTensor(scale_, scale_dim, scale.data());
+    SetCommonTensor(bias_, scale_dim, bias.data());
+  }
+};
+
+TEST(LayerNorm, precision) {
+  LOG(INFO) << "test layer_norm op";
+  float abs_error = 2e-5;
+  Place place;
+#if defined(LITE_WITH_XPU)
+  place = TARGET(kXPU);
+#elif defined(LITE_WITH_ARM)
+  place = TARGET(kARM);
+  abs_error = 6e-5;
+#else
+  return;
+#endif
+
+  std::vector<std::vector<int64_t>> dims{{1, 2, 3, 4}, {2, 3, 4}, {3, 4}};
+  for (auto dim_in : dims) {
+    for (auto epsilon : {1e-5f}) {
+      for (auto axis : {0, 1, 2, 3}) {
+        for (bool has_bias : {true, false}) {
+          for (bool has_scale : {true, false}) {
+            if (axis >= static_cast<int>(dim_in.size())) continue;
+            std::unique_ptr<arena::TestCase> tester(
+                new LayerNormComputeTest(place,
+                                         "def",
+                                         DDim(dim_in),
+                                         epsilon,
+                                         axis,
+                                         has_bias,
+                                         has_scale));
+#ifdef LITE_WITH_ARM
+            auto& ctx = tester->context()->As<ARMContext>();
+            ctx.SetRunMode(lite_api::LITE_POWER_HIGH, 4);
+#endif
+            arena::Arena arena(std::move(tester), place, abs_error);
+            arena.TestPrecision({"mean", "variance"});
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace lite
+}  // namespace paddle
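
Note on the `InferShape` fix: `DDim::Flatten2D(begin_norm_axis)` views the input as a 2D matrix `[outer, inner]`, where `outer` is the product of the dims before `begin_norm_axis` and `inner` the product of the rest. Layer norm produces one mean/variance per `outer` row, so `Mean`/`Variance` must be sized from element `[0]` of that view, not `[1]`, which is what the one-line change above corrects. For illustration only, here is a minimal standalone sketch of the same baseline math in plain C++ (no Lite types; `layer_norm_ref` and its signature are hypothetical names, not Lite APIs):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <numeric>
#include <vector>

// Illustrative sketch, not a Lite API: normalizes each of the `outer`
// rows of length `inner` independently,
//   y = (x - mean) / sqrt(var + epsilon) * scale + bias.
void layer_norm_ref(const std::vector<float>& x,
                    const std::vector<float>& scale,
                    const std::vector<float>& bias,
                    int64_t outer, int64_t inner, float epsilon,
                    std::vector<float>* y,
                    std::vector<float>* mean,
                    std::vector<float>* var) {
  y->resize(x.size());
  mean->resize(outer);  // one statistic per row: the Flatten2D(...)[0] size
  var->resize(outer);
  for (int64_t i = 0; i < outer; ++i) {
    const float* row = x.data() + i * inner;
    float m = std::accumulate(row, row + inner, 0.f) / inner;
    float v = 0.f;
    for (int64_t j = 0; j < inner; ++j) v += (row[j] - m) * (row[j] - m);
    v /= inner;
    (*mean)[i] = m;
    (*var)[i] = v;
    float inv_std = 1.f / std::sqrt(v + epsilon);
    for (int64_t j = 0; j < inner; ++j) {
      (*y)[i * inner + j] = (row[j] - m) * inv_std * scale[j] + bias[j];
    }
  }
}

int main() {
  // A {2, 3} input with begin_norm_axis = 1 flattens to outer=2, inner=3.
  std::vector<float> x = {1, 2, 3, 4, 5, 6};
  std::vector<float> scale(3, 1.f), bias(3, 0.f);
  std::vector<float> y, mean, var;
  layer_norm_ref(x, scale, bias, 2, 3, 1e-5f, &y, &mean, &var);
  std::printf("mean[0]=%f var[0]=%f y[0]=%f\n", mean[0], var[0], y[0]);
}
```

With unit scale and zero bias each normalized row comes out with mean 0 and variance ~1, which is the property the arena test's `RunBaseline` encodes and checks against the XPU bridge output.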