From 4dd6a4b8e1c5fee9c699bf8b114502174a9353df Mon Sep 17 00:00:00 2001
From: zhupengyang
Date: Sat, 21 Dec 2019 12:26:17 +0800
Subject: [PATCH] [XPU] add layer_norm bridge and unit test (#2640)

test=develop
---
 lite/kernels/xpu/bridges/CMakeLists.txt       |   2 +
 lite/kernels/xpu/bridges/layer_norm_op.cc     |  69 +++++++
 lite/operators/layer_norm_op.cc               |   2 +-
 lite/tests/kernels/CMakeLists.txt             |   1 +
 lite/tests/kernels/layer_norm_compute_test.cc | 183 ++++++++++++++++++
 5 files changed, 256 insertions(+), 1 deletion(-)
 create mode 100644 lite/kernels/xpu/bridges/layer_norm_op.cc
 create mode 100644 lite/tests/kernels/layer_norm_compute_test.cc

diff --git a/lite/kernels/xpu/bridges/CMakeLists.txt b/lite/kernels/xpu/bridges/CMakeLists.txt
index 339eb5976f..84984d6c49 100644
--- a/lite/kernels/xpu/bridges/CMakeLists.txt
+++ b/lite/kernels/xpu/bridges/CMakeLists.txt
@@ -16,6 +16,7 @@ lite_cc_library(subgraph_bridge_mul_op_xpu SRCS mul_op.cc DEPS ${xpu_subgraph_br
 lite_cc_library(subgraph_bridge_batch_norm_op_xpu SRCS batch_norm_op.cc DEPS ${xpu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_transpose_op_xpu SRCS transpose_op.cc DEPS ${xpu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_reshape_op_xpu SRCS reshape_op.cc DEPS ${xpu_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_layer_norm_op_xpu SRCS layer_norm_op.cc DEPS ${xpu_subgraph_bridge_deps})
 
 set(xpu_subgraph_bridges
     subgraph_bridge_registry
@@ -30,6 +31,7 @@ set(xpu_subgraph_bridges
     subgraph_bridge_batch_norm_op_xpu
     subgraph_bridge_transpose_op_xpu
     subgraph_bridge_reshape_op_xpu
+    subgraph_bridge_layer_norm_op_xpu
     CACHE INTERNAL "xpu_subgraph_bridges")
 
 message(STATUS "+++++ xpu_subgraph_bridges: ${xpu_subgraph_bridges}")
diff --git a/lite/kernels/xpu/bridges/layer_norm_op.cc b/lite/kernels/xpu/bridges/layer_norm_op.cc
new file mode 100644
index 0000000000..68dcab1888
--- /dev/null
+++ b/lite/kernels/xpu/bridges/layer_norm_op.cc
@@ -0,0 +1,69 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "lite/kernels/npu/bridges/registry.h" +#include "lite/kernels/xpu/bridges/graph.h" +#include "lite/kernels/xpu/bridges/utility.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace xpu { + +int LayerNormConverter(void* ctx, OpLite* op) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[XPU] Converting " + op_type + "..."; + + // Get input vars and op attributes + auto x_var_name = op_info->Input("X").front(); + + auto scale_var_name = op_info->Input("Scale").front(); + auto* scale = scope->FindMutableTensor(scale_var_name); + auto bias_var_name = op_info->Input("Bias").front(); + auto* bias = scope->FindMutableTensor(bias_var_name); + + auto y_var_name = op_info->Output("Y").front(); + auto epsilon = op_info->GetAttr("epsilon"); + auto axis = op_info->GetAttr("begin_norm_axis"); + + // Create scale, bias nodes + auto scale_const_node = graph->AddNode(scale_var_name, *scale); + auto bias_const_node = graph->AddNode(bias_var_name, *bias); + + // Create node and set params from op + auto layer_norm_node = + graph->builder_.CreateLayerNorm(*graph->GetNode(x_var_name), + *scale_const_node, + *bias_const_node, + axis, + epsilon, + true, + true); + graph->AddNode(y_var_name, graph->builder_.GetField(layer_norm_node, 0)); + return SUCCESS; +} + +} // namespace xpu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE(XPU, + layer_norm, + paddle::lite::subgraph::xpu::LayerNormConverter); diff --git a/lite/operators/layer_norm_op.cc b/lite/operators/layer_norm_op.cc index 061355733c..18ea6cbf28 100644 --- a/lite/operators/layer_norm_op.cc +++ b/lite/operators/layer_norm_op.cc @@ -30,7 +30,7 @@ bool LayerNormOp::CheckShape() const { bool LayerNormOp::InferShape() const { auto out_dims = param_.X->dims(); param_.Y->Resize(out_dims); - auto inner_size = out_dims.Flatten2D(param_.begin_norm_axis)[1]; + auto inner_size = out_dims.Flatten2D(param_.begin_norm_axis)[0]; param_.Mean->Resize(std::vector({inner_size})); param_.Variance->Resize(std::vector({inner_size})); diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt index f8fbb73267..dbe85f17f7 100644 --- a/lite/tests/kernels/CMakeLists.txt +++ b/lite/tests/kernels/CMakeLists.txt @@ -26,6 +26,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH lite_cc_test(test_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) if(LITE_BUILD_EXTRA) lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) diff --git a/lite/tests/kernels/layer_norm_compute_test.cc b/lite/tests/kernels/layer_norm_compute_test.cc new file mode 100644 index 0000000000..5bb122e7b6 --- /dev/null +++ b/lite/tests/kernels/layer_norm_compute_test.cc @@ -0,0 +1,183 @@ +// Copyright 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/core/arena/framework.h"
+#include "lite/tests/utils/fill_data.h"
+
+namespace paddle {
+namespace lite {
+
+class LayerNormComputeTest : public arena::TestCase {
+ protected:
+  // common attributes for this op.
+  std::string op_type_ = "layer_norm";
+  std::string input_ = "x";
+  std::string scale_ = "scale";
+  std::string bias_ = "bias";
+  std::string output_ = "y";
+  std::string mean_ = "mean";
+  std::string variance_ = "variance";
+  DDim dims_{{4, 5, 19, 19}};
+  float epsilon_ = 1e-5f;
+  int begin_norm_axis_ = 1;
+  bool has_bias_ = true;
+  bool has_scale_ = true;
+
+ public:
+  LayerNormComputeTest(const Place& place,
+                       const std::string& alias,
+                       DDim dims,
+                       float epsilon,
+                       int begin_norm_axis,
+                       bool has_bias,
+                       bool has_scale)
+      : TestCase(place, alias),
+        dims_(dims),
+        epsilon_(epsilon),
+        begin_norm_axis_(begin_norm_axis),
+        has_bias_(has_bias),
+        has_scale_(has_scale) {}
+
+  void RunBaseline(Scope* scope) override {
+    auto x = scope->FindTensor(input_);
+    auto scale = scope->FindTensor(scale_);
+    auto bias = scope->FindTensor(bias_);
+
+    auto y = scope->NewTensor(output_);
+    auto mean = scope->NewTensor(mean_);
+    auto variance = scope->NewTensor(variance_);
+    CHECK(y);
+    CHECK(mean);
+    CHECK(variance);
+    y->Resize(dims_);
+
+    auto matrix_dim = dims_.Flatten2D(begin_norm_axis_);
+    int batch_size = matrix_dim[0];
+    int feature_size = matrix_dim[1];
+    mean->Resize(std::vector<int64_t>{batch_size});
+    variance->Resize(std::vector<int64_t>{batch_size});
+
+    auto* x_data = x->data<float>();
+    auto* scale_data = (scale == nullptr ? nullptr : scale->data<float>());
+    auto* bias_data = (bias == nullptr ? nullptr : bias->data<float>());
+    auto* out_data = y->mutable_data<float>();
+    auto* mean_data = mean->mutable_data<float>();
+    auto* variance_data = variance->mutable_data<float>();
+
+    for (int i = 0; i < batch_size; ++i) {
+      int start = i * feature_size;
+      int end = start + feature_size;
+
+      float mean_t = 0;
+      float variance_t = 0;
+      for (int j = start; j < end; ++j) {
+        mean_t += x_data[j];
+        variance_t += x_data[j] * x_data[j];
+      }
+      mean_t /= feature_size;
+      variance_t = variance_t / feature_size - mean_t * mean_t;
+      mean_data[i] = mean_t;
+      variance_data[i] = variance_t;
+      variance_t = sqrt(variance_t + epsilon_);
+      for (int j = start; j < end; ++j) {
+        out_data[j] = (x_data[j] - mean_t) / variance_t;
+        if (scale_data) {
+          out_data[j] *= scale_data[j - start];
+        }
+        if (bias_data) {
+          out_data[j] += bias_data[j - start];
+        }
+      }
+    }
+  }
+
+  void PrepareOpDesc(cpp::OpDesc* op_desc) {
+    op_desc->SetType(op_type_);
+    op_desc->SetInput("X", {input_});
+    op_desc->SetInput("Bias", {bias_});
+    op_desc->SetInput("Scale", {scale_});
+    op_desc->SetOutput("Y", {output_});
+    op_desc->SetOutput("Mean", {mean_});
+    op_desc->SetOutput("Variance", {variance_});
+    op_desc->SetAttr("epsilon", epsilon_);
+    op_desc->SetAttr("begin_norm_axis", begin_norm_axis_);
+  }
+
+  void PrepareData() override {
+    std::vector<float> din(dims_.production());
+    fill_data_rand(din.data(), -1.f, 1.f, dims_.production());
+
+    std::vector<int64_t> scale_v;
+    for (size_t i = begin_norm_axis_; i < dims_.size(); i++) {
+      scale_v.push_back(dims_[i]);
+    }
+    DDim scale_dim(scale_v);
+    std::vector<float> scale(scale_dim.production());
+    fill_data_rand(scale.data(), -1.f, 1.f, scale_dim.production());
+
+    std::vector<float> bias(scale_dim.production());
+    fill_data_rand(bias.data(), -1.f, 1.f, scale_dim.production());
+
+    SetCommonTensor(input_, dims_, din.data());
+    SetCommonTensor(scale_, scale_dim, scale.data());
+    SetCommonTensor(bias_, scale_dim, bias.data());
+  }
+};
+
+TEST(LayerNorm, precision) {
+  LOG(INFO) << "test layer_norm op";
+  float abs_error = 2e-5;
+  Place place;
+#if defined(LITE_WITH_XPU)
+  place = TARGET(kXPU);
+#elif defined(LITE_WITH_ARM)
+  place = TARGET(kARM);
+  abs_error = 6e-5;
+#else
+  return;
+#endif
+
+  std::vector<std::vector<int64_t>> dims{{1, 2, 3, 4}, {2, 3, 4}, {3, 4}};
+  for (auto dim_in : dims) {
+    for (auto epsilon : {1e-5f}) {
+      for (auto axis : {0, 1, 2, 3}) {
+        for (bool has_bias : {true, false}) {
+          for (bool has_scale : {true, false}) {
+            if (axis >= dim_in.size()) continue;
+            std::unique_ptr<arena::TestCase> tester(
+                new LayerNormComputeTest(place,
+                                         "def",
+                                         DDim(dim_in),
+                                         epsilon,
+                                         axis,
+                                         has_bias,
+                                         has_scale));
+#ifdef LITE_WITH_ARM
+            auto& ctx = tester->context()->As<ARMContext>();
+            ctx.SetRunMode(lite_api::LITE_POWER_HIGH, 4);
+#endif
+            arena::Arena arena(std::move(tester), place, abs_error);
+            arena.TestPrecision({"mean", "variance"});
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace lite
+}  // namespace paddle
-- 
GitLab
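
The baseline in this patch flattens X to an [outer, inner] matrix at begin_norm_axis and computes one mean and one variance per row, which is why the InferShape fix now sizes Mean/Variance by Flatten2D(begin_norm_axis)[0] rather than [1]. Below is a minimal standalone C++ sketch of that reference computation, independent of the Lite arena harness; the helper name layer_norm_ref and its signature are illustrative inventions, not Paddle Lite APIs.

#include <cmath>
#include <cstdio>
#include <vector>

// Sketch of the layer_norm reference used by RunBaseline: flatten the
// input to [outer, inner] at begin_norm_axis, then for each of the
// `outer` rows compute mean/variance and normalize, optionally applying
// per-element scale and bias of length `inner`.
// `layer_norm_ref` is a hypothetical helper, not a Paddle Lite API.
static void layer_norm_ref(const std::vector<float>& x,
                           const float* scale,  // may be nullptr
                           const float* bias,   // may be nullptr
                           int outer, int inner, float epsilon,
                           std::vector<float>* y,
                           std::vector<float>* mean,
                           std::vector<float>* var) {
  y->assign(x.size(), 0.f);
  mean->assign(outer, 0.f);  // one entry per row: Flatten2D(axis)[0]
  var->assign(outer, 0.f);
  for (int i = 0; i < outer; ++i) {
    const float* row = x.data() + i * inner;
    float m = 0.f, v = 0.f;
    for (int j = 0; j < inner; ++j) {
      m += row[j];
      v += row[j] * row[j];
    }
    m /= inner;
    v = v / inner - m * m;  // Var[x] = E[x^2] - E[x]^2
    (*mean)[i] = m;
    (*var)[i] = v;
    const float inv_std = 1.f / std::sqrt(v + epsilon);
    for (int j = 0; j < inner; ++j) {
      float o = (row[j] - m) * inv_std;
      if (scale) o *= scale[j];
      if (bias) o += bias[j];
      (*y)[i * inner + j] = o;
    }
  }
}

int main() {
  // A {2, 3} input normalized at begin_norm_axis = 1:
  // outer = 2 rows, inner = 3 elements per row.
  std::vector<float> x = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
  std::vector<float> y, mean, var;
  layer_norm_ref(x, nullptr, nullptr, 2, 3, 1e-5f, &y, &mean, &var);
  printf("mean = {%f, %f}\n", mean[0], mean[1]);
  printf("var  = {%f, %f}\n", var[0], var[1]);
  return 0;
}

For this 2x3 example the program prints mean = {2, 5} and var = {0.666667, 0.666667}, one entry per normalized row, matching the shapes the test's RunBaseline gives its mean and variance tensors.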