Unverified commit 4c3e13de, authored by houj04, committed via GitHub

[XPU] fp16 for layer_norm op (#44778)

* [XPU] fp16 for layer_norm op. test=kunlun
Parent: c3d4a3d8
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/device/device_wrapper.h"
 namespace paddle {
 namespace operators {
@@ -48,6 +49,9 @@ class LayerNormXPUKernel : public framework::OpKernel<T> {
     auto* mean_data = mean->mutable_data<float>(ctx.GetPlace());
     auto* variance_data = variance->mutable_data<float>(ctx.GetPlace());
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    // int layer_norm(Context* ctx, const T* x, T* y, int m, int n, float eps,
+    // const float* scale, const float* bias, float* mean, float* var);
     int r = xpu::layer_norm(dev_ctx.x_context(),
                             reinterpret_cast<const XPUType*>(x_data),
                             reinterpret_cast<XPUType*>(y_data),
@@ -58,12 +62,7 @@ class LayerNormXPUKernel : public framework::OpKernel<T> {
                             bias_data,
                             mean_data,
                             variance_data);
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External(
-                          "XPU layer_norm kernel return wrong value[%d %s]",
-                          r,
-                          XPUAPIErrorMsg[r]));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "layer_norm");
   }
 };
@@ -103,6 +102,9 @@ class LayerNormGradXPUKernel : public framework::OpKernel<T> {
         (dx == nullptr ? nullptr : dx->mutable_data<T>(ctx.GetPlace()));
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    // int layer_norm_grad(Context* ctx, const T* x, const T* dy, T* dx, int m,
+    // int n, float eps, const float* scale, const float* mean, const float*
+    // var, float* dscale, float* dbias);
     int r = xpu::layer_norm_grad(dev_ctx.x_context(),
                                  reinterpret_cast<const XPUType*>(x_data),
                                  reinterpret_cast<const XPUType*>(dy_data),
@@ -115,13 +117,7 @@ class LayerNormGradXPUKernel : public framework::OpKernel<T> {
                                  variance_data,
                                  dscale_data,
                                  dbias_data);
-    PADDLE_ENFORCE_EQ(
-        r,
-        XPU_SUCCESS,
-        platform::errors::External(
-            "XPU layer_norm_grad kernel return wrong value[%d %s]",
-            r,
-            XPUAPIErrorMsg[r]));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "layer_norm_grad");
   }
 };
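For readers unfamiliar with the (m, n) convention in the commented xpu::layer_norm signature: the input is viewed as an m x n matrix (dimensions before begin_norm_axis flattened into m, the rest into n) and each row is normalized independently, with mean/var kept in float32 even when x and y are float16. Below is a minimal NumPy sketch of that math; the helper name layer_norm_ref and its exact API are illustrative only, not the XPU implementation.

import numpy as np


def layer_norm_ref(x, scale, bias, eps=1e-5):
    """Row-wise layer norm over an [m, n] matrix; statistics kept in float32."""
    x32 = x.astype(np.float32)
    mean = x32.mean(axis=1, keepdims=True)  # [m, 1], float32
    var = x32.var(axis=1, keepdims=True)    # [m, 1], float32 (population variance)
    y = (x32 - mean) / np.sqrt(var + eps) * scale + bias
    return y.astype(x.dtype), mean.squeeze(1), var.squeeze(1)


# Tiny usage example with a float16 input, the dtype this commit enables.
x = np.random.uniform(0.1, 1, (4, 8)).astype(np.float16)
scale = np.ones(8, dtype=np.float32)
bias = np.zeros(8, dtype=np.float32)
y, mean, var = layer_norm_ref(x, scale, bias)
print(y.dtype, mean.dtype, var.dtype)  # float16 float32 float32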
@@ -266,7 +266,6 @@ XPUOpMap& get_kl2_ops() {
     {"layer_norm_grad",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                    pOpKernelType(vartype::FP16, XPUPlace())})},
-    {"layer_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"layer_norm",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                    pOpKernelType(vartype::FP16, XPUPlace())})},
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -60,14 +60,19 @@ class XPUTestLayerNormOp(XPUOpTestWrapper):
             self.begin_norm_axis = 1
             self.set_attrs()
+            self.atol = 1e-4
+            if self.dtype == np.float16:
+                self.atol = 1e-2
             right = reduce(mul,
                            self.shape[self.begin_norm_axis:len(self.shape)], 1)
             np.random.seed(10)
             x_np = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
-            scale_np = np.random.uniform(0.1, 1, [right]).astype(self.dtype)
-            bias_np = np.random.uniform(0.1, 1, [right]).astype(self.dtype)
+            scale_np = np.random.uniform(0.1, 1, [right]).astype('float32')
+            bias_np = np.random.uniform(0.1, 1, [right]).astype('float32')
             ref_y_np, ref_mean_np, ref_variance_np = ref_layer_norm(
                 x_np, scale_np, bias_np, self.epsilon, self.begin_norm_axis)
+            ref_y_np = ref_y_np.astype(self.dtype)
 
             self.inputs = {'X': x_np, 'Scale': scale_np, 'Bias': bias_np}
             self.outputs = {
@@ -84,12 +89,12 @@ class XPUTestLayerNormOp(XPUOpTestWrapper):
             pass
 
         def test_check_output(self):
-            self.check_output_with_place(paddle.XPUPlace(0), atol=1e-4)
+            self.check_output_with_place(paddle.XPUPlace(0), atol=self.atol)
 
         def test_check_grad(self):
             self.check_grad_with_place(paddle.XPUPlace(0), ['X'],
                                        'Y',
-                                       max_relative_error=0.02)
+                                       max_relative_error=self.atol)
 
     class TestXPULayerNormOpAxis2(TestXPULayerNormOp):
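The new self.atol logic loosens the tolerance to 1e-2 for float16 because a half-precision result can differ from the float32 reference by more than the 1e-4 used for float32; the test therefore builds the reference with float32 scale_np/bias_np and only casts the final ref_y_np back to the test dtype. A small standalone NumPy sketch of that rounding effect follows; the shape and epsilon here are chosen for illustration only and are not taken from the test.

import numpy as np

np.random.seed(10)
x = np.random.uniform(0.1, 1, (2, 3, 16)).astype(np.float16)

# Normalize over all axes from axis 1 on (begin_norm_axis=1) in float32,
# then cast to float16, mimicking ref_layer_norm(...) followed by
# .astype(self.dtype) in the test above.
x32 = x.astype(np.float32).reshape(x.shape[0], -1)
mean = x32.mean(axis=1, keepdims=True)
var = x32.var(axis=1, keepdims=True)
y32 = (x32 - mean) / np.sqrt(var + 1e-5)
y16 = y32.astype(np.float16)

# The float16 cast alone typically introduces errors above the 1e-4 float32
# tolerance but well below 1e-2, which is the tolerance adopted for fp16.
err = np.abs(y16.astype(np.float32) - y32).max()
print("max |fp16 - fp32| =", err, "-> within 1e-2:", err < 1e-2)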
@@ -121,7 +121,6 @@ class XPUTestOneHotOP(XPUOpTestWrapper):
 support_types = get_xpu_op_support_types('one_hot')
-print("support_types: %s" % str(support_types))
 for stype in support_types:
     create_test_class(globals(), XPUTestOneHotOP, stype)