From 4c3e13de9509d4c854bc5b5ffe8fe8bb692dccfb Mon Sep 17 00:00:00 2001
From: houj04 <35131887+houj04@users.noreply.github.com>
Date: Tue, 2 Aug 2022 14:22:51 +0800
Subject: [PATCH] [XPU] fp16 for layer_norm op (#44778)

* [XPU] fp16 for layer_norm op. test=kunlun
---
 paddle/fluid/operators/layer_norm_op_xpu.cc     | 24 ++++++++-----------
 .../fluid/platform/device/xpu/xpu2_op_list.h    |  1 -
 .../unittests/xpu/test_layer_norm_op_xpu.py     | 15 ++++++++----
 .../unittests/xpu/test_one_hot_op_xpu.py        |  1 -
 4 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/paddle/fluid/operators/layer_norm_op_xpu.cc b/paddle/fluid/operators/layer_norm_op_xpu.cc
index 85ee950a6b3..ddb580f1a1d 100644
--- a/paddle/fluid/operators/layer_norm_op_xpu.cc
+++ b/paddle/fluid/operators/layer_norm_op_xpu.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
 
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/device/device_wrapper.h"
 
 namespace paddle {
 namespace operators {
@@ -48,6 +49,9 @@ class LayerNormXPUKernel : public framework::OpKernel<T> {
     auto* mean_data = mean->mutable_data<float>(ctx.GetPlace());
     auto* variance_data = variance->mutable_data<float>(ctx.GetPlace());
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
+
+    // int layer_norm(Context* ctx, const T* x, T* y, int m, int n, float eps,
+    // const float* scale, const float* bias, float* mean, float* var);
     int r = xpu::layer_norm(dev_ctx.x_context(),
                             reinterpret_cast<const XPUType*>(x_data),
                             reinterpret_cast<XPUType*>(y_data),
@@ -58,12 +62,7 @@ class LayerNormXPUKernel : public framework::OpKernel<T> {
                             bias_data,
                             mean_data,
                             variance_data);
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External(
-                          "XPU layer_norm kernel return wrong value[%d %s]",
-                          r,
-                          XPUAPIErrorMsg[r]));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "layer_norm");
   }
 };
 
@@ -103,6 +102,9 @@ class LayerNormGradXPUKernel : public framework::OpKernel<T> {
         (dx == nullptr ?
             nullptr : dx->mutable_data<T>(ctx.GetPlace()));
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    // int layer_norm_grad(Context* ctx, const T* x, const T* dy, T* dx, int m,
+    // int n, float eps, const float* scale, const float* mean, const float*
+    // var, float* dscale, float* dbias);
     int r = xpu::layer_norm_grad(dev_ctx.x_context(),
                                  reinterpret_cast<const XPUType*>(x_data),
                                  reinterpret_cast<const XPUType*>(dy_data),
@@ -115,13 +117,7 @@ class LayerNormGradXPUKernel : public framework::OpKernel<T> {
                                  variance_data,
                                  dscale_data,
                                  dbias_data);
-    PADDLE_ENFORCE_EQ(
-        r,
-        XPU_SUCCESS,
-        platform::errors::External(
-            "XPU layer_norm_grad kernel return wrong value[%d %s]",
-            r,
-            XPUAPIErrorMsg[r]));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "layer_norm_grad");
   }
 };
 
diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
index 036479793f9..22f2d9a60a5 100644
--- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
@@ -266,7 +266,6 @@ XPUOpMap& get_kl2_ops() {
     {"layer_norm_grad",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                    pOpKernelType(vartype::FP16, XPUPlace())})},
-    {"layer_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"layer_norm",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                    pOpKernelType(vartype::FP16, XPUPlace())})},
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py
index 8cab945b459..5bfe9892aa4 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -60,14 +60,19 @@ class XPUTestLayerNormOp(XPUOpTestWrapper):
             self.begin_norm_axis = 1
             self.set_attrs()
 
+            self.atol = 1e-4
+            if self.dtype == np.float16:
+                self.atol = 1e-2
+
             right = reduce(
                 mul, self.shape[self.begin_norm_axis:len(self.shape)], 1)
             np.random.seed(10)
             x_np = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
-            scale_np = np.random.uniform(0.1, 1, [right]).astype(self.dtype)
-            bias_np = np.random.uniform(0.1, 1, [right]).astype(self.dtype)
+            scale_np = np.random.uniform(0.1, 1, [right]).astype('float32')
+            bias_np = np.random.uniform(0.1, 1, [right]).astype('float32')
             ref_y_np, ref_mean_np, ref_variance_np = ref_layer_norm(
                 x_np, scale_np, bias_np, self.epsilon, self.begin_norm_axis)
+            ref_y_np = ref_y_np.astype(self.dtype)
 
             self.inputs = {'X': x_np, 'Scale': scale_np, 'Bias': bias_np}
             self.outputs = {
@@ -84,12 +89,12 @@ class XPUTestLayerNormOp(XPUOpTestWrapper):
             pass
 
         def test_check_output(self):
-            self.check_output_with_place(paddle.XPUPlace(0), atol=1e-4)
+            self.check_output_with_place(paddle.XPUPlace(0), atol=self.atol)
 
         def test_check_grad(self):
             self.check_grad_with_place(paddle.XPUPlace(0), ['X'],
                                        'Y',
-                                       max_relative_error=0.02)
+                                       max_relative_error=self.atol)
 
     class TestXPULayerNormOpAxis2(TestXPULayerNormOp):
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py
index 258168b1da8..2b092c5815b 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py
@@ -121,7 +121,6 @@ class XPUTestOneHotOP(XPUOpTestWrapper):
 
 support_types = get_xpu_op_support_types('one_hot')
-print("support_types: %s" % str(support_types))
 for stype in support_types:
     create_test_class(globals(), XPUTestOneHotOP, stype)
 
--
GitLab
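
Note: a quick way to exercise the fp16 path this patch enables, outside the op unit tests (a minimal sketch, assuming a Paddle build with XPU support; the shapes and the final check are illustrative and not taken from the patch):

    import numpy as np
    import paddle

    paddle.set_device('xpu')  # requires an XPU-enabled Paddle build

    # fp16 input tensor; LayerNorm's weight/bias default to float32,
    # matching the kernel's `const float* scale, const float* bias` above.
    x = paddle.to_tensor(np.random.rand(2, 3, 4).astype('float16'))
    layer_norm = paddle.nn.LayerNorm(x.shape[1:])  # normalize from axis 1
    y = layer_norm(x)
    assert y.dtype == paddle.float16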