Unverified commit 4c3e13de, authored by houj04, committed via GitHub

[XPU] fp16 for layer_norm op (#44778)

* [XPU] fp16 for layer_norm op. test=kunlun
Parent: c3d4a3d8
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/device/device_wrapper.h"
 namespace paddle {
 namespace operators {
@@ -48,6 +49,9 @@ class LayerNormXPUKernel : public framework::OpKernel<T> {
     auto* mean_data = mean->mutable_data<float>(ctx.GetPlace());
     auto* variance_data = variance->mutable_data<float>(ctx.GetPlace());
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    // int layer_norm(Context* ctx, const T* x, T* y, int m, int n, float eps,
+    // const float* scale, const float* bias, float* mean, float* var);
     int r = xpu::layer_norm(dev_ctx.x_context(),
                             reinterpret_cast<const XPUType*>(x_data),
                             reinterpret_cast<XPUType*>(y_data),
@@ -58,12 +62,7 @@ class LayerNormXPUKernel : public framework::OpKernel<T> {
                             bias_data,
                             mean_data,
                             variance_data);
-    PADDLE_ENFORCE_EQ(r,
-                      XPU_SUCCESS,
-                      platform::errors::External(
-                          "XPU layer_norm kernel return wrong value[%d %s]",
-                          r,
-                          XPUAPIErrorMsg[r]));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "layer_norm");
   }
 };
@@ -103,6 +102,9 @@ class LayerNormGradXPUKernel : public framework::OpKernel<T> {
         (dx == nullptr ? nullptr : dx->mutable_data<T>(ctx.GetPlace()));
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    // int layer_norm_grad(Context* ctx, const T* x, const T* dy, T* dx, int m,
+    // int n, float eps, const float* scale, const float* mean, const float*
+    // var, float* dscale, float* dbias);
     int r = xpu::layer_norm_grad(dev_ctx.x_context(),
                                  reinterpret_cast<const XPUType*>(x_data),
                                  reinterpret_cast<const XPUType*>(dy_data),
@@ -115,13 +117,7 @@ class LayerNormGradXPUKernel : public framework::OpKernel<T> {
                                  variance_data,
                                  dscale_data,
                                  dbias_data);
-    PADDLE_ENFORCE_EQ(
-        r,
-        XPU_SUCCESS,
-        platform::errors::External(
-            "XPU layer_norm_grad kernel return wrong value[%d %s]",
-            r,
-            XPUAPIErrorMsg[r]));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "layer_norm_grad");
   }
 };
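For readers unfamiliar with the (m, n) convention in the commented xpu::layer_norm signature: the input is viewed as an m x n matrix (dimensions before begin_norm_axis flattened into m, the rest into n) and each row is normalized independently, with mean/var kept in float32 even when x and y are float16. Below is a minimal NumPy sketch of that math; the helper name layer_norm_ref and its exact API are illustrative only, not the XPU implementation.

import numpy as np


def layer_norm_ref(x, scale, bias, eps=1e-5):
    """Row-wise layer norm over an [m, n] matrix; statistics kept in float32."""
    x32 = x.astype(np.float32)
    mean = x32.mean(axis=1, keepdims=True)  # [m, 1], float32
    var = x32.var(axis=1, keepdims=True)    # [m, 1], float32 (population variance)
    y = (x32 - mean) / np.sqrt(var + eps) * scale + bias
    return y.astype(x.dtype), mean.squeeze(1), var.squeeze(1)


# Tiny usage example with a float16 input, the dtype this commit enables.
x = np.random.uniform(0.1, 1, (4, 8)).astype(np.float16)
scale = np.ones(8, dtype=np.float32)
bias = np.zeros(8, dtype=np.float32)
y, mean, var = layer_norm_ref(x, scale, bias)
print(y.dtype, mean.dtype, var.dtype)  # float16 float32 float32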
@@ -266,7 +266,6 @@ XPUOpMap& get_kl2_ops() {
     {"layer_norm_grad",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                    pOpKernelType(vartype::FP16, XPUPlace())})},
-    {"layer_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"layer_norm",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                    pOpKernelType(vartype::FP16, XPUPlace())})},
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -60,14 +60,19 @@ class XPUTestLayerNormOp(XPUOpTestWrapper):
             self.begin_norm_axis = 1
             self.set_attrs()
+            self.atol = 1e-4
+            if self.dtype == np.float16:
+                self.atol = 1e-2
             right = reduce(mul,
                            self.shape[self.begin_norm_axis:len(self.shape)], 1)
             np.random.seed(10)
             x_np = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
-            scale_np = np.random.uniform(0.1, 1, [right]).astype(self.dtype)
-            bias_np = np.random.uniform(0.1, 1, [right]).astype(self.dtype)
+            scale_np = np.random.uniform(0.1, 1, [right]).astype('float32')
+            bias_np = np.random.uniform(0.1, 1, [right]).astype('float32')
             ref_y_np, ref_mean_np, ref_variance_np = ref_layer_norm(
                 x_np, scale_np, bias_np, self.epsilon, self.begin_norm_axis)
+            ref_y_np = ref_y_np.astype(self.dtype)
 
             self.inputs = {'X': x_np, 'Scale': scale_np, 'Bias': bias_np}
             self.outputs = {
@@ -84,12 +89,12 @@ class XPUTestLayerNormOp(XPUOpTestWrapper):
             pass
 
         def test_check_output(self):
-            self.check_output_with_place(paddle.XPUPlace(0), atol=1e-4)
+            self.check_output_with_place(paddle.XPUPlace(0), atol=self.atol)
 
         def test_check_grad(self):
             self.check_grad_with_place(paddle.XPUPlace(0), ['X'],
                                        'Y',
-                                       max_relative_error=0.02)
+                                       max_relative_error=self.atol)
 
     class TestXPULayerNormOpAxis2(TestXPULayerNormOp):
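The new self.atol logic loosens the tolerance to 1e-2 for float16 because a half-precision result can differ from the float32 reference by more than the 1e-4 used for float32; the test therefore builds the reference with float32 scale_np/bias_np and only casts the final ref_y_np back to the test dtype. A small standalone NumPy sketch of that rounding effect follows; the shape and epsilon here are chosen for illustration only and are not taken from the test.

import numpy as np

np.random.seed(10)
x = np.random.uniform(0.1, 1, (2, 3, 16)).astype(np.float16)

# Normalize over all axes from axis 1 on (begin_norm_axis=1) in float32,
# then cast to float16, mimicking ref_layer_norm(...) followed by
# .astype(self.dtype) in the test above.
x32 = x.astype(np.float32).reshape(x.shape[0], -1)
mean = x32.mean(axis=1, keepdims=True)
var = x32.var(axis=1, keepdims=True)
y32 = (x32 - mean) / np.sqrt(var + 1e-5)
y16 = y32.astype(np.float16)

# The float16 cast alone typically introduces errors above the 1e-4 float32
# tolerance but well below 1e-2, which is the tolerance adopted for fp16.
err = np.abs(y16.astype(np.float32) - y32).max()
print("max |fp16 - fp32| =", err, "-> within 1e-2:", err < 1e-2)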
@@ -121,7 +121,6 @@ class XPUTestOneHotOP(XPUOpTestWrapper):
 support_types = get_xpu_op_support_types('one_hot')
-print("support_types: %s" % str(support_types))
 for stype in support_types:
     create_test_class(globals(), XPUTestOneHotOP, stype)