Unverified commit 4c3e13de, authored by houj04, committed by GitHub

[XPU] fp16 for layer_norm op (#44778)

* [XPU] fp16 for layer_norm op. test=kunlun
Parent c3d4a3d8
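For context, a minimal sketch of what this change enables: running LayerNorm under float16 on an XPU device. Illustrative only; it assumes a Paddle build with XPU support and an XPU device reachable as `xpu:0`.

```python
# Illustrative sketch (not part of the commit): assumes a Paddle build
# with XPU support and an XPU device reachable as "xpu:0".
import numpy as np
import paddle

paddle.set_device("xpu:0")

x = paddle.to_tensor(np.random.uniform(0.1, 1, (2, 100)).astype("float16"))
layer_norm = paddle.nn.LayerNorm(normalized_shape=100)  # scale/bias remain fp32
y = layer_norm(x)  # with this commit, dispatches to the fp16 XPU kernel
print(y.dtype)     # paddle.float16
```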
- /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+ /* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
namespace paddle {
namespace operators {
@@ -48,6 +49,9 @@ class LayerNormXPUKernel : public framework::OpKernel<T> {
auto* mean_data = mean->mutable_data<float>(ctx.GetPlace());
auto* variance_data = variance->mutable_data<float>(ctx.GetPlace());
auto& dev_ctx = ctx.template device_context<DeviceContext>();
+ // int layer_norm(Context* ctx, const T* x, T* y, int m, int n, float eps,
+ //     const float* scale, const float* bias, float* mean, float* var);
int r = xpu::layer_norm(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x_data),
reinterpret_cast<XPUType*>(y_data),
@@ -58,12 +62,7 @@ class LayerNormXPUKernel : public framework::OpKernel<T> {
bias_data,
mean_data,
variance_data);
- PADDLE_ENFORCE_EQ(r,
-                   XPU_SUCCESS,
-                   platform::errors::External(
-                       "XPU layer_norm kernel return wrong value[%d %s]",
-                       r,
-                       XPUAPIErrorMsg[r]));
+ PADDLE_ENFORCE_XDNN_SUCCESS(r, "layer_norm");
}
};
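The `xpu::layer_norm` call above computes the standard row-wise normalization. A minimal NumPy sketch of the same math (consistent with the `ref_layer_norm` helper used by the test below; names and details here are illustrative):

```python
import numpy as np

def layer_norm_ref(x, scale, bias, epsilon, begin_norm_axis):
    # Collapse x to [m, n]; each of the m rows is normalized independently.
    shape = x.shape
    n = int(np.prod(shape[begin_norm_axis:]))
    m = x.size // n
    x2 = x.reshape(m, n).astype(np.float32)  # mean/var stay fp32, matching the kernel's float* outputs
    mean = x2.mean(axis=1)                   # per-row mean     -> 'mean' output
    var = x2.var(axis=1)                     # per-row variance -> 'var' output
    x_hat = (x2 - mean[:, None]) / np.sqrt(var[:, None] + epsilon)
    y = x_hat * scale.reshape(1, n) + bias.reshape(1, n)
    return y.reshape(shape), mean, var
```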
@@ -103,6 +102,9 @@ class LayerNormGradXPUKernel : public framework::OpKernel<T> {
(dx == nullptr ? nullptr : dx->mutable_data<T>(ctx.GetPlace()));
auto& dev_ctx = ctx.template device_context<DeviceContext>();
+ // int layer_norm_grad(Context* ctx, const T* x, const T* dy, T* dx, int m,
+ //     int n, float eps, const float* scale, const float* mean, const float*
+ //     var, float* dscale, float* dbias);
int r = xpu::layer_norm_grad(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x_data),
reinterpret_cast<const XPUType*>(dy_data),
@@ -115,13 +117,7 @@ class LayerNormGradXPUKernel : public framework::OpKernel<T> {
variance_data,
dscale_data,
dbias_data);
- PADDLE_ENFORCE_EQ(
-     r,
-     XPU_SUCCESS,
-     platform::errors::External(
-         "XPU layer_norm_grad kernel return wrong value[%d %s]",
-         r,
-         XPUAPIErrorMsg[r]));
+ PADDLE_ENFORCE_XDNN_SUCCESS(r, "layer_norm_grad");
}
};
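Likewise, the gradients that `xpu::layer_norm_grad` produces follow the standard layer-norm backward formulas. A NumPy sketch for the flattened `[m, n]` case (illustrative; not the kernel's actual implementation):

```python
import numpy as np

def layer_norm_grad_ref(x, dy, scale, mean, var, epsilon):
    # x, dy: [m, n]; scale: [n]; mean, var: [m]
    inv_std = 1.0 / np.sqrt(var[:, None] + epsilon)
    x_hat = (x - mean[:, None]) * inv_std          # normalized input
    dbias = dy.sum(axis=0)                         # [n]
    dscale = (dy * x_hat).sum(axis=0)              # [n]
    dx_hat = dy * scale[None, :]
    # Subtract the per-row means so dx carries no component along the
    # shift/scale directions removed by the normalization.
    dx = inv_std * (dx_hat
                    - dx_hat.mean(axis=1, keepdims=True)
                    - x_hat * (dx_hat * x_hat).mean(axis=1, keepdims=True))
    return dx, dscale, dbias
```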
......
@@ -266,7 +266,6 @@ XPUOpMap& get_kl2_ops() {
{"layer_norm_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"layer_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"layer_norm",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
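With the registration above, KL2 now reports both dtypes for the op. A sketch of how the test infrastructure picks that up (`get_xpu_op_support_types` is the helper already used by the XPU op tests, as in the `one_hot` snippet further below; the import path depends on the test directory, and the output shown is illustrative):

```python
# Sketch: run from the XPU unit-test directory, where the helper lives.
from get_test_cover_info import get_xpu_op_support_types

support_types = get_xpu_op_support_types('layer_norm')
print(support_types)  # expected to include 'float32' and 'float16' after this change
```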
......
- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -60,14 +60,19 @@ class XPUTestLayerNormOp(XPUOpTestWrapper):
self.begin_norm_axis = 1
self.set_attrs()
+ self.atol = 1e-4
+ if self.dtype == np.float16:
+     self.atol = 1e-2
right = reduce(mul,
self.shape[self.begin_norm_axis:len(self.shape)], 1)
np.random.seed(10)
x_np = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
- scale_np = np.random.uniform(0.1, 1, [right]).astype(self.dtype)
- bias_np = np.random.uniform(0.1, 1, [right]).astype(self.dtype)
+ scale_np = np.random.uniform(0.1, 1, [right]).astype('float32')
+ bias_np = np.random.uniform(0.1, 1, [right]).astype('float32')
ref_y_np, ref_mean_np, ref_variance_np = ref_layer_norm(
x_np, scale_np, bias_np, self.epsilon, self.begin_norm_axis)
+ ref_y_np = ref_y_np.astype(self.dtype)
self.inputs = {'X': x_np, 'Scale': scale_np, 'Bias': bias_np}
self.outputs = {
@@ -84,12 +89,12 @@ class XPUTestLayerNormOp(XPUOpTestWrapper):
pass
def test_check_output(self):
- self.check_output_with_place(paddle.XPUPlace(0), atol=1e-4)
+ self.check_output_with_place(paddle.XPUPlace(0), atol=self.atol)
def test_check_grad(self):
self.check_grad_with_place(paddle.XPUPlace(0), ['X'],
'Y',
- max_relative_error=0.02)
+ max_relative_error=self.atol)
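The relaxed fp16 tolerances match the format's precision: float16 carries roughly three decimal digits, so the previous 1e-4 bound is unattainable. A quick check:

```python
import numpy as np

# One ulp at 1.0 for each dtype:
print(np.finfo(np.float16).eps)  # ~9.77e-04
print(np.finfo(np.float32).eps)  # ~1.19e-07
# With ~1e-3 resolution, rounding accumulated across the normalized axis
# makes atol=1e-2 (and the matching relative error bound) realistic for fp16.
```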
class TestXPULayerNormOpAxis2(TestXPULayerNormOp):
......
@@ -121,7 +121,6 @@ class XPUTestOneHotOP(XPUOpTestWrapper):
support_types = get_xpu_op_support_types('one_hot')
print("support_types: %s" % str(support_types))
for stype in support_types:
create_test_class(globals(), XPUTestOneHotOP, stype)
......