Unverified commit 1fbf423a authored by Yuang Liu, committed by GitHub

[AMP OP&Test] Support bf16/fp16 for roll op and add ut. (#51565)

Parent 8fc9a19f
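Not part of the original commit: a minimal usage sketch of what this change enables, assuming a CUDA build of Paddle where the GPU roll kernels registered below are available. Tensor values and shapes are illustrative only.

```python
import numpy as np
import paddle

# Minimal sketch (not from the commit), assuming a CUDA build of Paddle:
# after this change, paddle.roll dispatches to the newly registered
# float16/bfloat16 GPU kernels instead of rejecting the dtype.
paddle.set_device("gpu")

x_fp16 = paddle.to_tensor(np.arange(12, dtype=np.float16).reshape(3, 4))
y_fp16 = paddle.roll(x_fp16, shifts=1, axis=0)

x_bf16 = paddle.cast(x_fp16, "bfloat16")
y_bf16 = paddle.roll(x_bf16, shifts=[1, -1], axis=[0, 1])
```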
@@ -14,7 +14,9 @@
#include "paddle/phi/kernels/roll_grad_kernel.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/gpu/roll_kernel_impl.h"
@@ -81,6 +83,7 @@ PD_REGISTER_KERNEL(roll_grad,
ALL_LAYOUT,
phi::RollGradKernel,
phi::dtype::float16,
phi::dtype::bfloat16,
float,
double,
int,
......
@@ -14,7 +14,9 @@
#include "paddle/phi/kernels/roll_kernel.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/utils/array.h"
#include "paddle/phi/kernels/gpu/roll_kernel_impl.h"
@@ -83,6 +85,7 @@ PD_REGISTER_KERNEL(roll,
ALL_LAYOUT,
phi::RollKernel,
phi::dtype::float16,
phi::dtype::bfloat16,
float,
double,
int,
......
@@ -15,10 +15,11 @@
import unittest
import numpy as np
from op_test import OpTest
from op_test import OpTest, convert_float_to_uint16
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid import Program, program_guard
@@ -27,13 +28,17 @@ class TestRollOp(OpTest):
self.python_api = paddle.roll
self.op_type = "roll"
self.init_dtype_type()
self.inputs = {'X': np.random.random(self.x_shape).astype(self.dtype)}
self.attrs = {'shifts': self.shifts, 'axis': self.axis}
self.outputs = {
'Out': np.roll(
self.inputs['X'], self.attrs['shifts'], self.attrs['axis']
)
}
bf16_ut = self.dtype == np.uint16
x = np.random.random(self.x_shape).astype(
np.float32 if bf16_ut else self.dtype
)
out = np.roll(x, self.attrs['shifts'], self.attrs['axis'])
if bf16_ut:
x = convert_float_to_uint16(x)
out = convert_float_to_uint16(out)
self.inputs = {'X': x}
self.outputs = {'Out': out}
def init_dtype_type(self):
self.dtype = np.float64
@@ -56,6 +61,62 @@ class TestRollOpCase2(TestRollOp):
self.axis = [-1, -2]
class TestRollFP16OP(TestRollOp):
def init_dtype_type(self):
self.dtype = np.float16
self.x_shape = (100, 4, 5)
self.shifts = [101, -1]
self.axis = [0, -2]
class TestRollFP16OpCase2(TestRollOp):
def init_dtype_type(self):
self.dtype = np.float16
self.x_shape = (100, 10, 5)
self.shifts = [8, -1]
self.axis = [-1, -2]
@unittest.skipIf(
not core.is_compiled_with_cuda()
or not core.is_bfloat16_supported(core.CUDAPlace(0)),
"core is not complied with CUDA and not support the bfloat16",
)
class TestRollBF16OP(TestRollOp):
def init_dtype_type(self):
self.dtype = np.uint16
self.x_shape = (10, 4, 5)
self.shifts = [101, -1]
self.axis = [0, -2]
self.place = core.CUDAPlace(0)
def test_check_output(self):
self.check_output_with_place(self.place, check_eager=True)
def test_check_grad_normal(self):
self.check_grad_with_place(self.place, ['X'], 'Out', check_eager=True)
@unittest.skipIf(
not core.is_compiled_with_cuda()
or not core.is_bfloat16_supported(core.CUDAPlace(0)),
"core is not complied with CUDA and not support the bfloat16",
)
class TestRollBF16OpCase2(TestRollOp):
def init_dtype_type(self):
self.dtype = np.uint16
self.x_shape = (10, 5, 5)
self.shifts = [8, -1]
self.axis = [-1, -2]
self.place = core.CUDAPlace(0)
def test_check_output(self):
self.check_output_with_place(self.place, check_eager=True)
def test_check_grad_normal(self):
self.check_grad_with_place(self.place, ['X'], 'Out', check_eager=True)
class TestRollAPI(unittest.TestCase):
def input_data(self):
self.data_x = np.array(
......
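The bf16 tests above build float32 reference data, compute np.roll on it, and only then convert both input and output with convert_float_to_uint16. The snippet below is a hypothetical re-implementation of that conversion, for illustration only (the real helper lives in op_test); it shows why the bf16 OpTest inputs are carried as uint16 arrays.

```python
import numpy as np

def convert_float_to_uint16_sketch(arr):
    # Hypothetical stand-in for op_test.convert_float_to_uint16: bfloat16 keeps
    # the upper 16 bits of an IEEE float32, stored here as a uint16 array.
    return np.right_shift(arr.astype(np.float32).view(np.uint32), 16).astype(np.uint16)

x = np.random.random((10, 4, 5)).astype(np.float32)
out = np.roll(x, [101, -1], axis=[0, -2])
x_bf16, out_bf16 = (convert_float_to_uint16_sketch(a) for a in (x, out))
```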
@@ -1710,6 +1710,21 @@ def roll(x, shifts, axis=None, name=None):
if in_dygraph_mode():
return _C_ops.roll(x, shifts, axis)
else:
check_variable_and_dtype(
x,
'dtype',
[
'float16',
'float32',
'uint16',
'float64',
'int32',
'int64',
'complex64',
'complex128',
],
'roll',
)
helper = LayerHelper("roll", **locals())
check_type(axis, 'axis', (list, tuple), 'roll')
......
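Not from the commit itself: a minimal static-graph sketch, assuming paddle.enable_static() is available, showing the effect of adding 'float16' and 'uint16' (bfloat16) to the dtype whitelist above: half-precision inputs now pass the check_variable_and_dtype guard instead of failing at graph-build time.

```python
import paddle

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    # With 'float16' now whitelisted, building this program no longer raises
    # a dtype TypeError from the roll check.
    x = paddle.static.data(name="x", shape=[3, 4], dtype="float16")
    y = paddle.roll(x, shifts=1, axis=0)
```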