Unverified commit 1fbf423a, authored by Yuang Liu, committed by GitHub

[AMP OP&Test] Support bf16/fp16 for roll op and add ut. (#51565)

Parent 8fc9a19f
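For context, a minimal dynamic-graph usage sketch (not part of this commit; it assumes a CUDA build of Paddle on which fp16/bf16 kernels are available) of what the newly registered dtypes enable:

# Minimal usage sketch, not part of the diff: rolls float16 and bfloat16
# tensors on GPU, exercising the kernels registered below.
import numpy as np
import paddle

paddle.set_device('gpu')  # assumption: CUDA build with fp16/bf16 support

x_fp16 = paddle.to_tensor(np.arange(12, dtype=np.float16).reshape(3, 4))
y_fp16 = paddle.roll(x_fp16, shifts=[1, -1], axis=[0, 1])

x_bf16 = x_fp16.astype('bfloat16')
y_bf16 = paddle.roll(x_bf16, shifts=1, axis=0)
print(y_fp16.dtype, y_bf16.dtype)  # paddle.float16, paddle.bfloat16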
@@ -14,7 +14,9 @@
 #include "paddle/phi/kernels/roll_grad_kernel.h"
+#include "paddle/phi/common/bfloat16.h"
 #include "paddle/phi/common/complex.h"
+#include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/gpu/roll_kernel_impl.h"
@@ -81,6 +83,7 @@ PD_REGISTER_KERNEL(roll_grad,
                    ALL_LAYOUT,
                    phi::RollGradKernel,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16,
                    float,
                    double,
                    int,
......
@@ -14,7 +14,9 @@
 #include "paddle/phi/kernels/roll_kernel.h"
+#include "paddle/phi/common/bfloat16.h"
 #include "paddle/phi/common/complex.h"
+#include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/utils/array.h"
 #include "paddle/phi/kernels/gpu/roll_kernel_impl.h"
@@ -83,6 +85,7 @@ PD_REGISTER_KERNEL(roll,
                    ALL_LAYOUT,
                    phi::RollKernel,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16,
                    float,
                    double,
                    int,
......
@@ -15,10 +15,11 @@
 import unittest
 import numpy as np
-from op_test import OpTest
+from op_test import OpTest, convert_float_to_uint16
 import paddle
 import paddle.fluid as fluid
+import paddle.fluid.core as core
 from paddle.fluid import Program, program_guard
@@ -27,13 +28,17 @@ class TestRollOp(OpTest):
         self.python_api = paddle.roll
         self.op_type = "roll"
         self.init_dtype_type()
-        self.inputs = {'X': np.random.random(self.x_shape).astype(self.dtype)}
         self.attrs = {'shifts': self.shifts, 'axis': self.axis}
-        self.outputs = {
-            'Out': np.roll(
-                self.inputs['X'], self.attrs['shifts'], self.attrs['axis']
-            )
-        }
+        bf16_ut = self.dtype == np.uint16
+        x = np.random.random(self.x_shape).astype(
+            np.float32 if bf16_ut else self.dtype
+        )
+        out = np.roll(x, self.attrs['shifts'], self.attrs['axis'])
+        if bf16_ut:
+            x = convert_float_to_uint16(x)
+            out = convert_float_to_uint16(out)
+        self.inputs = {'X': x}
+        self.outputs = {'Out': out}

     def init_dtype_type(self):
         self.dtype = np.float64
@@ -56,6 +61,62 @@ class TestRollOpCase2(TestRollOp):
         self.axis = [-1, -2]

+
+class TestRollFP16OP(TestRollOp):
+    def init_dtype_type(self):
+        self.dtype = np.float16
+        self.x_shape = (100, 4, 5)
+        self.shifts = [101, -1]
+        self.axis = [0, -2]
+
+
+class TestRollFP16OpCase2(TestRollOp):
+    def init_dtype_type(self):
+        self.dtype = np.float16
+        self.x_shape = (100, 10, 5)
+        self.shifts = [8, -1]
+        self.axis = [-1, -2]
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or does not support bfloat16",
+)
+class TestRollBF16OP(TestRollOp):
+    def init_dtype_type(self):
+        self.dtype = np.uint16
+        self.x_shape = (10, 4, 5)
+        self.shifts = [101, -1]
+        self.axis = [0, -2]
+        self.place = core.CUDAPlace(0)
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place, check_eager=True)
+
+    def test_check_grad_normal(self):
+        self.check_grad_with_place(self.place, ['X'], 'Out', check_eager=True)
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or does not support bfloat16",
+)
+class TestRollBF16OpCase2(TestRollOp):
+    def init_dtype_type(self):
+        self.dtype = np.uint16
+        self.x_shape = (10, 5, 5)
+        self.shifts = [8, -1]
+        self.axis = [-1, -2]
+        self.place = core.CUDAPlace(0)
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place, check_eager=True)
+
+    def test_check_grad_normal(self):
+        self.check_grad_with_place(self.place, ['X'], 'Out', check_eager=True)
+
+
 class TestRollAPI(unittest.TestCase):
     def input_data(self):
         self.data_x = np.array(
......
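A note on the bf16 path in the test above: the op_test helper convert_float_to_uint16 stores bfloat16 values as numpy uint16 bit patterns. A rough equivalent (an illustration only, not Paddle's exact helper) is to reinterpret each float32 as raw bits and keep the upper 16 bits:

# Hypothetical stand-in for convert_float_to_uint16: truncate fp32 to bf16 bits.
import numpy as np

def float32_to_bf16_bits(arr):
    bits = np.ascontiguousarray(arr, dtype=np.float32).view(np.uint32)
    return (bits >> 16).astype(np.uint16)

x = np.random.random((10, 4, 5)).astype(np.float32)
x_bits = float32_to_bf16_bits(x)            # what the test feeds as 'X'
out_ref = np.roll(x, (101, -1), (0, -2))    # float32 reference result
out_bits = float32_to_bf16_bits(out_ref)    # expected 'Out' in bf16 bit form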
@@ -1710,6 +1710,21 @@ def roll(x, shifts, axis=None, name=None):
     if in_dygraph_mode():
         return _C_ops.roll(x, shifts, axis)
     else:
+        check_variable_and_dtype(
+            x,
+            'dtype',
+            [
+                'float16',
+                'float32',
+                'uint16',
+                'float64',
+                'int32',
+                'int64',
+                'complex64',
+                'complex128',
+            ],
+            'roll',
+        )
         helper = LayerHelper("roll", **locals())
         check_type(axis, 'axis', (list, tuple), 'roll')
......
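For the static-graph branch touched above, a minimal sketch (an illustration only; assumes a CUDA build, since the fp16/bf16 roll kernels are GPU-only in this change) of building and running roll with a float16 input, which the extended dtype list now accepts:

# Static-graph sketch, not part of the diff: float16 input through paddle.roll.
import numpy as np
import paddle

paddle.enable_static()
main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[3, 4], dtype='float16')
    y = paddle.roll(x, shifts=1, axis=0)

exe = paddle.static.Executor(paddle.CUDAPlace(0))  # assumption: GPU available
exe.run(startup_prog)
(out,) = exe.run(
    main_prog,
    feed={'x': np.arange(12, dtype=np.float16).reshape(3, 4)},
    fetch_list=[y],
)
print(out.dtype)  # float16
paddle.disable_static()

The 'uint16' entry in the dtype list is how these static-graph checks refer to bfloat16 data.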