Unverified · Commit 791963ab · authored by Charles-hit, committed by GitHub

support some prim ops bf16 dtype (#54399)

Parent 2f0b4ad0
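The bfloat16 cases in the tests below follow the OpTest convention of carrying bf16 data as np.uint16. A minimal sketch of that round-trip, assuming it mirrors the eager_op_test helpers convert_float_to_uint16 / convert_uint16_to_float (bfloat16 keeps the upper 16 bits of an IEEE-754 float32; the standalone names float_to_uint16 / uint16_to_float here are illustrative, not Paddle APIs):

import numpy as np

# Illustrative helpers (assumed to mirror eager_op_test's
# convert_float_to_uint16 / convert_uint16_to_float): bfloat16 is the
# upper 16 bits of a float32, stored as an np.uint16 array.
def float_to_uint16(x):
    return np.right_shift(
        x.astype(np.float32).view(np.uint32), 16
    ).astype(np.uint16)

def uint16_to_float(x):
    return np.left_shift(x.astype(np.uint32), 16).view(np.float32)

x = np.random.uniform(0, 100, (10, 10)).astype(np.float32)
# Round-tripping quantizes the reference data to bf16 precision, the same
# trick TestGatherNdOpIndex1 below applies before computing expected outputs.
x_bf16 = uint16_to_float(float_to_uint16(x))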
@@ -49,6 +49,7 @@ PD_REGISTER_KERNEL(sum_raw,
                    float,
                    double,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16,
                    int16_t,
                    int,
                    int64_t,
......
@@ -44,6 +44,7 @@ PD_REGISTER_KERNEL(sum,
                    float,
                    double,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16,
                    int16_t,
                    int,
                    int64_t,
......
@@ -136,8 +136,8 @@ class TestConcatOp6(TestConcatOp):
         self.dtype = self.get_dtype()
         self.python_api = paddle.concat
         self.public_python_api = paddle.concat
-        self.enable_cinn = False
         self.init_test_data()
+        self.if_enable_cinn()
         self.lod = [[20, 80]]
         self.out_lod = [[20, 80, 20, 80, 20, 80]]
         self.inputs = {
@@ -156,6 +156,9 @@ class TestConcatOp6(TestConcatOp):
         out = np.concatenate((self.x0, self.x1, self.x2), axis=self.actual_axis)
         self.outputs = {'Out': (out, self.out_lod)}

+    def if_enable_cinn(self):
+        pass
+
     def test_check_output(self):
         self.check_output()
@@ -177,7 +180,7 @@ class TestConcatOp7(TestConcatOp):
         self.python_api = paddle.concat
         self.public_python_api = paddle.concat
         self.prim_op_type = "prim"
-        self.enable_cinn = True
+        self.if_enable_cinn()
         self.dtype = self.get_dtype()
         self.init_test_data()
         self.inputs = {'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)]}
@@ -194,6 +197,9 @@ class TestConcatOp7(TestConcatOp):
             )
         }

+    def if_enable_cinn(self):
+        pass
+
     def get_dtype(self):
         return "float64"
@@ -226,7 +232,6 @@ def create_test_AxisTensor(parent):
             self.op_type = "concat"
             self.python_api = paddle.concat
             self.public_python_api = paddle.concat
-            self.enable_cinn = False
             self.dtype = self.get_dtype()
             self.init_test_data()
             self.inputs = {
@@ -286,7 +291,6 @@ def create_test_fp16(parent):
             self.op_type = "concat"
             self.python_api = paddle.concat
             self.public_python_api = paddle.concat
-            self.enable_cinn = False
             self.dtype = self.get_dtype()
             self.init_test_data()
             self.inputs = {
......
@@ -198,7 +198,6 @@ class TestElementwiseBF16Op(OpTest):
         self.python_api = paddle.maximum
         self.public_python_api = paddle.maximum
         self.prim_op_type = "prim"
-        self.enable_cinn = False
         self.dtype = np.uint16
         self.inputs = {
             'X': convert_float_to_uint16(self.x),
@@ -207,6 +206,7 @@ class TestElementwiseBF16Op(OpTest):
         self.outputs = {
             'Out': convert_float_to_uint16(np.maximum(self.x, self.y))
         }
+        self.if_enable_cinn()

     def test_check_output(self):
         if hasattr(self, 'attrs'):
@@ -214,6 +214,9 @@ class TestElementwiseBF16Op(OpTest):
         else:
             self.check_output(check_dygraph=True)

+    def if_enable_cinn(self):
+        pass
+
     def test_check_grad_normal(self):
         if hasattr(self, 'attrs'):
             # check_prim=False, bfloat16 is not supported in `less_equal`
@@ -221,16 +224,26 @@ class TestElementwiseBF16Op(OpTest):
                 ['X', 'Y'], 'Out', numeric_grad_delta=0.05, check_dygraph=False
             )
         else:
-            self.check_grad(['X', 'Y'], 'Out', numeric_grad_delta=0.05)
+            self.check_grad(
+                ['X', 'Y'], 'Out', numeric_grad_delta=0.05, check_prim=True
+            )

     def test_check_grad_ingore_x(self):
         self.check_grad(
-            ['Y'], 'Out', numeric_grad_delta=0.05, no_grad_set=set("X")
+            ['Y'],
+            'Out',
+            numeric_grad_delta=0.05,
+            no_grad_set=set("X"),
+            check_prim=True,
         )

     def test_check_grad_ingore_y(self):
         self.check_grad(
-            ['X'], 'Out', numeric_grad_delta=0.05, no_grad_set=set('Y')
+            ['X'],
+            'Out',
+            numeric_grad_delta=0.05,
+            no_grad_set=set('Y'),
+            check_prim=True,
         )
......
@@ -19,6 +19,7 @@ from eager_op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci

 import paddle
 from paddle import fluid
+from paddle.fluid import core


 def pow_grad(x, y, dout):
@@ -270,8 +271,10 @@ class TestElementwisePowOpFP16(OpTest):
 class TestElementwisePowBF16Op(OpTest):
     def setUp(self):
         self.op_type = "elementwise_pow"
+        self.prim_op_type = "prim"
         self.dtype = np.uint16
         self.python_api = paddle.pow
+        self.public_python_api = paddle.pow
         x = np.random.uniform(0, 1, [20, 5]).astype(np.float32)
         y = np.random.uniform(0, 1, [20, 5]).astype(np.float32)
@@ -290,6 +293,14 @@ class TestElementwisePowBF16Op(OpTest):
     def test_check_grad(self):
         self.check_grad(['X', 'Y'], 'Out')
+        if core.is_compiled_with_cuda():
+            self.check_grad_with_place(
+                core.CUDAPlace(0),
+                ['X', 'Y'],
+                'Out',
+                check_prim=True,
+                only_check_prim=True,
+            )


 if __name__ == '__main__':
......
@@ -15,7 +15,11 @@
 import unittest

 import numpy as np
-from eager_op_test import OpTest, convert_float_to_uint16
+from eager_op_test import (
+    OpTest,
+    convert_float_to_uint16,
+    convert_uint16_to_float,
+)

 import paddle
 from paddle import fluid
@@ -31,6 +35,7 @@ class TestGatherNdOpWithEmptyIndex(OpTest):
         self.python_api = paddle.gather_nd
         self.public_python_api = paddle.gather_nd
         self.config_dtype()
+        self.if_enable_cinn()
         if self.dtype == np.float64:
             target_dtype = "float64"
         elif self.dtype == np.float16:
@@ -45,6 +50,9 @@ class TestGatherNdOpWithEmptyIndex(OpTest):
         self.inputs = {'X': xnp, 'Index': np.array([[], []]).astype("int32")}
         self.outputs = {'Out': output}

+    def if_enable_cinn(self):
+        pass
+
     def config_dtype(self):
         self.dtype = np.float64
@@ -85,6 +93,7 @@ class TestGatherNdOpWithIndex1(OpTest):
         self.python_api = paddle.gather_nd
         self.public_python_api = paddle.gather_nd
         self.config_dtype()
+        self.if_enable_cinn()
         if self.dtype == np.float64:
             target_dtype = "float64"
         elif self.dtype == np.float16:
@@ -100,6 +109,9 @@ class TestGatherNdOpWithIndex1(OpTest):
         self.inputs = {'X': xnp, 'Index': index}
         self.outputs = {'Out': output}

+    def if_enable_cinn(self):
+        pass
+
     def config_dtype(self):
         self.dtype = np.float64
@@ -189,7 +201,9 @@ class TestGatherNdOpWithLowIndexBF16(TestGatherNdOpWithLowIndex):
     def test_check_grad(self):
         place = core.CUDAPlace(0)
-        self.check_grad_with_place(place, ['X'], 'Out', check_prim=True)
+        self.check_grad_with_place(
+            place, ['X'], 'Out', check_prim=True, numeric_grad_delta=0.5
+        )


 class TestGatherNdOpIndex1(OpTest):
@@ -208,6 +222,8 @@ class TestGatherNdOpIndex1(OpTest):
         else:
             target_dtype = "float32"
         xnp = np.random.uniform(0, 100, (10, 10)).astype(target_dtype)
+        if self.dtype == np.uint16:
+            xnp = convert_uint16_to_float(convert_float_to_uint16(xnp))
         index = np.array([1, 2]).astype("int32")
         output = xnp[tuple(index.T)]
         if self.dtype == np.uint16:
@@ -215,6 +231,9 @@ class TestGatherNdOpIndex1(OpTest):
             output = convert_float_to_uint16(output)
         self.inputs = {'X': xnp, 'Index': index}
         self.outputs = {'Out': output}
+        self.if_enable_cinn()
+
+    def if_enable_cinn(self):
         # the outputs are 0D-tensor, CINN not support
         self.enable_cinn = False
@@ -225,7 +244,7 @@ class TestGatherNdOpIndex1(OpTest):
         self.check_output()

     def test_check_grad(self):
-        self.check_grad(['X'], 'Out', check_prim=True)
+        self.check_grad(['X'], 'Out', check_prim=True, numeric_grad_delta=0.05)


 class TestGatherNdOpIndex1FP16(TestGatherNdOpIndex1):
@@ -248,7 +267,9 @@ class TestGatherNdOpIndex1BF16(TestGatherNdOpIndex1):
     def test_check_grad(self):
         place = core.CUDAPlace(0)
-        self.check_grad_with_place(place, ['X'], 'Out', check_prim=True)
+        self.check_grad_with_place(
+            place, ['X'], 'Out', check_prim=True, numeric_grad_delta=0.5
+        )


 class TestGatherNdOpWithSameIndexAsX(OpTest):
@@ -304,7 +325,9 @@ class TestGatherNdOpWithSameIndexAsXBF16(TestGatherNdOpWithSameIndexAsX):
     def test_check_grad(self):
         place = core.CUDAPlace(0)
-        self.check_grad_with_place(place, ['X'], 'Out', check_prim=True)
+        self.check_grad_with_place(
+            place, ['X'], 'Out', check_prim=True, numeric_grad_delta=0.5
+        )


 class TestGatherNdOpWithHighRankSame(OpTest):
......
@@ -277,13 +277,16 @@ class TestMaxOp_ZeroDim(OpTest):
         self.prim_op_type = "prim"
         self.python_api = paddle.max
         self.public_python_api = paddle.max
-        self.enable_cinn = False
+        self.if_enable_cinn()
         self.inputs = {'X': np.random.random([]).astype("float64")}
         self.attrs = {'dim': []}
         self.outputs = {
             'Out': self.inputs['X'].max(axis=tuple(self.attrs['dim']))
         }

+    def if_enable_cinn(self):
+        self.enable_cinn = False
+
     def test_check_output(self):
         self.check_output()
......