【PaddlePaddle Hackathon 4】No.63 fix temporal_shift and conj (#51532)

* add fp16 and bf16 for temporal_shift
* add fp16 and bf16 for complex
* fix bug
* fix bug
* add fp16 and bf16 for conj
* fix bug
* fix bug
* Update complex_kernel.h fix bug
* Update temporal_shift_grad_kernel.h fix bug
* Update temporal_shift_kernel.h fix bug

【PaddlePaddle Hackathon 4】No.63 fix temporal_shift and conj (#51532)
* add fp16 and bf16 for temporal_shift
* add fp16 and bf16 for complex
* fix bug
* fix bug
* add fp16 and bf16 for conj
* fix bug
* fix bug
* Update complex_kernel.h fix bug
* Update temporal_shift_grad_kernel.h fix bug
* Update temporal_shift_kernel.h fix bug
1550348e · LoneRanger · GitHub · a82911a5 · 1550348e · 1550348e
5 changed files
--- a/paddle/phi/kernels/gpu/complex_kernel.cu
+++ b/paddle/phi/kernels/gpu/complex_kernel.cu
@@ -26,6 +26,7 @@ PD_REGISTER_KERNEL(conj,
                   ALL_LAYOUT,
                   phi::ConjKernel,
                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
                   phi::dtype::complex<float>,
                   phi::dtype::complex<double>,
                   float,

--- a/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
@@ -146,4 +146,5 @@ PD_REGISTER_KERNEL(temporal_shift_grad,
                   phi::TemporalShiftGradKernel,
                   float,
                   double,
-                   phi::dtype::float16) {}
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
--- a/paddle/phi/kernels/gpu/temporal_shift_kernel.cu
+++ b/paddle/phi/kernels/gpu/temporal_shift_kernel.cu
@@ -146,4 +146,5 @@ PD_REGISTER_KERNEL(temporal_shift,
                   phi::TemporalShiftKernel,
                   float,
                   double,
-                   phi::dtype::float16) {}
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
--- a/python/paddle/fluid/tests/unittests/test_conj_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conj_op.py
@@ -20,9 +20,10 @@ import numpy as np
 import paddle
 sys.path.append("..")
-from eager_op_test import OpTest
+from eager_op_test import OpTest, convert_float_to_uint16
 from numpy.random import random as rand
+import paddle.fluid.core as core
 import paddle.fluid.dygraph as dg
 import paddle.static as static
@@ -147,5 +148,43 @@ class Testfp16ConjOp(unittest.TestCase):
                out = exe.run(feed={'x': input_x}, fetch_list=[out])
+class TestConjFP16OP(TestConjOp):
+    def init_dtype_type(self):
+        self.dtype = np.float16
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not complied with CUDA and not support the bfloat16",
+)
+class TestConjBF16(OpTest):
+    def setUp(self):
+        self.op_type = "conj"
+        self.python_api = paddle.tensor.conj
+        self.init_dtype_type()
+        self.init_input_output()
+    def init_dtype_type(self):
+        self.dtype = np.uint16
+    def init_input_output(self):
+        x = (
+            np.random.random((12, 14)) + 1j * np.random.random((12, 14))
+        ).astype(np.float32)
+        out = np.conj(x)
+        self.inputs = {'X': convert_float_to_uint16(x)}
+        self.outputs = {'Out': convert_float_to_uint16(out)}
+    def test_check_output(self):
+        place = core.CUDAPlace(0)
+        self.check_output_with_place(place)
+    def test_check_grad(self):
+        place = core.CUDAPlace(0)
+        self.check_grad_with_place(place, ['X'], 'Out')
 if __name__ == "__main__":
    unittest.main()
--- a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
+++ b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
@@ -15,7 +15,7 @@
 import unittest
 import numpy as np
-from op_test import OpTest
+from op_test import OpTest, convert_float_to_uint16
 import paddle
 from paddle.fluid import core
@@ -44,6 +44,7 @@ def temporal_shift(x, seg_num, shift_ratio, data_format):
 class TestTemporalShift(OpTest):
    def setUp(self):
        self.initTestCase()
+        self.init_dtype()
        self.op_type = 'temporal_shift'
        self.python_api = paddle.nn.functional.temporal_shift
        x = np.random.random(self.x_shape).astype(self.dtype)
@@ -64,6 +65,9 @@ class TestTemporalShift(OpTest):
        self.outputs = {"Out": output}
        self.python_out_sig = ["Out"]
+    def init_dtype(self):
+        self.dtype = 'float64'
    def test_check_output(self):
        self.check_output(check_eager=True)
@@ -74,7 +78,6 @@ class TestTemporalShift(OpTest):
        self.x_shape = (6, 4, 4, 4)
        self.seg_num = 3
        self.shift_ratio = 0.25
-        self.dtype = 'float64'
        self.data_format = 'NCHW'
@@ -174,6 +177,56 @@ class TestTemporalShiftAPI(unittest.TestCase):
        self.assertRaises(ValueError, attr_data_format)
+class TestTemporalShiftFP16OP(TestTemporalShift):
+    def init_dtype(self):
+        self.dtype = np.float16
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not complied with CUDA and not support the bfloat16",
+)
+class TestTemporalShiftBF16(OpTest):
+    def initTestCase(self):
+        self.x_shape = (3, 10, 5, 5)
+        self.seg_num = 1
+        self.shift_ratio = 0.3
+        self.dtype = np.uint16
+        self.data_format = 'NCHW'
+    def setUp(self):
+        self.initTestCase()
+        self.op_type = 'temporal_shift'
+        self.python_api = paddle.nn.functional.temporal_shift
+        x = np.random.random(self.x_shape).astype(np.float32)
+        self.attrs = {
+            "seg_num": self.seg_num,
+            "shift_ratio": self.shift_ratio,
+            "data_format": self.data_format,
+        }
+        self.inputs = {
+            "X": convert_float_to_uint16(x),
+        }
+        output = temporal_shift(
+            x, self.seg_num, self.shift_ratio, self.data_format
+        )
+        self.outputs = {"Out": convert_float_to_uint16(output)}
+        self.python_out_sig = ["Out"]
+    def test_check_output(self):
+        place = core.CUDAPlace(0)
+        self.check_output_with_place(place)
+    def test_check_grad_ignore_uv(self):
+        place = core.CUDAPlace(0)
+        self.check_grad_with_place(place, ['X'], 'Out')
 if __name__ == "__main__":
    paddle.enable_static()
    unittest.main()