From 9bac4a76617b9e563bb665b3ee51cee5847a4e78 Mon Sep 17 00:00:00 2001
From: Li Min <11663212+limin2021@users.noreply.github.com>
Date: Thu, 16 Dec 2021 10:21:52 +0800
Subject: [PATCH] Add float16 type for scatter op. (#38136)

* Add float16 type for scatter op.

* Add fp16 test for scatter op.

* Add int and int64 support for scatter_grad on gpu.

* Add int and int64 for check_variable_and_dtype routine.

* Minors.

* Code format.
---
 paddle/fluid/operators/scatter_op.cu          | 11 +++-
 .../fluid/tests/unittests/test_scatter_op.py  | 43 +++++++++++++++++++
 python/paddle/tensor/manipulation.py          |  4 +-
 3 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/scatter_op.cu b/paddle/fluid/operators/scatter_op.cu
index 1556099d6f1..c8dae358225 100644
--- a/paddle/fluid/operators/scatter_op.cu
+++ b/paddle/fluid/operators/scatter_op.cu
@@ -110,6 +110,11 @@ namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(scatter, ops::ScatterOpCUDAKernel<float>,
                         ops::ScatterOpCUDAKernel<double>,
                         ops::ScatterOpCUDAKernel<int>,
-                        ops::ScatterOpCUDAKernel<int64_t>);
-REGISTER_OP_CUDA_KERNEL(scatter_grad, ops::ScatterGradOpCUDAKernel<float>,
-                        ops::ScatterGradOpCUDAKernel<double>);
+                        ops::ScatterOpCUDAKernel<int64_t>,
+                        ops::ScatterOpCUDAKernel<paddle::platform::float16>);
+
+REGISTER_OP_CUDA_KERNEL(
+    scatter_grad, ops::ScatterGradOpCUDAKernel<float>,
+    ops::ScatterGradOpCUDAKernel<double>, ops::ScatterOpCUDAKernel<int>,
+    ops::ScatterOpCUDAKernel<int64_t>,
+    ops::ScatterGradOpCUDAKernel<paddle::platform::float16>);
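For context on what the newly registered scatter_grad kernels compute: with overwrite=True and unique indices, the gradient with respect to updates is a gather of the upstream gradient along index, and the gradient with respect to x is the upstream gradient with the scattered rows zeroed. A minimal NumPy sketch of that reference follows; it is not part of the patch, the helper name scatter_grad_ref is made up for illustration, and unique indices are assumed:

    import numpy as np

    def scatter_grad_ref(dout, index):
        # d_updates selects the scattered rows out of the upstream gradient.
        d_updates = dout[index]
        # The overwritten rows of x contribute nothing to the output,
        # so d_x is the upstream gradient with those rows zeroed.
        d_x = np.copy(dout)
        d_x[index] = 0
        return d_x, d_updates

    dout = np.random.random((3, 3)).astype('float16')
    index = np.array([1, 2]).astype('int32')
    d_x, d_updates = scatter_grad_ref(dout, index)

The fp16 test added below encodes exactly this reference: compute_ref_grad_updates gathers dout along index, and ref_dx is dout with the indexed rows set to zero.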
diff --git a/python/paddle/fluid/tests/unittests/test_scatter_op.py b/python/paddle/fluid/tests/unittests/test_scatter_op.py
index e58b2279e56..ad542da7816 100644
--- a/python/paddle/fluid/tests/unittests/test_scatter_op.py
+++ b/python/paddle/fluid/tests/unittests/test_scatter_op.py
@@ -269,6 +269,49 @@ class TestScatterAPI(unittest.TestCase):
         self.assertTrue(np.array_equal(test_dygraph(), test_static_graph()))
 
 
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestScatterOpFp16(OpTest):
+    def setUp(self):
+        self.__class__.op_type = "scatter"
+        # Gradients are computed manually below, so skip the default check.
+        self.__class__.no_need_check_grad = True
+        self.x_type = 'float16'
+        self.x_np = np.ones((3, 3)).astype(self.x_type)
+        self.index_np = np.array([1, 2]).astype("int32")
+        self.updates_np = np.random.random((2, 3)).astype(self.x_type)
+        self.output_np = np.copy(self.x_np)
+        self.output_np[self.index_np] = self.updates_np
+        self.dout_np = np.random.random((3, 3)).astype(self.x_type)
+
+        # Compute ref_dx: dout with the scattered rows zeroed.
+        self.ref_dx = np.copy(self.dout_np)
+        zero_np = np.zeros((2, 3)).astype(self.x_type)
+        self.ref_dx[self.index_np] = zero_np
+
+    def compute_ref_grad_updates(self):
+        ref_grad_updates = paddle.gather(
+            paddle.to_tensor(self.dout_np), paddle.to_tensor(self.index_np))
+        return ref_grad_updates
+
+    def test_scatter_fp16(self):
+        paddle.disable_static(place=paddle.CUDAPlace(0))
+        x_tensor = paddle.to_tensor(self.x_np, stop_gradient=False)
+        index_tensor = paddle.to_tensor(self.index_np)
+        updates_tensor = paddle.to_tensor(self.updates_np, stop_gradient=False)
+        out_tensor = paddle.scatter(x_tensor, index_tensor, updates_tensor)
+        paddle.autograd.backward(
+            [out_tensor], [paddle.to_tensor(self.dout_np)], retain_graph=True)
+        ref_grad_updates = self.compute_ref_grad_updates()
+        np.testing.assert_allclose(
+            ref_grad_updates.numpy(),
+            updates_tensor.grad.numpy(),
+            rtol=1e-5,
+            atol=1e-5)
+        np.testing.assert_allclose(
+            self.ref_dx, x_tensor.grad.numpy(), rtol=1e-5, atol=1e-5)
+
+
 class TestScatterInplaceAPI(TestScatterAPI):
     def executed_api(self):
         self.scatter = paddle.scatter_
diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py
index 42abf4b1466..5d263bde8b3 100644
--- a/python/paddle/tensor/manipulation.py
+++ b/python/paddle/tensor/manipulation.py
@@ -1566,7 +1566,9 @@ def scatter(x, index, updates, overwrite=True, name=None):
     if in_dygraph_mode():
         return _C_ops.scatter(x, index, updates, 'overwrite', overwrite)
 
-    check_variable_and_dtype(x, 'dtype', ['float32', 'float64'], 'scatter')
+    check_variable_and_dtype(
+        x, 'dtype', ['float32', 'float64', 'float16', 'int32', 'int64'],
+        'scatter')
     check_type(overwrite, 'overwrite', bool, 'scatter')
     helper = LayerHelper('scatter', **locals())
     out = helper.create_variable_for_type_inference(x.dtype)
-- 
GitLab
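For completeness, a short usage sketch of the dtype coverage this patch enables. It assumes a CUDA build of Paddle with the patch applied and is not part of the patch itself; the static-graph dtype check in paddle.scatter previously allowed only float32 and float64, and is widened here to match the kernels registered above:

    import numpy as np
    import paddle

    paddle.disable_static(place=paddle.CUDAPlace(0))
    x = paddle.to_tensor(np.ones((3, 3)).astype('float16'))
    index = paddle.to_tensor(np.array([1, 2]).astype('int32'))
    updates = paddle.to_tensor(np.random.random((2, 3)).astype('float16'))
    # Rows 1 and 2 of x are overwritten by the corresponding rows of updates.
    out = paddle.scatter(x, index, updates, overwrite=True)
    print(out.dtype)  # paddle.float16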