From c3328288627713a1f4dc75c27e328ac6c7bb6fe4 Mon Sep 17 00:00:00 2001
From: wuyefeilin <30919197+wuyefeilin@users.noreply.github.com>
Date: Thu, 22 Apr 2021 19:45:07 +0800
Subject: [PATCH] support int32 and int64 kernel for clip operator (#32373)

support int32 and int64 kernel for clip operator
---
 paddle/fluid/operators/clip_op.cc         |  8 ++++--
 paddle/fluid/operators/clip_op.cu         |  8 ++++--
 .../fluid/tests/unittests/test_clip_op.py | 28 +++++++++++++++++--
 python/paddle/tensor/math.py              | 26 +++++++++++------
 4 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/paddle/fluid/operators/clip_op.cc b/paddle/fluid/operators/clip_op.cc
index eb27df8a367..7176a0466bb 100644
--- a/paddle/fluid/operators/clip_op.cc
+++ b/paddle/fluid/operators/clip_op.cc
@@ -145,10 +145,14 @@ REGISTER_OPERATOR(clip_grad, ops::ClipOpGrad,
                   ops::ClipGradInplaceInferer, ops::ClipDoubleGradOpMaker);
 REGISTER_OP_CPU_KERNEL(
     clip, ops::ClipKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ClipKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::ClipKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::ClipKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::ClipKernel<paddle::platform::CPUDeviceContext, int64_t>);
 REGISTER_OP_CPU_KERNEL(
     clip_grad, ops::ClipGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
 
 REGISTER_OP_VERSION(clip)
     .AddCheckpoint(
diff --git a/paddle/fluid/operators/clip_op.cu b/paddle/fluid/operators/clip_op.cu
index d31b81c13c5..fd61e4ea61d 100644
--- a/paddle/fluid/operators/clip_op.cu
+++ b/paddle/fluid/operators/clip_op.cu
@@ -17,8 +17,12 @@ limitations under the License. */
 
 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
     clip, ops::ClipKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ClipKernel<paddle::platform::CUDADeviceContext, double>);
+    ops::ClipKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ClipKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ClipKernel<paddle::platform::CUDADeviceContext, int64_t>);
 REGISTER_OP_CUDA_KERNEL(
     clip_grad, ops::ClipGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, double>);
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
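
Taken together, the CPU and CUDA registrations above instantiate the existing Clip and
ClipGrad kernels for int and int64_t; the operator definition itself is unchanged. A
minimal dygraph sketch of what this enables, assuming a Paddle build that includes this
patch (paddle.to_tensor is used here for illustration; the patch's own tests build the
integer tensor via paddle.cast instead):

import numpy as np
import paddle

paddle.disable_static()                 # dygraph mode, as in the patch's tests
data = np.arange(10, dtype=np.int64)
x = paddle.to_tensor(data)              # int64 tensor
out = paddle.clip(x, min=2, max=8)      # now served by the newly registered int64_t kernel
assert np.array_equal(out.numpy(), data.clip(2, 8))
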
diff --git a/python/paddle/fluid/tests/unittests/test_clip_op.py b/python/paddle/fluid/tests/unittests/test_clip_op.py
index 2946798a82f..b05100fc7b4 100644
--- a/python/paddle/fluid/tests/unittests/test_clip_op.py
+++ b/python/paddle/fluid/tests/unittests/test_clip_op.py
@@ -50,10 +50,14 @@ class TestClipOp(OpTest):
         self.outputs = {'Out': np.clip(self.inputs['X'], min_v, max_v)}
 
     def test_check_output(self):
+        paddle.enable_static()
         self.check_output()
+        paddle.disable_static()
 
     def test_check_grad_normal(self):
+        paddle.enable_static()
         self.check_grad(['X'], 'Out')
+        paddle.disable_static()
 
     def initTestCase(self):
         self.shape = (4, 10, 10)
@@ -102,6 +106,7 @@ class TestCase5(TestClipOp):
 
 class TestClipOpError(unittest.TestCase):
     def test_errors(self):
+        paddle.enable_static()
         with program_guard(Program(), Program()):
             input_data = np.random.random((2, 4)).astype("float32")
 
@@ -115,6 +120,7 @@ class TestClipOpError(unittest.TestCase):
             fluid.layers.clip(x=x2, min=-1.0, max=1.0)
 
         self.assertRaises(TypeError, test_dtype)
+        paddle.disable_static()
 
 
 class TestClipAPI(unittest.TestCase):
@@ -140,7 +146,10 @@ class TestClipAPI(unittest.TestCase):
         out_8 = paddle.clip(images)
         out_9 = paddle.clip(paddle.cast(images, 'float64'), min=0.2, max=0.9)
 
-        res1, res2, res3, res4, res5, res6, res7, res8, res9 = exe.run(
+        out_10 = paddle.clip(paddle.cast(images * 10, 'int32'), min=2, max=8)
+        out_11 = paddle.clip(paddle.cast(images * 10, 'int64'), min=2, max=8)
+
+        res1, res2, res3, res4, res5, res6, res7, res8, res9, res10, res11 = exe.run(
             fluid.default_main_program(),
             feed={
                 "image": data,
@@ -148,7 +157,8 @@ class TestClipAPI(unittest.TestCase):
                 "min": np.array([0.1]).astype('float32'),
                 "max": np.array([0.8]).astype('float32')
             },
             fetch_list=[
-                out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8, out_9
+                out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8, out_9,
+                out_10, out_11
             ])
         self.assertTrue(np.allclose(res1, data.clip(0.2, 0.8)))
@@ -161,8 +171,14 @@ class TestClipAPI(unittest.TestCase):
         self.assertTrue(np.allclose(res8, data))
         self.assertTrue(
             np.allclose(res9, data.astype(np.float64).clip(0.2, 0.9)))
+        self.assertTrue(
+            np.allclose(res10, (data * 10).astype(np.int32).clip(2, 8)))
+        self.assertTrue(
+            np.allclose(res11, (data * 10).astype(np.int64).clip(2, 8)))
+        paddle.disable_static()
 
     def test_clip_dygraph(self):
+        paddle.disable_static()
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         paddle.disable_static(place)
@@ -176,9 +192,16 @@ class TestClipAPI(unittest.TestCase):
         out_2 = paddle.clip(images, min=0.2, max=0.9)
         out_3 = paddle.clip(images, min=v_min, max=v_max)
 
+        out_4 = paddle.clip(paddle.cast(images * 10, 'int32'), min=2, max=8)
+        out_5 = paddle.clip(paddle.cast(images * 10, 'int64'), min=2, max=8)
+
         self.assertTrue(np.allclose(out_1.numpy(), data.clip(0.2, 0.8)))
         self.assertTrue(np.allclose(out_2.numpy(), data.clip(0.2, 0.9)))
         self.assertTrue(np.allclose(out_3.numpy(), data.clip(0.2, 0.8)))
+        self.assertTrue(
+            np.allclose(out_4.numpy(), (data * 10).astype(np.int32).clip(2, 8)))
+        self.assertTrue(
+            np.allclose(out_5.numpy(), (data * 10).astype(np.int64).clip(2, 8)))
 
     def test_errors(self):
         paddle.enable_static()
@@ -186,6 +209,7 @@ class TestClipAPI(unittest.TestCase):
         x2 = fluid.data(name='x2', shape=[1], dtype="int8")
         self.assertRaises(TypeError, paddle.clip, x=x1, min=0.2, max=0.8)
         self.assertRaises(TypeError, paddle.clip, x=x2, min=0.2, max=0.8)
+        paddle.disable_static()
 
 
 if __name__ == '__main__':
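
One pattern in the test changes above: each static-graph section is now explicitly
entered with paddle.enable_static() and left again with paddle.disable_static(), so that
later dygraph tests such as test_clip_dygraph start in the mode they expect. The same
idea in isolation, a sketch not taken from the patch (the try/finally is added here so
the mode is restored even when the block raises; the patch's tests omit it):

import paddle

paddle.enable_static()
try:
    # build and run static-graph programs here
    ...
finally:
    # return to dygraph mode for whatever runs next
    paddle.disable_static()
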
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 87efa9ac442..215d467828a 100755
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -1475,10 +1475,10 @@ def clip(x, min=None, max=None, name=None):
         Out = MIN(MAX(x, min), max)
 
     Args:
-        x (Tensor): An N-D Tensor with data type float32 or float64.
-        min (float32|Tensor): The lower bound with type ``float32`` or a ``Tensor``
+        x (Tensor): An N-D Tensor with data type float32, float64, int32 or int64.
+        min (float|int|Tensor): The lower bound with type ``float``, ``int`` or a ``Tensor``
             with shape [1] and type ``int32``, ``float32``, ``float64``.
-        max (float32|Tensor): The upper bound with type ``float32`` or a ``Tensor``
+        max (float|int|Tensor): The upper bound with type ``float``, ``int`` or a ``Tensor``
             with shape [1] and type ``int32``, ``float32``, ``float64``.
         name (str, optional): The default value is None. Normally there is no need for
             user to set this property. For more information, please
@@ -1503,16 +1503,24 @@ def clip(x, min=None, max=None, name=None):
             # [[4.5, 6.4]
     """
 
-    fmin = float(np.finfo(np.float32).min)
-    fmax = float(np.finfo(np.float32).max)
+    x_dtype = str(x.dtype)
+    if x_dtype == 'paddle.int32':
+        min_ = np.iinfo(np.int32).min
+        max_ = np.iinfo(np.int32).max - 2**7
+    elif x_dtype == 'paddle.int64':
+        min_ = np.iinfo(np.int64).min
+        max_ = np.iinfo(np.int64).max - 2**39
+    else:
+        min_ = float(np.finfo(np.float32).min)
+        max_ = float(np.finfo(np.float32).max)
 
     if in_dygraph_mode():
         if isinstance(min, Variable):
             min = min.numpy().item(0)
         if isinstance(max, Variable):
             max = max.numpy().item(0)
-        min = fmin if min is None else min
-        max = fmax if max is None else max
+        min = min_ if min is None else min
+        max = max_ if max is None else max
         return core.ops.clip(x, "min", min, "max", max)
 
     if min is not None:
@@ -1526,10 +1534,10 @@ def clip(x, min=None, max=None, name=None):
         check_dtype(max.dtype, 'max', ['float32', 'float64', 'int32'], 'clip',
                     '(When the type of max in clip is Variable.)')
 
-    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'clip')
+    check_variable_and_dtype(x, 'x', ['float32', 'float64', 'int32', 'int64'], 'clip')
 
     inputs = {'X': x}
-    attrs = {'min': fmin, 'max': fmax}
+    attrs = {'min': min_, 'max': max_}
 
     if isinstance(min, Variable):
         min.stop_gradient = True
--
GitLab
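
A note on the defaults introduced above: when min or max is omitted for an integer
input, clip now falls back to the np.iinfo bounds shrunk by a margin (2**7 for int32,
2**39 for int64) rather than the exact integer extremes. The patch does not state the
reason, but the margins are consistent with the bounds having to survive a pass through
floating point (the operator's min/max attributes), where the exact integer maxima round
up and overflow. A standalone check of that rounding, not taken from the patch:

import numpy as np

i32max = np.iinfo(np.int32).max     # 2**31 - 1
print(float(np.float32(i32max)))    # 2147483648.0: rounds up to 2**31, outside int32

i64max = np.iinfo(np.int64).max     # 2**63 - 1
print(float(i64max))                # 9.223372036854776e+18: rounds up to 2**63

# the shrunken defaults stay inside the integer range even after float rounding
print(i32max - 2**7, i64max - 2**39)
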