support int32 and int64 kernel for clip operator (#32373)

support int32 and int64 kernel for clip operator

support int32 and int64 kernel for clip operator (#32373)
support int32 and int64 kernel for clip operator
c3328288 · wuyefeilin · GitHub · a1a527fb · c3328288 · c3328288
4 changed file
--- a/paddle/fluid/operators/clip_op.cc
+++ b/paddle/fluid/operators/clip_op.cc
@@ -145,10 +145,14 @@ REGISTER_OPERATOR(clip_grad, ops::ClipOpGrad, ops::ClipGradInplaceInferer,
                  ops::ClipDoubleGradOpMaker<paddle::imperative::OpBase>);
 REGISTER_OP_CPU_KERNEL(
    clip, ops::ClipKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ClipKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::ClipKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::ClipKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::ClipKernel<paddle::platform::CPUDeviceContext, int64_t>);
 REGISTER_OP_CPU_KERNEL(
    clip_grad, ops::ClipGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, int64_t>);

 REGISTER_OP_VERSION(clip)
    .AddCheckpoint(

--- a/paddle/fluid/operators/clip_op.cu
+++ b/paddle/fluid/operators/clip_op.cu
@@ -17,8 +17,12 @@ limitations under the License. */
 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
    clip, ops::ClipKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ClipKernel<paddle::platform::CUDADeviceContext, double>);
+    ops::ClipKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ClipKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ClipKernel<paddle::platform::CUDADeviceContext, int64_t>);

 REGISTER_OP_CUDA_KERNEL(
    clip_grad, ops::ClipGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, double>);
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
--- a/python/paddle/fluid/tests/unittests/test_clip_op.py
+++ b/python/paddle/fluid/tests/unittests/test_clip_op.py
@@ -50,10 +50,14 @@ class TestClipOp(OpTest):
        self.outputs = {'Out': np.clip(self.inputs['X'], min_v, max_v)}

    def test_check_output(self):
+        paddle.enable_static()
        self.check_output()
+        paddle.disable_static()

    def test_check_grad_normal(self):
+        paddle.enable_static()
        self.check_grad(['X'], 'Out')
+        paddle.disable_static()

    def initTestCase(self):
        self.shape = (4, 10, 10)
@@ -102,6 +106,7 @@ class TestCase5(TestClipOp):

 class TestClipOpError(unittest.TestCase):
    def test_errors(self):
+        paddle.enable_static()
        with program_guard(Program(), Program()):
            input_data = np.random.random((2, 4)).astype("float32")

@@ -115,6 +120,7 @@ class TestClipOpError(unittest.TestCase):
                fluid.layers.clip(x=x2, min=-1.0, max=1.0)

            self.assertRaises(TypeError, test_dtype)
+        paddle.disable_static()


 class TestClipAPI(unittest.TestCase):
@@ -140,7 +146,10 @@ class TestClipAPI(unittest.TestCase):
        out_8 = paddle.clip(images)
        out_9 = paddle.clip(paddle.cast(images, 'float64'), min=0.2, max=0.9)

-        res1, res2, res3, res4, res5, res6, res7, res8, res9 = exe.run(
+        out_10 = paddle.clip(paddle.cast(images * 10, 'int32'), min=2, max=8)
+        out_11 = paddle.clip(paddle.cast(images * 10, 'int64'), min=2, max=8)
+
+        res1, res2, res3, res4, res5, res6, res7, res8, res9, res10, res11 = exe.run(
            fluid.default_main_program(),
            feed={
                "image": data,
@@ -148,7 +157,8 @@ class TestClipAPI(unittest.TestCase):
                "max": np.array([0.8]).astype('float32')
            },
            fetch_list=[
-                out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8, out_9
+                out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8, out_9,
+                out_10, out_11
            ])

        self.assertTrue(np.allclose(res1, data.clip(0.2, 0.8)))
@@ -161,8 +171,14 @@ class TestClipAPI(unittest.TestCase):
        self.assertTrue(np.allclose(res8, data))
        self.assertTrue(
            np.allclose(res9, data.astype(np.float64).clip(0.2, 0.9)))
+        self.assertTrue(
+            np.allclose(res10, (data * 10).astype(np.int32).clip(2, 8)))
+        self.assertTrue(
+            np.allclose(res11, (data * 10).astype(np.int64).clip(2, 8)))
+        paddle.disable_static()

    def test_clip_dygraph(self):
+        paddle.disable_static()
        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
        ) else fluid.CPUPlace()
        paddle.disable_static(place)
@@ -176,9 +192,16 @@ class TestClipAPI(unittest.TestCase):
        out_2 = paddle.clip(images, min=0.2, max=0.9)
        out_3 = paddle.clip(images, min=v_min, max=v_max)

+        out_4 = paddle.clip(paddle.cast(images * 10, 'int32'), min=2, max=8)
+        out_5 = paddle.clip(paddle.cast(images * 10, 'int64'), min=2, max=8)
+
        self.assertTrue(np.allclose(out_1.numpy(), data.clip(0.2, 0.8)))
        self.assertTrue(np.allclose(out_2.numpy(), data.clip(0.2, 0.9)))
        self.assertTrue(np.allclose(out_3.numpy(), data.clip(0.2, 0.8)))
+        self.assertTrue(
+            np.allclose(out_4.numpy(), (data * 10).astype(np.int32).clip(2, 8)))
+        self.assertTrue(
+            np.allclose(out_5.numpy(), (data * 10).astype(np.int64).clip(2, 8)))

    def test_errors(self):
        paddle.enable_static()
@@ -186,6 +209,7 @@ class TestClipAPI(unittest.TestCase):
        x2 = fluid.data(name='x2', shape=[1], dtype="int8")
        self.assertRaises(TypeError, paddle.clip, x=x1, min=0.2, max=0.8)
        self.assertRaises(TypeError, paddle.clip, x=x2, min=0.2, max=0.8)
+        paddle.disable_static()


 if __name__ == '__main__':

--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -1475,10 +1475,10 @@ def clip(x, min=None, max=None, name=None):
        Out = MIN(MAX(x, min), max)

    Args:
-        x (Tensor): An N-D Tensor with data type float32 or float64.
-        min (float32|Tensor): The lower bound with type ``float32`` or a ``Tensor``
+        x (Tensor): An N-D Tensor with data type float32, float64, int32 or int64.
+        min (float|int|Tensor): The lower bound with type ``float`` , ``int`` or a ``Tensor``
            with shape [1] and type ``int32``, ``float32``, ``float64``.
-        max (float32|Tensor): The upper bound with type ``float32`` or a ``Tensor``
+        max (float|int|Tensor): The upper bound with type ``float``, ``int`` or a ``Tensor``
            with shape [1] and type ``int32``, ``float32``, ``float64``.
        name (str, optional): The default value is None. Normally there is no
            need for user to set this property. For more information, please
@@ -1503,16 +1503,24 @@ def clip(x, min=None, max=None, name=None):
            # [[4.5, 6.4]
    """

-    fmin = float(np.finfo(np.float32).min)
-    fmax = float(np.finfo(np.float32).max)
+    x_dtype = str(x.dtype)
+    if x_dtype == 'paddle.int32':
+        min_ = np.iinfo(np.int32).min
+        max_ = np.iinfo(np.int32).max - 2**7
+    elif x_dtype == 'paddle.int64':
+        min_ = np.iinfo(np.int64).min
+        max_ = np.iinfo(np.int64).max - 2**39
+    else:
+        min_ = float(np.finfo(np.float32).min)
+        max_ = float(np.finfo(np.float32).max)

    if in_dygraph_mode():
        if isinstance(min, Variable):
            min = min.numpy().item(0)
        if isinstance(max, Variable):
            max = max.numpy().item(0)
-        min = fmin if min is None else min
-        max = fmax if max is None else max
+        min = min_ if min is None else min
+        max = max_ if max is None else max
        return core.ops.clip(x, "min", min, "max", max)

    if min is not None:
@@ -1526,10 +1534,10 @@ def clip(x, min=None, max=None, name=None):
            check_dtype(max.dtype, 'max', ['float32', 'float64', 'int32'],
                        'clip', '(When the type of max in clip is Variable.)')

-    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'clip')
+    check_variable_and_dtype(x, 'x', ['float32', 'float64', 'int32', 'int64'], 'clip')

    inputs = {'X': x}
-    attrs = {'min': fmin, 'max': fmax}
+    attrs = {'min': min_, 'max': max_}

    if isinstance(min, Variable):
        min.stop_gradient = True