Unverified commit c3328288, authored by wuyefeilin, committed by GitHub

support int32 and int64 kernel for clip operator (#32373)

Parent a1a527fb
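In short, this change registers int and int64_t CPU/CUDA kernels for the clip and clip_grad operators and relaxes the Python-side dtype checks, so paddle.clip now accepts integer tensors. A minimal usage sketch of the new behavior, mirroring the tests below (paddle.to_tensor and the printed result are illustrative, not part of this diff):

    import paddle

    x = paddle.to_tensor([1, 5, 9], dtype='int64')
    y = paddle.clip(x, min=2, max=8)  # dispatches to the new int64_t kernel
    print(y.numpy())                  # [2 5 8]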
@@ -145,10 +145,14 @@ REGISTER_OPERATOR(clip_grad, ops::ClipOpGrad, ops::ClipGradInplaceInferer,
                   ops::ClipDoubleGradOpMaker<paddle::imperative::OpBase>);
 REGISTER_OP_CPU_KERNEL(
     clip, ops::ClipKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ClipKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::ClipKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::ClipKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::ClipKernel<paddle::platform::CPUDeviceContext, int64_t>);
 REGISTER_OP_CPU_KERNEL(
     clip_grad, ops::ClipGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::ClipGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
 REGISTER_OP_VERSION(clip)
     .AddCheckpoint(
...
@@ -17,8 +17,12 @@ limitations under the License. */
 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
     clip, ops::ClipKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ClipKernel<paddle::platform::CUDADeviceContext, double>);
+    ops::ClipKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ClipKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ClipKernel<paddle::platform::CUDADeviceContext, int64_t>);
 REGISTER_OP_CUDA_KERNEL(
     clip_grad, ops::ClipGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, double>);
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
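These REGISTER_OP_*_KERNEL blocks bind one kernel instantiation per element type, and at runtime the framework selects a kernel by the input tensor's dtype. So before this change an integer input failed at kernel lookup rather than being clipped. A hedged sketch of the old failure mode (the exact exception type and message depend on the Paddle build; on a build with this commit the call simply succeeds):

    import paddle

    x = paddle.to_tensor([[1, 9]], dtype='int32')
    try:
        paddle.clip(x, min=2, max=8)  # pre-change: no int kernel registered
    except Exception as e:            # raised only on builds without this commit
        print(e)                      # a kernel-not-found / unsupported-dtype error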
@@ -50,10 +50,14 @@ class TestClipOp(OpTest):
         self.outputs = {'Out': np.clip(self.inputs['X'], min_v, max_v)}

     def test_check_output(self):
+        paddle.enable_static()
         self.check_output()
+        paddle.disable_static()

     def test_check_grad_normal(self):
+        paddle.enable_static()
         self.check_grad(['X'], 'Out')
+        paddle.disable_static()

     def initTestCase(self):
         self.shape = (4, 10, 10)
@@ -102,6 +106,7 @@ class TestCase5(TestClipOp):

 class TestClipOpError(unittest.TestCase):
     def test_errors(self):
+        paddle.enable_static()
         with program_guard(Program(), Program()):
             input_data = np.random.random((2, 4)).astype("float32")
@@ -115,6 +120,7 @@ class TestClipOpError(unittest.TestCase):
             fluid.layers.clip(x=x2, min=-1.0, max=1.0)

         self.assertRaises(TypeError, test_dtype)
+        paddle.disable_static()


 class TestClipAPI(unittest.TestCase):
@@ -140,7 +146,10 @@ class TestClipAPI(unittest.TestCase):
         out_8 = paddle.clip(images)
         out_9 = paddle.clip(paddle.cast(images, 'float64'), min=0.2, max=0.9)
+        out_10 = paddle.clip(paddle.cast(images * 10, 'int32'), min=2, max=8)
+        out_11 = paddle.clip(paddle.cast(images * 10, 'int64'), min=2, max=8)

-        res1, res2, res3, res4, res5, res6, res7, res8, res9 = exe.run(
+        res1, res2, res3, res4, res5, res6, res7, res8, res9, res10, res11 = exe.run(
             fluid.default_main_program(),
             feed={
                 "image": data,
@@ -148,7 +157,8 @@ class TestClipAPI(unittest.TestCase):
                 "max": np.array([0.8]).astype('float32')
             },
             fetch_list=[
-                out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8, out_9
+                out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8, out_9,
+                out_10, out_11
             ])

         self.assertTrue(np.allclose(res1, data.clip(0.2, 0.8)))
@@ -161,8 +171,14 @@ class TestClipAPI(unittest.TestCase):
         self.assertTrue(np.allclose(res8, data))
         self.assertTrue(
             np.allclose(res9, data.astype(np.float64).clip(0.2, 0.9)))
+        self.assertTrue(
+            np.allclose(res10, (data * 10).astype(np.int32).clip(2, 8)))
+        self.assertTrue(
+            np.allclose(res11, (data * 10).astype(np.int64).clip(2, 8)))
+        paddle.disable_static()

     def test_clip_dygraph(self):
+        paddle.disable_static()
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         paddle.disable_static(place)
@@ -176,9 +192,16 @@ class TestClipAPI(unittest.TestCase):
         out_2 = paddle.clip(images, min=0.2, max=0.9)
         out_3 = paddle.clip(images, min=v_min, max=v_max)
+        out_4 = paddle.clip(paddle.cast(images * 10, 'int32'), min=2, max=8)
+        out_5 = paddle.clip(paddle.cast(images * 10, 'int64'), min=2, max=8)

         self.assertTrue(np.allclose(out_1.numpy(), data.clip(0.2, 0.8)))
         self.assertTrue(np.allclose(out_2.numpy(), data.clip(0.2, 0.9)))
         self.assertTrue(np.allclose(out_3.numpy(), data.clip(0.2, 0.8)))
+        self.assertTrue(
+            np.allclose(out_4.numpy(), (data * 10).astype(np.int32).clip(2, 8)))
+        self.assertTrue(
+            np.allclose(out_5.numpy(), (data * 10).astype(np.int64).clip(2, 8)))

     def test_errors(self):
         paddle.enable_static()
@@ -186,6 +209,7 @@ class TestClipAPI(unittest.TestCase):
         x2 = fluid.data(name='x2', shape=[1], dtype="int8")
         self.assertRaises(TypeError, paddle.clip, x=x1, min=0.2, max=0.8)
         self.assertRaises(TypeError, paddle.clip, x=x2, min=0.2, max=0.8)
+        paddle.disable_static()


 if __name__ == '__main__':
...
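The paddle.enable_static() / paddle.disable_static() calls added throughout these tests bracket the static-graph cases: Paddle 2.x starts in dynamic-graph mode, while OpTest.check_output/check_grad and program_guard build static Programs, so each test switches modes on entry and restores the dygraph default on exit. A minimal sketch of that pattern (the try/finally wrapper is my addition; the tests above toggle inline):

    import paddle

    paddle.enable_static()
    try:
        pass  # build and run a static Program here, e.g. under program_guard
    finally:
        paddle.disable_static()  # restore the dygraph default for later tests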
@@ -1475,10 +1475,10 @@ def clip(x, min=None, max=None, name=None):
         Out = MIN(MAX(x, min), max)

     Args:
-        x (Tensor): An N-D Tensor with data type float32 or float64.
+        x (Tensor): An N-D Tensor with data type float32, float64, int32 or int64.
-        min (float32|Tensor): The lower bound with type ``float32`` or a ``Tensor``
+        min (float|int|Tensor): The lower bound with type ``float`` , ``int`` or a ``Tensor``
             with shape [1] and type ``int32``, ``float32``, ``float64``.
-        max (float32|Tensor): The upper bound with type ``float32`` or a ``Tensor``
+        max (float|int|Tensor): The upper bound with type ``float``, ``int`` or a ``Tensor``
             with shape [1] and type ``int32``, ``float32``, ``float64``.
         name (str, optional): The default value is None. Normally there is no
             need for user to set this property. For more information, please
@@ -1503,16 +1503,24 @@ def clip(x, min=None, max=None, name=None):
             # [[4.5, 6.4]
     """
-    fmin = float(np.finfo(np.float32).min)
-    fmax = float(np.finfo(np.float32).max)
+    x_dtype = str(x.dtype)
+    if x_dtype == 'paddle.int32':
+        min_ = np.iinfo(np.int32).min
+        max_ = np.iinfo(np.int32).max - 2**7
+    elif x_dtype == 'paddle.int64':
+        min_ = np.iinfo(np.int64).min
+        max_ = np.iinfo(np.int64).max - 2**39
+    else:
+        min_ = float(np.finfo(np.float32).min)
+        max_ = float(np.finfo(np.float32).max)

     if in_dygraph_mode():
         if isinstance(min, Variable):
             min = min.numpy().item(0)
         if isinstance(max, Variable):
             max = max.numpy().item(0)
-        min = fmin if min is None else min
-        max = fmax if max is None else max
+        min = min_ if min is None else min
+        max = max_ if max is None else max
         return core.ops.clip(x, "min", min, "max", max)

     if min is not None:
@@ -1526,10 +1534,10 @@ def clip(x, min=None, max=None, name=None):
         check_dtype(max.dtype, 'max', ['float32', 'float64', 'int32'],
                     'clip', '(When the type of max in clip is Variable.)')

-    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'clip')
+    check_variable_and_dtype(x, 'x', ['float32', 'float64', 'int32', 'int64'], 'clip')

     inputs = {'X': x}
-    attrs = {'min': fmin, 'max': fmax}
+    attrs = {'min': min_, 'max': max_}

     if isinstance(min, Variable):
         min.stop_gradient = True
...
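The offsets on the new integer defaults (2**7 for int32, 2**39 for int64) look chosen so the defaults survive a round-trip through the operator's single-precision min/max attributes: each offset is one float32 spacing (ULP) at the magnitude of the corresponding integer maximum, so the exact maxima would round up and overflow on the cast back to the integer type. That reading is mine, not stated in the commit; a quick numeric check:

    import numpy as np

    i32, i64 = np.iinfo(np.int32), np.iinfo(np.int64)

    # The exact maxima round up past the representable integer range...
    print(int(np.float32(i32.max)) > i32.max)            # True: rounds to 2**31
    print(int(np.float32(i64.max)) > i64.max)            # True: rounds to 2**63

    # ...while backing off by one float32 step keeps them in range.
    print(int(np.float32(i32.max - 2**7)) <= i32.max)    # True
    print(int(np.float32(i64.max - 2**39)) <= i64.max)   # True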