diff --git a/paddle/fluid/operators/temporal_shift_op.cu b/paddle/fluid/operators/temporal_shift_op.cu
index b61d9aeff7d4c2b92c4861444e8c4d1bb5d9d1cc..4f2d7ce3cff9e18b0d0a247d892f834984d9ad3c 100644
--- a/paddle/fluid/operators/temporal_shift_op.cu
+++ b/paddle/fluid/operators/temporal_shift_op.cu
@@ -33,8 +33,8 @@ __global__ void KeTemporalShiftFw(const T* input, T* output, const int ntchw,
   int ih = (tid % hw) / w;
   int iw = tid % w;
 
-  const int c1 = static_cast<T>(c * shift_ratio);
-  const int c2 = static_cast<T>(c * 2 * shift_ratio);
+  const int c1 = static_cast<int>(c * shift_ratio);
+  const int c2 = static_cast<int>(c * 2 * shift_ratio);
 
   if (ic < c1) {
     src_it = it - 1;
@@ -69,8 +69,8 @@ __global__ void KeTemporalShiftBw(const T* output_grad, T* input_grad,
   int ih = (tid % hw) / w;
   int iw = tid % w;
 
-  const int c1 = static_cast<T>(c * shift_ratio);
-  const int c2 = static_cast<T>(c * 2 * shift_ratio);
+  const int c1 = static_cast<int>(c * shift_ratio);
+  const int c2 = static_cast<int>(c * 2 * shift_ratio);
 
   if (ic < c1) {
     src_it = it - 1;
@@ -163,8 +163,11 @@ class TemporalShiftGradOpCUDAKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(temporal_shift, ops::TemporalShiftOpCUDAKernel<float>,
-                        ops::TemporalShiftOpCUDAKernel<double>);
-REGISTER_OP_CUDA_KERNEL(temporal_shift_grad,
-                        ops::TemporalShiftGradOpCUDAKernel<float>,
-                        ops::TemporalShiftGradOpCUDAKernel<double>);
+REGISTER_OP_CUDA_KERNEL(
+    temporal_shift, ops::TemporalShiftOpCUDAKernel<float>,
+    ops::TemporalShiftOpCUDAKernel<double>,
+    ops::TemporalShiftOpCUDAKernel<paddle::platform::float16>);
+REGISTER_OP_CUDA_KERNEL(
+    temporal_shift_grad, ops::TemporalShiftGradOpCUDAKernel<float>,
+    ops::TemporalShiftGradOpCUDAKernel<double>,
+    ops::TemporalShiftGradOpCUDAKernel<paddle::platform::float16>);
diff --git a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
index 12eec2073b3d041d58cee19226e7dc05ae03d4d0..050c38e5499be8628ebefbc1d13aba81dde195c6 100644
--- a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
+++ b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
@@ -40,7 +40,7 @@ class TestTemporalShift(OpTest):
     def setUp(self):
         self.initTestCase()
         self.op_type = 'temporal_shift'
-        x = np.random.random(self.x_shape).astype('float64')
+        x = np.random.random(self.x_shape).astype(self.dtype)
 
         self.attrs = {
             "seg_num": self.seg_num,
@@ -62,6 +62,7 @@ class TestTemporalShift(OpTest):
         self.x_shape = (6, 4, 4, 4)
         self.seg_num = 3
         self.shift_ratio = 0.25
+        self.dtype = 'float64'
 
 
 class TestTemporalShift2(TestTemporalShift):
@@ -78,6 +79,26 @@ class TestTemporalShift3(TestTemporalShift):
         self.shift_ratio = 0.3
 
 
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestTemporalShiftFP16(TestTemporalShift):
+    def initTestCase(self):
+        self.x_shape = (3, 10, 5, 5)
+        self.seg_num = 1
+        self.shift_ratio = 0.3
+        self.dtype = 'float16'
+
+    def test_check_output(self):
+        place = core.CUDAPlace(0)
+        if core.is_float16_supported(place):
+            self.check_output_with_place(place)
+
+    def test_check_grad_ignore_uv(self):
+        place = core.CUDAPlace(0)
+        if core.is_float16_supported(place):
+            self.check_grad_with_place(place, ['X'], 'Out')
+
+
 class TestTemporalShiftAPI(unittest.TestCase):
     def test_api(self):
         input = paddle.randn([6, 4, 2, 2])
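
Note (not part of the patch): a minimal usage sketch of the float16 path this change enables, assuming a CUDA-enabled build of Paddle. paddle.nn.functional.temporal_shift is the public API that reaches this CUDA kernel; the channel-split indices c1/c2 are now computed with static_cast<int>, so they stay exact even when T is float16.

import paddle
import paddle.nn.functional as F

paddle.set_device('gpu')  # the float16 kernels are registered for CUDA only
# (N*T, C, H, W) input with N=2 clips of T=seg_num=3 frames each
x = paddle.randn([6, 4, 2, 2]).astype('float16')
out = F.temporal_shift(x, seg_num=3, shift_ratio=0.25)
print(out.dtype)  # paddle.float16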