From cdadc8f01948669cb4ed6409435c0492fb4b4c67 Mon Sep 17 00:00:00 2001
From: wangchaochaohu
Date: Tue, 27 Oct 2020 15:43:15 +0800
Subject: [PATCH] refine temporal_shift_op for performance optimization using gpu kernel config (#28114)

---
 paddle/fluid/operators/temporal_shift_op.cu | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/paddle/fluid/operators/temporal_shift_op.cu b/paddle/fluid/operators/temporal_shift_op.cu
index a292f16fe20..b61d9aeff7d 100644
--- a/paddle/fluid/operators/temporal_shift_op.cu
+++ b/paddle/fluid/operators/temporal_shift_op.cu
@@ -11,6 +11,7 @@
 
 #include "paddle/fluid/operators/temporal_shift_op.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
+#include "paddle/fluid/platform/gpu_launch_config.h"
 
 namespace paddle {
 namespace operators {
@@ -112,11 +113,11 @@ class TemporalShiftOpCUDAKernel : public framework::OpKernel<T> {
     T* output_data = output->mutable_data<T>({nt, c, h, w}, ctx.GetPlace());
 
     int pixelNum = nt * chw;
-    int grid_dim = (pixelNum + 512 - 1) / 512;
-    grid_dim = grid_dim > 8 ? 8 : grid_dim;
+    platform::GpuLaunchConfig config =
+        platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), pixelNum);
 
-    KeTemporalShiftFw<
-        T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
+    KeTemporalShiftFw<T><<<config.block_per_grid, config.thread_per_block, 0,
+                           ctx.cuda_device_context().stream()>>>(
         input_data, output_data, ntchw, tchw, chw, hw, w, t, c, shift_ratio);
   }
 };
@@ -148,11 +149,11 @@ class TemporalShiftGradOpCUDAKernel : public framework::OpKernel<T> {
         static_cast<T>(0));
 
     int pixelNum = nt * chw;
-    int grid_dim = (pixelNum + 512 - 1) / 512;
-    grid_dim = grid_dim > 8 ? 8 : grid_dim;
+    platform::GpuLaunchConfig config =
+        platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), pixelNum);
 
-    KeTemporalShiftBw<
-        T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
+    KeTemporalShiftBw<T><<<config.block_per_grid, config.thread_per_block, 0,
+                           ctx.cuda_device_context().stream()>>>(
         output_grad_data, input_grad_data, ntchw, tchw, chw, hw, w, t, c,
         shift_ratio);
   }
-- 
GitLab