diff --git a/paddle/fluid/operators/bilateral_slice_op.cu b/paddle/fluid/operators/bilateral_slice_op.cu index e56a4be53d14968da60259b69e74e1e988caac8a..3c64ed1acc847d8f60ad39bf3437b22ad8f2bb4a 100644 --- a/paddle/fluid/operators/bilateral_slice_op.cu +++ b/paddle/fluid/operators/bilateral_slice_op.cu @@ -472,8 +472,8 @@ class BilateralSliceGradOpCUDAKernel : public framework::OpKernel { grid_sizes.gw = gw; grid_sizes.input_chans = input_chans; - platform::GpuLaunchConfig config = - platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), grid_count); + platform::GpuLaunchConfig config = platform::GetGpuLaunchConfig1D( + ctx.cuda_device_context(), grid_count, 512); BilateralSliceCudaGridGradKernel< T><< { grid_grad_data, output_grad_data, guide_data, input_data, grid_sizes, has_offset, grid_count, output_chans); - config = - platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), guide_count); + config = platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), + guide_count, 512); BilateralSliceCudaGuideGradKernel< T><< { guide_grad_data, output_grad_data, grid_data, guide_data, input_data, grid_sizes, has_offset, guide_count, output_chans); - config = - platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), input_count); + config = platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), + input_count, 512); BilateralSliceCudaInputGradKernel< T><<