diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index 6121389c12864140b4822cb1e6a9bb3ec60b2239..d12a25f360eece7bda47411c5bb4cd8625cdf729 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -9,9 +9,11 @@ See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include "paddle/fluid/operators/interpolate_op.h" #include "paddle/fluid/platform/cuda_primitives.h" +#include "paddle/fluid/platform/gpu_launch_config.h" namespace paddle { namespace operators { @@ -586,17 +588,18 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, int out_chw = c * out_hw; int pixelNum = n * out_chw; - int grid_dim = (pixelNum + 512 - 1) / 512; - grid_dim = grid_dim > 8 ? 8 : grid_dim; + + platform::GpuLaunchConfig config = + platform::getGpuLaunchConfig(pixelNum, ctx); if ("nearest" == interp_method) { - KeNearestNeighborInterpFw< - T><<>>( + KeNearestNeighborInterpFw<<>>( input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, data_layout); } else if ("bilinear" == interp_method) { - KeBilinearInterpFw< - T><<>>( + KeBilinearInterpFw<<>>( input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, align_mode, data_layout); } @@ -696,12 +699,13 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, int out_cdhw = c * out_dhw; int pixelNum = n * out_cdhw; - int grid_dim = (pixelNum + 512 - 1) / 512; - grid_dim = grid_dim > 8 ? 
8 : grid_dim; + + platform::GpuLaunchConfig config = + platform::getGpuLaunchConfig(pixelNum, ctx); if ("trilinear" == interp_method) { - KeTrilinearInterpFw< - T><<>>( + KeTrilinearInterpFw<<>>( input_data, in_d, in_h, in_w, n, in_cdhw, output_data, out_d, out_h, out_w, n, out_cdhw, c, ratio_d, ratio_h, ratio_w, align_corners, align_mode, data_layout); @@ -787,17 +791,18 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, int out_chw = c * out_hw; int pixelNum = n * out_chw; - int grid_dim = (pixelNum + 512 - 1) / 512; - grid_dim = grid_dim > 8 ? 8 : grid_dim; + + platform::GpuLaunchConfig config = + platform::getGpuLaunchConfig(pixelNum, ctx); if ("nearest" == interp_method) { - KeNearestNeighborInterpBw< - T><<>>( + KeNearestNeighborInterpBw<<>>( input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, data_layout); } else if ("bilinear" == interp_method) { - KeBilinearInterpBw< - T><<>>( + KeBilinearInterpBw<<>>( input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, align_mode, data_layout); @@ -892,12 +897,13 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, int out_cdhw = c * out_dhw; int pixelNum = n * out_cdhw; - int grid_dim = (pixelNum + 512 - 1) / 512; - grid_dim = grid_dim > 8 ? 
8 : grid_dim; + + platform::GpuLaunchConfig config = + platform::getGpuLaunchConfig(pixelNum, ctx); if ("trilinear" == interp_method) { - KeTrilinearInterpBw< - T><<>>( + KeTrilinearInterpBw<<>>( input_grad_data, in_d, in_h, in_w, n, in_cdhw, output_grad_data, out_d, out_h, out_w, n, out_cdhw, c, ratio_d, ratio_h, ratio_w, align_corners, align_mode, data_layout); diff --git a/paddle/fluid/platform/gpu_launch_config.h b/paddle/fluid/platform/gpu_launch_config.h new file mode 100644 index 0000000000000000000000000000000000000000..d57478b89781ed073cef0fa73e201784f73dfc6b --- /dev/null +++ b/paddle/fluid/platform/gpu_launch_config.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +#include "paddle/fluid/platform/cuda_primitives.h" + +namespace paddle { +namespace platform { + +struct GpuLaunchConfig { + // Number of threads per block. + int threads; + // Number of blocks for GPU kernel launch. 
+  int blocks;
+
+  GpuLaunchConfig(int threads, int blocks) : threads(threads), blocks(blocks) {}
+};
+
+// Chooses a 1-D launch shape (threads per block, number of blocks) for a
+// kernel that has N elements to process.
+//
+// The block count is capped by the device's SM count, so the launched grid
+// may contain fewer threads than N.
+// NOTE(review): this presumes every kernel launched with the result iterates
+// until all N elements are covered -- confirm for each caller.
+//
+// Parameters:
+//   N            number of elements the kernel has to process.
+//   ctx          execution context; its CUDA device context supplies the
+//                device limits read below.
+//   max_threads  upper bound on threads per block. Defaults to 1024, the
+//                value that used to be hard-coded here.
+//
+// Returns a GpuLaunchConfig whose `threads` and `blocks` are both >= 1.
+// Without the lower clamp, N <= 0 would produce a block count of 0 (or a
+// negative one), which is not a valid kernel launch configuration.
+inline GpuLaunchConfig getGpuLaunchConfig(
+    const int N, const framework::ExecutionContext& ctx,
+    const int max_threads = 1024) {
+  const auto& dev_ctx = ctx.cuda_device_context();
+
+  // Respect both the caller's bound and the device's per-block limit.
+  const int threads =
+      std::max(1, std::min(max_threads, dev_ctx.GetMaxThreadsPerBlock()));
+
+  // Do not request more threads than there are elements, nor more than the
+  // physical thread count reported by the device context.
+  const int physical_thread_count =
+      std::min(dev_ctx.GetMaxPhysicalThreadCount(), N);
+
+  // Ceil-divide into blocks, cap at the SM count, and never drop below one
+  // block so the resulting configuration is always launchable.
+  const int blocks =
+      std::max(1, std::min((physical_thread_count + threads - 1) / threads,
+                           dev_ctx.GetSMCount()));
+
+  return GpuLaunchConfig(threads, blocks);
+}
+
+}  // namespace platform
+}  // namespace paddle