diff --git a/paddle/fluid/operators/math/pooling.cu b/paddle/fluid/operators/math/pooling.cu
index 32348e908a5c7bd87af973285fe35bd675bf9f3e..267f8c409df301f9b1a8c68f337473198cf827f4 100644
--- a/paddle/fluid/operators/math/pooling.cu
+++ b/paddle/fluid/operators/math/pooling.cu
@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <algorithm>
-#include <vector>
 #include "paddle/fluid/operators/math/pooling.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
 
@@ -22,7 +20,7 @@ namespace operators {
 namespace math {
 
 template <typename PoolProcess, typename T>
-__global__ void KernelPool2D(const int nthreads, const T* input_data,  // NOLINT
+__global__ void KernelPool2D(const int nthreads, const T* input_data,
                              const int channels, const int input_height,
                              const int input_width, const int output_height,
                              const int output_width, const int ksize_height,
@@ -60,8 +58,8 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,  // NOLINT
 
 template <typename PoolProcess, typename T>
 __global__ void KernelPool2DGrad(
-    const int nthreads, const T* input_data, const T* output_data,  // NOLINT
-    const T* output_grad, const int channels, const int input_height,  // NOLINT
+    const int nthreads, const T* input_data, const T* output_data,
+    const T* output_grad, const int channels, const int input_height,
     const int input_width, const int output_height, const int output_width,
     const int ksize_height, const int ksize_width, const int stride_height,
     const int stride_width, const int padding_height, const int padding_width,
@@ -108,8 +106,8 @@ __global__ void KernelPool2DGrad(
 
 template <typename T>
 __global__ void KernelMaxPool2DGrad(
-    const int nthreads, const T* input_data, const T* output_data,  // NOLINT
-    const T* output_grad, const int channels, const int input_height,  // NOLINT
+    const int nthreads, const T* input_data, const T* output_data,
+    const T* output_grad, const int channels, const int input_height,
     const int input_width, const int output_height, const int output_width,
     const int ksize_height, const int ksize_width, const int stride_height,
     const int stride_width, const int padding_height, const int padding_width,
@@ -160,10 +158,8 @@ template <typename PoolProcess, typename T>
 class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& input,
-                  std::vector<int>& ksize,  // NOLINT
-                  std::vector<int>& strides,  // NOLINT
-                  std::vector<int>& paddings,  // NOLINT
+                  const framework::Tensor& input, std::vector<int>& ksize,
+                  std::vector<int>& strides, std::vector<int>& paddings,
                   PoolProcess pool_process, framework::Tensor* output) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
@@ -205,10 +201,8 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad,
-                  std::vector<int>& ksize,  // NOLINT
-                  std::vector<int>& strides,  // NOLINT
-                  std::vector<int>& paddings,  // NOLINT
+                  const framework::Tensor& output_grad, std::vector<int>& ksize,
+                  std::vector<int>& strides, std::vector<int>& paddings,
                   PoolProcess pool_process, framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
@@ -252,10 +246,8 @@ class MaxPool2dGradFunctor<platform::CUDADeviceContext, T> {
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad,
-                  std::vector<int>& ksize,  // NOLINT
-                  std::vector<int>& strides,  // NOLINT
-                  std::vector<int>& paddings,  // NOLINT
+                  const framework::Tensor& output_grad, std::vector<int>& ksize,
+                  std::vector<int>& strides, std::vector<int>& paddings,
                   framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
@@ -314,7 +306,7 @@ template class Pool2dGradFunctor;
 
 template <typename PoolProcess, typename T>
-__global__ void KernelPool3D(const int nthreads, const T* input_data,  // NOLINT
+__global__ void KernelPool3D(const int nthreads, const T* input_data,
                              const int channels, const int input_depth,
                              const int input_height, const int input_width,
                              const int output_depth, const int output_height,
@@ -360,8 +352,8 @@ __global__ void KernelPool3D(const int nthreads, const T* input_data,  // NOLINT
 
 template <typename PoolProcess, typename T>
 __global__ void KernelPool3DGrad(
-    const int nthreads, const T* input_data, const T* output_data,  // NOLINT
-    const T* output_grad, const int channels, const int input_depth,  // NOLINT
+    const int nthreads, const T* input_data, const T* output_data,
+    const T* output_grad, const int channels, const int input_depth,
     const int input_height, const int input_width, const int output_depth,
     const int output_height, const int output_width, const int ksize_depth,
     const int ksize_height, const int ksize_width, const int stride_depth,
@@ -424,8 +416,8 @@ __global__ void KernelPool3DGrad(
 
 template <typename T>
 __global__ void KernelMaxPool3DGrad(
-    const int nthreads, const T* input_data, const T* output_data,  // NOLINT
-    const T* output_grad, const int channels, const int input_depth,  // NOLINT
+    const int nthreads, const T* input_data, const T* output_data,
+    const T* output_grad, const int channels, const int input_depth,
     const int input_height, const int input_width, const int output_depth,
     const int output_height, const int output_width, const int ksize_depth,
     const int ksize_height, const int ksize_width, const int stride_depth,
@@ -482,10 +474,8 @@ template <typename PoolProcess, typename T>
 class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& input,
-                  std::vector<int>& ksize,  // NOLINT
-                  std::vector<int>& strides,  // NOLINT
-                  std::vector<int>& paddings,  // NOLINT
+                  const framework::Tensor& input, std::vector<int>& ksize,
+                  std::vector<int>& strides, std::vector<int>& paddings,
                   PoolProcess pool_process, framework::Tensor* output) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
@@ -535,10 +525,8 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad,
-                  std::vector<int>& ksize,  // NOLINT
-                  std::vector<int>& strides,  // NOLINT
-                  std::vector<int>& paddings,  // NOLINT
+                  const framework::Tensor& output_grad, std::vector<int>& ksize,
+                  std::vector<int>& strides, std::vector<int>& paddings,
                   PoolProcess pool_process, framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
@@ -590,10 +578,8 @@ class MaxPool3dGradFunctor<platform::CUDADeviceContext, T> {
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad,
-                  std::vector<int>& ksize,  // NOLINT
-                  std::vector<int>& strides,  // NOLINT
-                  std::vector<int>& paddings,  // NOLINT
+                  const framework::Tensor& output_grad, std::vector<int>& ksize,
+                  std::vector<int>& strides, std::vector<int>& paddings,
                   framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
@@ -750,10 +736,8 @@ template <typename T1, typename T2>
 class MaxPool2dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& input,
-                  std::vector<int>& ksize,  // NOLINT
-                  std::vector<int>& strides,  // NOLINT
-                  std::vector<int>& paddings,  // NOLINT
+                  const framework::Tensor& input, std::vector<int>& ksize,
+                  std::vector<int>& strides, std::vector<int>& paddings,
                   framework::Tensor* output, framework::Tensor* mask) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
@@ -795,10 +779,8 @@ class MaxPool2dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& output_grad,
-                  const framework::Tensor& mask,
-                  std::vector<int>& ksize,  // NOLINT
-                  std::vector<int>& strides,  // NOLINT
-                  std::vector<int>& paddings,  // NOLINT
+                  const framework::Tensor& mask, std::vector<int>& ksize,
+                  std::vector<int>& strides, std::vector<int>& paddings,
                   framework::Tensor* input_grad) {
     const int batch_size = input_grad->dims()[0];
     const int input_channels = input_grad->dims()[1];
@@ -955,10 +937,8 @@ template <typename T1, typename T2>
 class MaxPool3dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& input,
-                  std::vector<int>& ksize,  // NOLINT
-                  std::vector<int>& strides,  // NOLINT
-                  std::vector<int>& paddings,  // NOLINT
+                  const framework::Tensor& input, std::vector<int>& ksize,
+                  std::vector<int>& strides, std::vector<int>& paddings,
                   framework::Tensor* output, framework::Tensor* mask) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
@@ -1007,10 +987,8 @@ class MaxPool3dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& output_grad,
-                  const framework::Tensor& mask,
-                  std::vector<int>& ksize,  // NOLINT
-                  std::vector<int>& strides,  // NOLINT
-                  std::vector<int>& paddings,  // NOLINT
+                  const framework::Tensor& mask, std::vector<int>& ksize,
+                  std::vector<int>& strides, std::vector<int>& paddings,
                   framework::Tensor* input_grad) {
     const int batch_size = input_grad->dims()[0];
     const int input_channels = input_grad->dims()[1];
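
For reference, a minimal sketch of how the functor interface touched by this patch is driven from an op kernel, based only on the `operator()` signatures visible in the hunks above. The helper `RunMaxPool2d`, the hard-coded window/stride/padding values, and the `float`-style instantiation are illustrative assumptions, not part of the change; `math::MaxPool<T>` is the pooling process type declared in `pooling.h`.

```cpp
// Illustrative sketch (not part of the patch): invoking Pool2dFunctor with the
// signature shown above -- (context, input, ksize, strides, paddings,
// pool_process, output).
#include <vector>

#include "paddle/fluid/operators/math/pooling.h"

namespace paddle {
namespace operators {

template <typename T>
void RunMaxPool2d(const platform::CUDADeviceContext& context,
                  const framework::Tensor& input, framework::Tensor* output) {
  // ksize/strides/paddings stay plain std::vector<int>; after this change the
  // per-line NOLINT suppressions on these reference parameters are gone.
  std::vector<int> ksize = {2, 2};     // pooling window (height, width)
  std::vector<int> strides = {2, 2};   // step in each spatial dimension
  std::vector<int> paddings = {0, 0};  // zero padding on height and width

  math::MaxPool<T> pool_process;  // element-wise max as the pooling process
  math::Pool2dFunctor<platform::CUDADeviceContext, math::MaxPool<T>, T>
      pool2d_forward;
  // `output` must have its pooled dims set by the caller; the functor
  // allocates its data on the current CUDA place.
  pool2d_forward(context, input, ksize, strides, paddings, pool_process,
                 output);
}

}  // namespace operators
}  // namespace paddle
```

The gradient functors follow the same calling pattern, with the extra `output`/`output_grad` (or `mask`) tensors placed ahead of `ksize`, as shown in the hunks above.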