From 1bb0e2943b13b1d65da65c99897105f665ae09d7 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Wed, 11 Oct 2017 19:02:40 +0800 Subject: [PATCH] Add pool2d cudnn --- paddle/framework/operator.h | 9 + paddle/operators/CMakeLists.txt | 7 + paddle/operators/pool_cudnn_op.cc | 34 ++++ paddle/operators/pool_cudnn_op.cu | 174 ++++++++++++++++++ paddle/operators/pool_cudnn_op.h | 22 +++ .../framework/tests/test_pool2d_cudnn_op.py | 144 +++++++++++++++ 6 files changed, 390 insertions(+) create mode 100644 paddle/operators/pool_cudnn_op.cc create mode 100644 paddle/operators/pool_cudnn_op.cu create mode 100644 paddle/operators/pool_cudnn_op.h create mode 100644 python/paddle/v2/framework/tests/test_pool2d_cudnn_op.py diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 15f80b57206..5db637abbc9 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -289,6 +289,15 @@ class ExecutionContext { return device_context_; } +#ifdef PADDLE_WITH_CUDA + const platform::CUDADeviceContext& cuda_device_context() const { + PADDLE_ENFORCE(platform::is_gpu_place(device_context_.GetPlace())); + auto cuda_ctx = + reinterpret_cast(&device_context_); + return *cuda_ctx; + } +#endif // PADDLE_WITH_CUDA + private: const OperatorBase& op_; const Scope& scope_; diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index ad941bde2be..e2a8615f908 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -69,6 +69,13 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n") endif() + # pool_cudnn_op contains several operators + if ("${TARGET}" STREQUAL "pool_cudnn_op") + set(pybind_flag 1) + # It's enough to just adding one operator to pybind + file(APPEND ${pybind_file} "USE_OP(pool2d_cudnn);\n") + endif() + # activation_op contains several operators if ("${TARGET}" STREQUAL "activation_op") set(pybind_flag 1) diff --git a/paddle/operators/pool_cudnn_op.cc b/paddle/operators/pool_cudnn_op.cc new file mode 100644 index 00000000000..8307561194e --- /dev/null +++ b/paddle/operators/pool_cudnn_op.cc @@ -0,0 +1,34 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/pool_cudnn_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad, + ops::PoolOpGrad); + +REGISTER_OP_CPU_KERNEL(pool2d_cudnn, + ops::PoolKernel); +REGISTER_OP_CPU_KERNEL(pool2d_cudnn_grad, + ops::PoolGradKernel) + +// REGISTER_OP(pool3d_cudnn, ops::PoolOp, ops::Pool3dOpMaker, pool3d_cudnn_grad, +// ops::PoolOpGrad); +// +// REGISTER_OP_CPU_KERNEL(pool3d_cudnn, +// ops::PoolKernel); +// REGISTER_OP_CPU_KERNEL(pool3d_cudnn_grad, +// ops::PoolGradKernel); diff --git a/paddle/operators/pool_cudnn_op.cu b/paddle/operators/pool_cudnn_op.cu new file mode 100644 index 00000000000..c5c9bf73b97 --- /dev/null +++ b/paddle/operators/pool_cudnn_op.cu @@ -0,0 +1,174 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/pool_cudnn_op.h" +#include "paddle/platform/cudnn_helper.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using ScopedTensorDescriptor = platform::ScopedTensorDescriptor; +using ScopedPoolingDescriptor = platform::ScopedPoolingDescriptor; +using DataLayout = platform::DataLayout; +using PoolingMode = platform::PoolingMode; + +// NOTE: copy from conv_cudnn +std::vector Dims2Vector(const framework::DDim &dims) { + std::vector ret; + for (int i = 0; i < dims.size(); i++) { + ret.push_back(dims[i]); + } + return ret; +} + +template +class PoolCudnnOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), + "It must use GPUPlace."); + + const Tensor *input = ctx.Input("X"); + Tensor *output = ctx.Output("Out"); + + const T *input_data = input->data(); + T *output_data = output->mutable_data(ctx.GetPlace()); + + std::string pooling_type = ctx.Attr("poolingType"); + std::vector ksize = ctx.Attr>("ksize"); + std::vector strides = ctx.Attr>("strides"); + std::vector paddings = ctx.Attr>("paddings"); + if (ctx.Attr("globalPooling")) { + for (size_t i = 0; i < ksize.size(); ++i) { + ksize[i] = static_cast(input->dims()[i + 2]); + } + } + + // ------------------- cudnn descriptors --------------------- + ScopedTensorDescriptor input_desc; + ScopedTensorDescriptor output_desc; + ScopedPoolingDescriptor pool_desc; + DataLayout layout = DataLayout::kNCHW; + + cudnnTensorDescriptor_t cudnn_input_desc = + input_desc.descriptor(layout, Dims2Vector(input->dims())); + cudnnTensorDescriptor_t cudnn_output_desc = + output_desc.descriptor(layout, Dims2Vector(output->dims())); + + PoolingMode pooling_mode; + if (pooling_type == "max") { + pooling_mode = PoolingMode::kMaximum; + } else { + pooling_mode = PoolingMode::kAverage; + } + + cudnnPoolingDescriptor_t cudnn_pool_desc = + pool_desc.descriptor(pooling_mode, ksize, paddings, strides); + + // ------------------- cudnn pool algorithm --------------------- + auto handle = ctx.cuda_device_context().cudnn_handle(); + T alpha = 1.0f, beta = 0.0f; + + PADDLE_ENFORCE(platform::dynload::cudnnPoolingForward( + handle, cudnn_pool_desc, &alpha, cudnn_input_desc, input_data, &beta, + cudnn_output_desc, output_data)); + } +}; + +template +class PoolCudnnGradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), + "It must use GPUPlace."); + + const Tensor *input = ctx.Input("X"); + const Tensor *output = ctx.Input("Out"); + const Tensor *output_grad = + ctx.Input(framework::GradVarName("Out")); + Tensor *input_grad = ctx.Output(framework::GradVarName("X")); + + std::string pooling_type = ctx.Attr("poolingType"); + std::vector ksize = ctx.Attr>("ksize"); + std::vector strides = ctx.Attr>("strides"); + std::vector paddings = ctx.Attr>("paddings"); + + if (ctx.Attr("globalPooling")) { + for (size_t i = 0; i < ksize.size(); ++i) + ksize[i] = static_cast(input->dims()[i + 2]); + } + + const T *input_data = input->data(); + const T *output_data = output->data(); + const T *output_grad_data = output_grad->data(); + + // ------------------- cudnn descriptors --------------------- + ScopedTensorDescriptor input_desc; + ScopedTensorDescriptor output_desc; + ScopedTensorDescriptor input_grad_desc; + ScopedTensorDescriptor output_grad_desc; + ScopedPoolingDescriptor pool_desc; + DataLayout layout = DataLayout::kNCHW; + + cudnnTensorDescriptor_t cudnn_input_desc = + input_desc.descriptor(layout, Dims2Vector(input->dims())); + cudnnTensorDescriptor_t cudnn_output_desc = + output_desc.descriptor(layout, Dims2Vector(output->dims())); + cudnnTensorDescriptor_t cudnn_output_grad_desc = + output_grad_desc.descriptor(layout, + Dims2Vector(output_grad->dims())); + + PoolingMode pooling_mode; + if (pooling_type == "max") { + pooling_mode = PoolingMode::kMaximum; + } else { + pooling_mode = PoolingMode::kAverage; + } + + cudnnPoolingDescriptor_t cudnn_pool_desc = + pool_desc.descriptor(pooling_mode, ksize, paddings, strides); + + // ------------------- cudnn pool algorithm --------------------- + auto handle = ctx.cuda_device_context().cudnn_handle(); + T alpha = 1.0f, beta = 0.0f; + + if (input_grad) { + T *input_grad_data = input_grad->mutable_data(ctx.GetPlace()); + auto temp = framework::EigenVector::Flatten(*input_grad); + temp.device(ctx.GetEigenDevice()) = + temp.constant(static_cast(0)); + + cudnnTensorDescriptor_t cudnn_input_grad_desc = + input_grad_desc.descriptor(layout, + Dims2Vector(input_grad->dims())); + + PADDLE_ENFORCE(platform::dynload::cudnnPoolingBackward( + handle, cudnn_pool_desc, &alpha, cudnn_output_desc, output_data, + cudnn_output_grad_desc, output_grad_data, cudnn_input_desc, + input_data, &beta, cudnn_input_grad_desc, input_grad_data)); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel); +REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel); +// +// REGISTER_OP_GPU_KERNEL(pool3d_cudnn, ops::PoolCudnnOpKernel); +// REGISTER_OP_GPU_KERNEL(pool3d_cudnn_grad, ops::PoolCudnnGradOpKernel); diff --git a/paddle/operators/pool_cudnn_op.h b/paddle/operators/pool_cudnn_op.h new file mode 100644 index 00000000000..8940967ab7a --- /dev/null +++ b/paddle/operators/pool_cudnn_op.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/op_registry.h" +#include "paddle/operators/pool_op.h" + +namespace paddle { +namespace operators {} // namespace operators +} // namespace paddle diff --git a/python/paddle/v2/framework/tests/test_pool2d_cudnn_op.py b/python/paddle/v2/framework/tests/test_pool2d_cudnn_op.py new file mode 100644 index 00000000000..8180468014b --- /dev/null +++ b/python/paddle/v2/framework/tests/test_pool2d_cudnn_op.py @@ -0,0 +1,144 @@ +import unittest +import numpy as np +from op_test import OpTest + + +def max_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): + + N, C, H, W = x.shape + if global_pool == 1: + ksize = [H, W] + H_out = (H - ksize[0] + 2 * paddings[0]) / strides[0] + 1 + W_out = (W - ksize[1] + 2 * paddings[1]) / strides[1] + 1 + out = np.zeros((N, C, H_out, W_out)) + for i in xrange(H_out): + for j in xrange(W_out): + r_start = np.max((i * strides[0] - paddings[0], 0)) + r_end = np.min((i * strides[0] + ksize[0] - paddings[0], H)) + c_start = np.max((j * strides[1] - paddings[1], 0)) + c_end = np.min((j * strides[1] + ksize[1] - paddings[1], W)) + x_masked = x[:, :, r_start:r_end, c_start:c_end] + + out[:, :, i, j] = np.max(x_masked, axis=(2, 3)) + return out + + +def avg_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): + + N, C, H, W = x.shape + if global_pool == 1: + ksize = [H, W] + H_out = (H - ksize[0] + 2 * paddings[0]) / strides[0] + 1 + W_out = (W - ksize[1] + 2 * paddings[1]) / strides[1] + 1 + out = np.zeros((N, C, H_out, W_out)) + for i in xrange(H_out): + for j in xrange(W_out): + r_start = np.max((i * strides[0] - paddings[0], 0)) + r_end = np.min((i * strides[0] + ksize[0] - paddings[0], H)) + c_start = np.max((j * strides[1] - paddings[1], 0)) + c_end = np.min((j * strides[1] + ksize[1] - paddings[1], W)) + x_masked = x[:, :, r_start:r_end, c_start:c_end] + + out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / ( + (r_end - r_start) * (c_end - c_start)) + return out + + +class TestPool2d_cudnn_Op(OpTest): + def setUp(self): + self.initTestCase() + input = np.random.random(self.shape).astype("float32") + output = self.pool2D_forward_naive(input, self.ksize, self.strides, + self.paddings, self.global_pool) + self.inputs = {'X': input} + + self.attrs = { + 'strides': self.strides, + 'paddings': self.paddings, + 'ksize': self.ksize, + 'poolingType': self.pool_type, + 'globalPooling': self.global_pool, + } + + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + if self.pool_type != "max": + self.check_grad(set(['X']), 'Out', max_relative_error=0.07) + + def initTestCase(self): + self.global_pool = True + self.op_type = "pool2d_cudnn" + self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + self.shape = [2, 3, 5, 5] + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [0, 0] + + +class TestCase1(TestPool2d_cudnn_Op): + def initTestCase(self): + self.global_pool = False + self.op_type = "pool2d_cudnn" + self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + self.shape = [2, 3, 7, 7] + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [0, 0] + + +class TestCase2(TestPool2d_cudnn_Op): + def initTestCase(self): + self.global_pool = False + self.op_type = "pool2d_cudnn" + self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + self.shape = [2, 3, 7, 7] + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [1, 1] + + +class TestCase3(TestPool2d_cudnn_Op): + def initTestCase(self): + self.global_pool = True + self.op_type = "pool2d_cudnn" + self.pool_type = "max" + self.pool2D_forward_naive = max_pool2D_forward_naive + self.shape = [2, 3, 5, 5] + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [0, 0] + + +class TestCase4(TestPool2d_cudnn_Op): + def initTestCase(self): + self.global_pool = False + self.op_type = "pool2d_cudnn" + self.pool_type = "max" + self.pool2D_forward_naive = max_pool2D_forward_naive + self.shape = [2, 3, 7, 7] + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [0, 0] + + +class TestCase5(TestPool2d_cudnn_Op): + def initTestCase(self): + self.global_pool = False + self.op_type = "pool2d_cudnn" + self.pool_type = "max" + self.pool2D_forward_naive = max_pool2D_forward_naive + self.shape = [2, 3, 7, 7] + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [1, 1] + + +if __name__ == '__main__': + unittest.main() -- GitLab