diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 120eb1e4af9cef43e76e27d4ad66acfbbd597a36..17bdac8749e31565b119b2cb84aed199fac0f441 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -22,3 +22,5 @@ ENDIF() cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS}) nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info) + +nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) diff --git a/paddle/platform/cudnn_helper.h b/paddle/platform/cudnn_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..24ddf3441caa6e5f45a7b96af26a23ed324dc1b6 --- /dev/null +++ b/paddle/platform/cudnn_helper.h @@ -0,0 +1,200 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/platform/dynload/cudnn.h" +#include "paddle/platform/enforce.h" +#include "paddle/platform/macros.h" + +namespace paddle { +namespace platform { + +enum class DataLayout { + kNHWC, + kNCHW, + kNCHW_VECT_C, +}; + +enum class PoolingMode { + kMaximum, + kAverage, +}; + +template +class CudnnDataType; + +template <> +class CudnnDataType { + public: + static const cudnnDataType_t type = CUDNN_DATA_FLOAT; +}; + +template <> +class CudnnDataType { + public: + static const cudnnDataType_t type = CUDNN_DATA_DOUBLE; +}; + +inline cudnnTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) { + switch (order) { + case DataLayout::kNHWC: + return CUDNN_TENSOR_NHWC; + case DataLayout::kNCHW: + return CUDNN_TENSOR_NCHW; + default: + PADDLE_THROW("Unknown cudnn equivalent for order"); + } + return CUDNN_TENSOR_NCHW; +} + +class ScopedTensorDescriptor { + public: + ScopedTensorDescriptor() { + PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&desc_)); + } + ~ScopedTensorDescriptor() { + PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(desc_)); + } + + inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format, + const cudnnDataType_t type, + const std::vector& dims) { + // the format is not used now, but it maybe useful feature + std::vector strides(dims.size()); + strides[dims.size() - 1] = 1; + for (int i = dims.size() - 2; i >= 0; i--) { + strides[i] = dims[i + 1] * strides[i + 1]; + } + PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor( + desc_, type, dims.size(), dims.data(), strides.data())); + return desc_; + } + + template + inline cudnnTensorDescriptor_t descriptor(const DataLayout& order, + const std::vector& dims) { + return descriptor(GetCudnnTensorFormat(order), CudnnDataType::type, + dims); + } + + private: + cudnnTensorDescriptor_t desc_; + DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor); +}; + +class ScopedFilterDescriptor { + public: + ScopedFilterDescriptor() { + PADDLE_ENFORCE(dynload::cudnnCreateFilterDescriptor(&desc_)); + } + ~ScopedFilterDescriptor() { + PADDLE_ENFORCE(dynload::cudnnDestroyFilterDescriptor(desc_)); + } + + inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format, + const cudnnDataType_t type, + const std::vector& kernel) { + // filter layout: output input spatial_dim_y spatial_dim_x + PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor( + desc_, type, format, kernel.size(), kernel.data())); + return desc_; + } + + template + inline cudnnFilterDescriptor_t descriptor(const DataLayout& order, + const std::vector& kernel) { + return descriptor(GetCudnnTensorFormat(order), CudnnDataType::type, + kernel); + } + + private: + cudnnFilterDescriptor_t desc_; + DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor); +}; + +class ScopedConvolutionDescriptor { + public: + ScopedConvolutionDescriptor() { + PADDLE_ENFORCE(dynload::cudnnCreateConvolutionDescriptor(&desc_)); + } + ~ScopedConvolutionDescriptor() { + PADDLE_ENFORCE(dynload::cudnnDestroyConvolutionDescriptor(desc_)); + } + + inline cudnnConvolutionDescriptor_t descriptor( + cudnnDataType_t type, const std::vector& pads, + const std::vector& strides, const std::vector& dilations) { + PADDLE_ENFORCE_EQ(pads.size(), strides.size()); + PADDLE_ENFORCE_EQ(pads.size(), dilations.size()); + +#if CUDNN_VERSION < 6000 + // cudnn v5 does not support dilation conv, the argument is called upscale + // instead of dilations and it is must be one. + for (size_t i = 0; i < dilations.size(); ++i) { + PADDLE_ENFORCE_EQ( + dilations[i], 1, + "Dilations conv is not supported in this cuDNN version"); + } +#endif + + PADDLE_ENFORCE(dynload::cudnnSetConvolutionNdDescriptor( + desc_, pads.size(), pads.data(), strides.data(), dilations.data(), + CUDNN_CROSS_CORRELATION, type)); + return desc_; + } + + template + inline cudnnConvolutionDescriptor_t descriptor( + const std::vector& pads, const std::vector& strides, + const std::vector& dilations) { + return descriptor(CudnnDataType::type, pads, strides, dilations); + } + + private: + cudnnConvolutionDescriptor_t desc_; + DISABLE_COPY_AND_ASSIGN(ScopedConvolutionDescriptor); +}; + +class ScopedPoolingDescriptor { + public: + ScopedPoolingDescriptor() { + PADDLE_ENFORCE(dynload::cudnnCreatePoolingDescriptor(&desc_)); + } + ~ScopedPoolingDescriptor() { + PADDLE_ENFORCE(dynload::cudnnDestroyPoolingDescriptor(desc_)); + } + + inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode, + const std::vector& kernel, + const std::vector& pads, + const std::vector& strides) { + PADDLE_ENFORCE_EQ(kernel.size(), pads.size()); + PADDLE_ENFORCE_EQ(kernel.size(), strides.size()); + PADDLE_ENFORCE(dynload::cudnnSetPoolingNdDescriptor( + desc_, (mode == PoolingMode::kMaximum + ? CUDNN_POOLING_MAX + : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING), + CUDNN_PROPAGATE_NAN, // Always propagate nans. + kernel.size(), kernel.data(), pads.data(), strides.data())); + return desc_; + } + + private: + cudnnPoolingDescriptor_t desc_; + DISABLE_COPY_AND_ASSIGN(ScopedPoolingDescriptor); +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/cudnn_helper_test.cc b/paddle/platform/cudnn_helper_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..6bd85ae1ca8b47b203e0321e9d9224d5cfd3a586 --- /dev/null +++ b/paddle/platform/cudnn_helper_test.cc @@ -0,0 +1,120 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/cudnn_helper.h" +#include + +TEST(CudnnHelper, ScopedTensorDescriptor) { + using paddle::platform::ScopedTensorDescriptor; + using paddle::platform::DataLayout; + + ScopedTensorDescriptor tensor_desc; + std::vector shape = {2, 4, 6, 6}; + auto desc = tensor_desc.descriptor(DataLayout::kNCHW, shape); + + cudnnDataType_t type; + int nd; + std::vector dims(4); + std::vector strides(4); + paddle::platform::dynload::cudnnGetTensorNdDescriptor( + desc, 4, &type, &nd, dims.data(), strides.data()); + + EXPECT_EQ(nd, 4); + for (size_t i = 0; i < dims.size(); ++i) { + EXPECT_EQ(dims[i], shape[i]); + } + EXPECT_EQ(strides[3], 1); + EXPECT_EQ(strides[2], 6); + EXPECT_EQ(strides[1], 36); + EXPECT_EQ(strides[0], 144); +} + +TEST(CudnnHelper, ScopedFilterDescriptor) { + using paddle::platform::ScopedFilterDescriptor; + using paddle::platform::DataLayout; + + ScopedFilterDescriptor filter_desc; + std::vector shape = {2, 3, 3}; + auto desc = filter_desc.descriptor(DataLayout::kNCHW, shape); + + cudnnDataType_t type; + int nd; + cudnnTensorFormat_t format; + std::vector kernel(3); + paddle::platform::dynload::cudnnGetFilterNdDescriptor(desc, 3, &type, &format, + &nd, kernel.data()); + + EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format); + EXPECT_EQ(nd, 3); + for (size_t i = 0; i < shape.size(); ++i) { + EXPECT_EQ(kernel[i], shape[i]); + } +} + +TEST(CudnnHelper, ScopedConvolutionDescriptor) { + using paddle::platform::ScopedConvolutionDescriptor; + + ScopedConvolutionDescriptor conv_desc; + std::vector src_pads = {2, 2, 2}; + std::vector src_strides = {1, 1, 1}; + std::vector src_dilations = {1, 1, 1}; + auto desc = conv_desc.descriptor(src_pads, src_strides, src_dilations); + + cudnnDataType_t type; + cudnnConvolutionMode_t mode; + int nd; + std::vector pads(3); + std::vector strides(3); + std::vector dilations(3); + paddle::platform::dynload::cudnnGetConvolutionNdDescriptor( + desc, 3, &nd, pads.data(), strides.data(), dilations.data(), &mode, + &type); + + EXPECT_EQ(nd, 3); + for (size_t i = 0; i < src_pads.size(); ++i) { + EXPECT_EQ(pads[i], src_pads[i]); + EXPECT_EQ(strides[i], src_strides[i]); + EXPECT_EQ(dilations[i], src_dilations[i]); + } + EXPECT_EQ(mode, CUDNN_CROSS_CORRELATION); +} + +TEST(CudnnHelper, ScopedPoolingDescriptor) { + using paddle::platform::ScopedPoolingDescriptor; + using paddle::platform::PoolingMode; + + ScopedPoolingDescriptor pool_desc; + std::vector src_kernel = {2, 2, 5}; + std::vector src_pads = {1, 1, 2}; + std::vector src_strides = {2, 2, 3}; + auto desc = pool_desc.descriptor(PoolingMode::kMaximum, src_kernel, src_pads, + src_strides); + + cudnnPoolingMode_t mode; + cudnnNanPropagation_t nan_t = CUDNN_PROPAGATE_NAN; + int nd; + std::vector kernel(3); + std::vector pads(3); + std::vector strides(3); + paddle::platform::dynload::cudnnGetPoolingNdDescriptor( + desc, 3, &mode, &nan_t, &nd, kernel.data(), pads.data(), strides.data()); + + EXPECT_EQ(nd, 3); + for (size_t i = 0; i < src_pads.size(); ++i) { + EXPECT_EQ(kernel[i], src_kernel[i]); + EXPECT_EQ(pads[i], src_pads[i]); + EXPECT_EQ(strides[i], src_strides[i]); + } + EXPECT_EQ(mode, CUDNN_POOLING_MAX); +} diff --git a/paddle/platform/dynload/CMakeLists.txt b/paddle/platform/dynload/CMakeLists.txt index d205ead84598e04eea523be32139959a02e0dd83..ceb66f84b6b01892cbaf61c79a47ae60d2589164 100644 --- a/paddle/platform/dynload/CMakeLists.txt +++ b/paddle/platform/dynload/CMakeLists.txt @@ -1,2 +1,2 @@ cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) -nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc) +nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc DEPS dynamic_loader) diff --git a/paddle/platform/dynload/cudnn.h b/paddle/platform/dynload/cudnn.h index ef0dd85b083dc2335dd5c70d3dc5f59eda25daeb..0120625b7c14448f1b8deb88c24a3ee06eaf4f01 100644 --- a/paddle/platform/dynload/cudnn.h +++ b/paddle/platform/dynload/cudnn.h @@ -62,19 +62,27 @@ extern void* cudnn_dso_handle; #define CUDNN_DNN_ROUTINE_EACH(__macro) \ __macro(cudnnSetTensor4dDescriptor); \ __macro(cudnnSetTensor4dDescriptorEx); \ + __macro(cudnnSetTensorNdDescriptor); \ + __macro(cudnnGetTensorNdDescriptor); \ __macro(cudnnGetConvolutionNdForwardOutputDim); \ __macro(cudnnGetConvolutionForwardAlgorithm); \ __macro(cudnnCreateTensorDescriptor); \ __macro(cudnnDestroyTensorDescriptor); \ __macro(cudnnCreateFilterDescriptor); \ __macro(cudnnSetFilter4dDescriptor); \ + __macro(cudnnSetFilterNdDescriptor); \ + __macro(cudnnGetFilterNdDescriptor); \ __macro(cudnnSetPooling2dDescriptor); \ + __macro(cudnnSetPoolingNdDescriptor); \ + __macro(cudnnGetPoolingNdDescriptor); \ __macro(cudnnDestroyFilterDescriptor); \ __macro(cudnnCreateConvolutionDescriptor); \ __macro(cudnnCreatePoolingDescriptor); \ __macro(cudnnDestroyPoolingDescriptor); \ __macro(cudnnSetConvolution2dDescriptor); \ __macro(cudnnDestroyConvolutionDescriptor); \ + __macro(cudnnSetConvolutionNdDescriptor); \ + __macro(cudnnGetConvolutionNdDescriptor); \ __macro(cudnnCreate); \ __macro(cudnnDestroy); \ __macro(cudnnSetStream); \ diff --git a/paddle/platform/macros.h b/paddle/platform/macros.h new file mode 100644 index 0000000000000000000000000000000000000000..4a04a38c0c6f905639004dea2f4416ecc57c8620 --- /dev/null +++ b/paddle/platform/macros.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +// Disable the copy and assignment operator for a class. +#ifndef DISABLE_COPY_AND_ASSIGN +#define DISABLE_COPY_AND_ASSIGN(classname) \ + private: \ + classname(const classname&) = delete; \ + classname& operator=(const classname&) = delete +#endif