diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index ef722e984c996f4267f5428d6eb6cb77d33468fb..9a3ad8eb642052a848ce5dc895005cf8c597dc7c 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -22,4 +22,6 @@ ENDIF() cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS}) nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info) -nv_test(cudnn_helper SRCS cudnn_helper.cc) + +nv_library(cudnn_helper SRCS cudnn_helper.cc DEPS dynload_cuda) +nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) diff --git a/paddle/platform/cudnn_helper.h b/paddle/platform/cudnn_helper.h index 91047236a281a53837862b5b918fdafac6ed8c70..6a43f49cfc3fbfd786017889fd00232446961c0b 100644 --- a/paddle/platform/cudnn_helper.h +++ b/paddle/platform/cudnn_helper.h @@ -14,7 +14,9 @@ limitations under the License. */ #pragma once +#ifndef PADDLE_ONLY_CPU #include +#include "glog/logging.h" #include "paddle/platform/dynload/cudnn.h" #include "paddle/platform/enforce.h" #include "paddle/platform/macros.h" @@ -93,11 +95,11 @@ class ScopedTensorDescriptor { // the format is not used now, but it maybe useful feature std::vector strides(dims.size()); strides[dims.size() - 1] = 1; - for (int i = dims.size() - 1; i >= 0; i++) { - strides[i] = dims[i + 1] * strides[i]; + for (int i = dims.size() - 2; i >= 0; i--) { + strides[i] = dims[i + 1] * strides[i + 1]; } - PADDLE_ENFORCE(cudnnSetTensorNdDescriptor(desc_, type, dims.size(), - dims.data(), strides.data())); + PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor( + desc_, type, dims.size(), dims.data(), strides.data())); return desc_; } @@ -126,8 +128,8 @@ class ScopedFilterDescriptor { const cudnnDataType_t type, const std::vector& kernel) { // filter layout: output input spatial_dim_y spatial_dim_x - PADDLE_ENFORCE(cudnnSetFilterNdDescriptor(desc_, type, format, - kernel.size(), kernel.data())); + PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor( + desc_, type, format, kernel.size(), kernel.data())); return desc_; } @@ -157,9 +159,21 @@ class ScopedConvolutionDescriptor { const std::vector& strides, const std::vector& dilations) { PADDLE_ENFORCE_EQ(pads.size(), strides.size()); PADDLE_ENFORCE_EQ(pads.size(), dilations.size()); - PADDLE_ENFORCE(cudnnSetConvolutionNdDescriptor( + +#if CUDNN_VERSION < 6000 + // cudnn v5 does not support dilation conv, the argument is called upscale + // instead of dilations and it is must be one. + for (size_t i = 0; i < dilations.size(); ++i) { + PADDLE_ENFORCE_EQ( + dilations[i], 1, + "Dilations conv is not supported in this cuDNN version"); + } +#endif + + PADDLE_ENFORCE(dynload::cudnnSetConvolutionNdDescriptor( desc_, pads.size(), pads.data(), strides.data(), dilations.data(), CUDNN_CROSS_CORRELATION, type)); + return desc_; } template @@ -184,26 +198,18 @@ class ScopedPoolingDescriptor { } inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode, - cudnnDataType_t type, const std::vector& kernel, const std::vector& pads, const std::vector& strides) { PADDLE_ENFORCE_EQ(kernel.size(), pads.size()); PADDLE_ENFORCE_EQ(kernel.size(), strides.size()); - PADDLE_ENFORCE(cudnnSetPoolingNdDescriptor( + PADDLE_ENFORCE(dynload::cudnnSetPoolingNdDescriptor( desc_, (mode == PoolingMode::kMaximum ? CUDNN_POOLING_MAX : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING), CUDNN_PROPAGATE_NAN, // Always propagate nans. kernel.size(), kernel.data(), pads.data(), strides.data())); - } - - template - inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode, - const std::vector& kernel, - const std::vector& pads, - const std::vector& strides) { - return descriptor(mode, CudnnDataType::type, kernel, pads, strides); + return desc_; } private: @@ -213,3 +219,4 @@ class ScopedPoolingDescriptor { } // namespace platform } // namespace paddle +#endif diff --git a/paddle/platform/cudnn_helper_test.cc b/paddle/platform/cudnn_helper_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..729f2f8a10fcb1dac5c9510cb1cf0345dc8f542f --- /dev/null +++ b/paddle/platform/cudnn_helper_test.cc @@ -0,0 +1,121 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/cudnn_helper.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +TEST(CudnnHelper, ScopedTensorDescriptor) { + using paddle::platform::ScopedTensorDescriptor; + using paddle::platform::DataLayout; + + ScopedTensorDescriptor tensor_desc; + std::vector shape = {2, 4, 6, 6}; + auto desc = tensor_desc.descriptor(DataLayout::kNCHW, shape); + + cudnnDataType_t type; + int nd; + std::vector dims(4); + std::vector strides(4); + paddle::platform::dynload::cudnnGetTensorNdDescriptor( + desc, 4, &type, &nd, dims.data(), strides.data()); + + EXPECT_EQ(nd, 4); + for (size_t i = 0; i < dims.size(); ++i) { + EXPECT_EQ(dims[i], shape[i]); + } + EXPECT_EQ(strides[3], 1); + EXPECT_EQ(strides[2], 6); + EXPECT_EQ(strides[1], 36); + EXPECT_EQ(strides[0], 144); +} + +TEST(CudnnHelper, ScopedFilterDescriptor) { + using paddle::platform::ScopedFilterDescriptor; + using paddle::platform::DataLayout; + + ScopedFilterDescriptor filter_desc; + std::vector shape = {2, 3, 3}; + auto desc = filter_desc.descriptor(DataLayout::kNCHW, shape); + + cudnnDataType_t type; + int nd; + cudnnTensorFormat_t format; + std::vector kernel(3); + paddle::platform::dynload::cudnnGetFilterNdDescriptor(desc, 3, &type, &format, + &nd, kernel.data()); + + EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format); + EXPECT_EQ(nd, 3); + for (size_t i = 0; i < shape.size(); ++i) { + EXPECT_EQ(kernel[i], shape[i]); + } +} + +TEST(CudnnHelper, ScopedConvolutionDescriptor) { + using paddle::platform::ScopedConvolutionDescriptor; + + ScopedConvolutionDescriptor conv_desc; + std::vector src_pads = {2, 2, 2}; + std::vector src_strides = {1, 1, 1}; + std::vector src_dilations = {1, 1, 1}; + auto desc = conv_desc.descriptor(src_pads, src_strides, src_dilations); + + cudnnDataType_t type; + cudnnConvolutionMode_t mode; + int nd; + std::vector pads(3); + std::vector strides(3); + std::vector dilations(3); + paddle::platform::dynload::cudnnGetConvolutionNdDescriptor( + desc, 3, &nd, pads.data(), strides.data(), dilations.data(), &mode, + &type); + + EXPECT_EQ(nd, 3); + for (size_t i = 0; i < src_pads.size(); ++i) { + EXPECT_EQ(pads[i], src_pads[i]); + EXPECT_EQ(strides[i], src_strides[i]); + EXPECT_EQ(dilations[i], src_dilations[i]); + } + EXPECT_EQ(mode, CUDNN_CROSS_CORRELATION); +} + +TEST(CudnnHelper, ScopedPoolingDescriptor) { + using paddle::platform::ScopedPoolingDescriptor; + using paddle::platform::PoolingMode; + + ScopedPoolingDescriptor pool_desc; + std::vector src_kernel = {2, 2, 5}; + std::vector src_pads = {1, 1, 2}; + std::vector src_strides = {2, 2, 3}; + auto desc = pool_desc.descriptor(PoolingMode::kMaximum, src_kernel, src_pads, + src_strides); + + cudnnPoolingMode_t mode; + cudnnNanPropagation_t nan_t = CUDNN_PROPAGATE_NAN; + int nd; + std::vector kernel(3); + std::vector pads(3); + std::vector strides(3); + paddle::platform::dynload::cudnnGetPoolingNdDescriptor( + desc, 3, &mode, &nan_t, &nd, kernel.data(), pads.data(), strides.data()); + + EXPECT_EQ(nd, 3); + for (size_t i = 0; i < src_pads.size(); ++i) { + EXPECT_EQ(kernel[i], src_kernel[i]); + EXPECT_EQ(pads[i], src_pads[i]); + EXPECT_EQ(strides[i], src_strides[i]); + } + EXPECT_EQ(mode, CUDNN_POOLING_MAX); +} diff --git a/paddle/platform/dynload/CMakeLists.txt b/paddle/platform/dynload/CMakeLists.txt index d205ead84598e04eea523be32139959a02e0dd83..ceb66f84b6b01892cbaf61c79a47ae60d2589164 100644 --- a/paddle/platform/dynload/CMakeLists.txt +++ b/paddle/platform/dynload/CMakeLists.txt @@ -1,2 +1,2 @@ cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) -nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc) +nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc DEPS dynamic_loader) diff --git a/paddle/platform/dynload/cudnn.h b/paddle/platform/dynload/cudnn.h index ef0dd85b083dc2335dd5c70d3dc5f59eda25daeb..0120625b7c14448f1b8deb88c24a3ee06eaf4f01 100644 --- a/paddle/platform/dynload/cudnn.h +++ b/paddle/platform/dynload/cudnn.h @@ -62,19 +62,27 @@ extern void* cudnn_dso_handle; #define CUDNN_DNN_ROUTINE_EACH(__macro) \ __macro(cudnnSetTensor4dDescriptor); \ __macro(cudnnSetTensor4dDescriptorEx); \ + __macro(cudnnSetTensorNdDescriptor); \ + __macro(cudnnGetTensorNdDescriptor); \ __macro(cudnnGetConvolutionNdForwardOutputDim); \ __macro(cudnnGetConvolutionForwardAlgorithm); \ __macro(cudnnCreateTensorDescriptor); \ __macro(cudnnDestroyTensorDescriptor); \ __macro(cudnnCreateFilterDescriptor); \ __macro(cudnnSetFilter4dDescriptor); \ + __macro(cudnnSetFilterNdDescriptor); \ + __macro(cudnnGetFilterNdDescriptor); \ __macro(cudnnSetPooling2dDescriptor); \ + __macro(cudnnSetPoolingNdDescriptor); \ + __macro(cudnnGetPoolingNdDescriptor); \ __macro(cudnnDestroyFilterDescriptor); \ __macro(cudnnCreateConvolutionDescriptor); \ __macro(cudnnCreatePoolingDescriptor); \ __macro(cudnnDestroyPoolingDescriptor); \ __macro(cudnnSetConvolution2dDescriptor); \ __macro(cudnnDestroyConvolutionDescriptor); \ + __macro(cudnnSetConvolutionNdDescriptor); \ + __macro(cudnnGetConvolutionNdDescriptor); \ __macro(cudnnCreate); \ __macro(cudnnDestroy); \ __macro(cudnnSetStream); \