cudnn_helper.h 19.8 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
D
dangqingqing 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

Q
qingqing01 已提交
17
#include <string>
Y
Pass CI  
Yu Yang 已提交
18
#include <vector>
19 20

#include "paddle/fluid/framework/operator.h"
Y
Yi Wang 已提交
21 22
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/enforce.h"
K
Kexin Zhao 已提交
23
#include "paddle/fluid/platform/float16.h"
Y
Yi Wang 已提交
24
#include "paddle/fluid/platform/macros.h"
D
dangqingqing 已提交
25

D
dzhwinter 已提交
26 27
DECLARE_bool(cudnn_deterministic);

D
dangqingqing 已提交
28 29 30
namespace paddle {
namespace platform {

Q
Qiao Longfei 已提交
31 32 33
#define CUDNN_VERSION_MIN(major, minor, patch) \
  (CUDNN_VERSION >= ((major)*1000 + (minor)*100 + (patch)))

D
"fix"  
dzhwinter 已提交
34 35 36 37
enum class DataLayout {  // Not use
  kNHWC,
  kNCHW,
  kNCDHW,
38
  kNDHWC,  // add, liyamei
D
"fix"  
dzhwinter 已提交
39 40 41 42 43 44
  kNCHW_VECT_C,
};

enum class PoolingMode {
  kMaximum,
  kMaximumDeterministic,
45 46
  kAverageExclusive,
  kAverageInclusive,
D
"fix"  
dzhwinter 已提交
47 48
};

49
enum class ActivationMode {
Q
qingqing01 已提交
50 51 52 53 54 55 56 57 58
  kNone,  // activation identity
  kSigmoid,
  kRelu,
  kRelu6,
  kReluX,
  kTanh,
  kBandPass,
};

D
dzhwinter 已提交
59 60 61 62
inline cudnnPoolingMode_t GetPoolingMode(const PoolingMode& mode) {
  switch (mode) {
    case PoolingMode::kMaximumDeterministic:
      return CUDNN_POOLING_MAX_DETERMINISTIC;
63
    case PoolingMode::kAverageExclusive:
D
dzhwinter 已提交
64
      return CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
65 66
    case PoolingMode::kAverageInclusive:
      return CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
D
dzhwinter 已提交
67 68 69
    case PoolingMode::kMaximum:
      return CUDNN_POOLING_MAX;
    default:
G
GaoWei8 已提交
70 71
      PADDLE_THROW(
          platform::errors::Unimplemented("Unexpected CUDNN pooling mode."));
D
dzhwinter 已提交
72 73
  }
}
D
dzhwinter 已提交
74

Q
qingqing01 已提交
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
inline ActivationMode StringToActivationMode(const std::string& str) {
  if (str == "identity") {
    return ActivationMode::kNone;
  } else if (str == "sigmoid") {
    return ActivationMode::kSigmoid;
  } else if (str == "relu") {
    return ActivationMode::kRelu;
  } else if (str == "relu6") {
    return ActivationMode::kRelu6;
  } else if (str == "relux") {
    return ActivationMode::kReluX;
  } else if (str == "tanh") {
    return ActivationMode::kTanh;
  } else if (str == "bandpass") {
    return ActivationMode::kBandPass;
  } else {
G
GaoWei8 已提交
91 92
    PADDLE_THROW(platform::errors::Unimplemented(
        "Unknown CUDNN activation string: %s.", str));
Q
qingqing01 已提交
93 94 95
  }
}

D
dangqingqing 已提交
96 97 98
template <typename T>
class CudnnDataType;

W
wuhuanzhou 已提交
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
// CUDNN_DATA_BFLOAT16 is not valid before cudnn8.1
#if CUDNN_VERSION_MIN(8, 1, 0)
template <>
class CudnnDataType<bfloat16> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_BFLOAT16;
  using ScalingParamType = const float;
  using BatchNormParamType = float;
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0;
    return &v;
  }
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0;
    return &v;
  }
};
#endif

K
Kexin Zhao 已提交
118 119 120 121
template <>
class CudnnDataType<float16> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_HALF;
K
Kexin Zhao 已提交
122
  // The scaling param type is float for HALF and FLOAT tensors
K
update  
Kexin Zhao 已提交
123 124
  using ScalingParamType = const float;
  using BatchNormParamType = float;
K
Kexin Zhao 已提交
125
  static ScalingParamType* kOne() {
K
Kexin Zhao 已提交
126
    static ScalingParamType v = 1.0;
K
Kexin Zhao 已提交
127 128 129
    return &v;
  }
  static ScalingParamType* kZero() {
K
Kexin Zhao 已提交
130
    static ScalingParamType v = 0.0;
K
Kexin Zhao 已提交
131 132 133 134
    return &v;
  }
};

D
dangqingqing 已提交
135 136 137 138
template <>
class CudnnDataType<float> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
K
update  
Kexin Zhao 已提交
139 140
  using ScalingParamType = const float;
  using BatchNormParamType = float;
Q
Qiao Longfei 已提交
141 142 143 144 145 146 147 148
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0;
    return &v;
  }
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0;
    return &v;
  }
D
dangqingqing 已提交
149 150 151 152 153 154
};

template <>
class CudnnDataType<double> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
K
update  
Kexin Zhao 已提交
155 156
  using ScalingParamType = const double;
  using BatchNormParamType = double;
Q
Qiao Longfei 已提交
157 158 159 160 161 162 163 164
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0;
    return &v;
  }
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0;
    return &v;
  }
D
dangqingqing 已提交
165 166
};

C
chengduoZH 已提交
167 168
inline cudnnTensorFormat_t GetCudnnTensorFormat(
    const DataLayout& order) {  // Not use
D
dangqingqing 已提交
169 170 171 172 173
  switch (order) {
    case DataLayout::kNHWC:
      return CUDNN_TENSOR_NHWC;
    case DataLayout::kNCHW:
      return CUDNN_TENSOR_NCHW;
C
chengduoZH 已提交
174
    case DataLayout::kNCDHW:
武毅 已提交
175
      return CUDNN_TENSOR_NCHW;  // NOTE: cudnn treat NdTensor as the same
176 177
    case DataLayout::kNDHWC:
      return CUDNN_TENSOR_NHWC;  // add, liyamei
D
dangqingqing 已提交
178
    default:
G
GaoWei8 已提交
179 180
      PADDLE_THROW(platform::errors::Unimplemented(
          "CUDNN has no equivalent dataLayout for input order."));
D
dangqingqing 已提交
181 182 183 184 185 186 187
  }
  return CUDNN_TENSOR_NCHW;
}

class ScopedTensorDescriptor {
 public:
  ScopedTensorDescriptor() {
188
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateTensorDescriptor(&desc_));
D
dangqingqing 已提交
189
  }
Z
Zeng Jinle 已提交
190
  ~ScopedTensorDescriptor() PADDLE_MAY_THROW {
191
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyTensorDescriptor(desc_));
D
dangqingqing 已提交
192 193 194 195
  }

  inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
                                            const cudnnDataType_t type,
武毅 已提交
196 197 198
                                            const std::vector<int>& dims,
                                            const int groups = 1) {
    // the format is not used now, will add later
D
dangqingqing 已提交
199 200
    std::vector<int> strides(dims.size());
    strides[dims.size() - 1] = 1;
201 202
    for (int i = dims.size() - 2; i >= 0; i--) {
      strides[i] = dims[i + 1] * strides[i + 1];
D
dangqingqing 已提交
203
    }
武毅 已提交
204
    // Update tensor descriptor dims setting if groups > 1
205 206
    // NOTE: Here, Assume using NCHW or NCDHW order
    std::vector<int> dims_with_group(dims.begin(), dims.end());
武毅 已提交
207 208 209
    if (groups > 1) {
      dims_with_group[1] = dims_with_group[1] / groups;
    }
210 211 212

    if (dims.size() == 4) {
      if (format == CUDNN_TENSOR_NCHW) {
213
        PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
214 215 216
            desc_, type, dims_with_group.size(), dims_with_group.data(),
            strides.data()));
      } else {  // CUDNN_TENSOR_NHWC
217
        PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensor4dDescriptor(
218 219 220 221
            desc_, format, type, dims[0], dims[3], dims[1], dims[2]));
      }
    } else if (dims.size() == 5) {
      if (format == CUDNN_TENSOR_NCHW) {
222
        PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
223 224 225
            desc_, type, dims_with_group.size(), dims_with_group.data(),
            strides.data()));
      } else {  // CUDNN_TENSOR_NHWC
226
        PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptorEx(
227 228 229
            desc_, format, type, dims.size(), dims.data()));
      }
    }
D
dangqingqing 已提交
230 231 232 233 234
    return desc_;
  }

  template <typename T>
  inline cudnnTensorDescriptor_t descriptor(const DataLayout& order,
武毅 已提交
235 236 237 238
                                            const std::vector<int>& dims,
                                            const int groups = 1) {
    return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type, dims,
                      groups);
D
dangqingqing 已提交
239 240
  }

G
GaoWei8 已提交
241 242 243
  inline cudnnTensorDescriptor_t descriptor(const cudnnDataType_t cudnn_type,
                                            const std::vector<int>& dim,
                                            const std::vector<int>& stride) {
244
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
G
GaoWei8 已提交
245 246 247 248 249 250 251 252 253 254
        desc_, cudnn_type, dim.size(), dim.data(), stride.data()));
    return desc_;
  }

  template <typename T>
  inline cudnnTensorDescriptor_t descriptor(const std::vector<int>& dim,
                                            const std::vector<int>& stride) {
    return descriptor(CudnnDataType<T>::type, dim, stride);
  }

255 256
  inline cudnnTensorDescriptor_t desc() { return desc_; }

D
dangqingqing 已提交
257 258 259 260 261
 private:
  cudnnTensorDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor);
};

G
GaoWei8 已提交
262
#if CUDNN_VERSION >= 7201
G
GaoWei8 已提交
263 264 265
class ScopedRNNTensorDescriptor {
 public:
  ScopedRNNTensorDescriptor() {
266
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDataDescriptor(&desc_));
G
GaoWei8 已提交
267 268 269
  }

  ~ScopedRNNTensorDescriptor() PADDLE_MAY_THROW {
270
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDataDescriptor(desc_));
G
GaoWei8 已提交
271 272 273 274 275
  }

  inline cudnnRNNDataDescriptor_t descriptor(
      const cudnnDataType_t cudnn_type, int max_seq_length, int batch_size,
      int input_size, bool time_major, const std::vector<int>& seq_length) {
276
    static double padding_fill = 0.0f;
G
GaoWei8 已提交
277 278 279 280 281 282 283 284
    cudnnRNNDataLayout_t layout;

    if (time_major) {
      layout = CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED;
    } else {
      layout = CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED;
    }

285
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetRNNDataDescriptor(
G
GaoWei8 已提交
286 287 288 289 290 291 292 293 294 295 296 297 298 299
        desc_, cudnn_type, layout, max_seq_length, batch_size, input_size,
        seq_length.data(), static_cast<void*>(&padding_fill)));

    return desc_;
  }

  template <typename T>
  inline cudnnRNNDataDescriptor_t descriptor(
      int max_length, int batch_size, int input_size, bool time_major,
      const std::vector<int>& seq_length) {
    return descriptor(CudnnDataType<T>::type, max_length, batch_size,
                      input_size, time_major, seq_length);
  }

300 301
  inline cudnnRNNDataDescriptor_t desc() { return desc_; }

G
GaoWei8 已提交
302 303 304 305
 private:
  cudnnRNNDataDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedRNNTensorDescriptor);
};
G
GaoWei8 已提交
306
#endif
G
GaoWei8 已提交
307 308 309 310

class ScopedDropoutDescriptor {
 public:
  ScopedDropoutDescriptor() {
311
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateDropoutDescriptor(&desc_));
G
GaoWei8 已提交
312 313
  }
  ~ScopedDropoutDescriptor() PADDLE_MAY_THROW {
314
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyDropoutDescriptor(desc_));
G
GaoWei8 已提交
315 316 317 318 319 320 321 322
  }

  inline cudnnDropoutDescriptor_t descriptor(const cudnnHandle_t& handle,
                                             const platform::Place& place,
                                             bool initialized,
                                             float dropout_prob_,
                                             framework::Tensor* dropout_state_,
                                             int seed, size_t state_size) {
G
Guo Sheng 已提交
323
    if (dropout_state_ == nullptr) {  // for no dropout or test
324
      PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetDropoutDescriptor(
G
Guo Sheng 已提交
325 326 327 328
          desc_, handle, 0 /* dropout */, nullptr, 0 /* state_size */,
          0 /* seed */));
      return desc_;
    }
G
GaoWei8 已提交
329 330
    auto* dropout_state_data = dropout_state_->data<uint8_t>();
    if (!initialized) {
331
      PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetDropoutDescriptor(
G
GaoWei8 已提交
332 333 334 335
          desc_, handle, dropout_prob_, dropout_state_data, state_size, seed));
    } else {
      auto dropout_state_dims = dropout_state_->dims();
      state_size = dropout_state_dims[0];
336
      PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnRestoreDropoutDescriptor(
G
GaoWei8 已提交
337 338 339 340
          desc_, handle, dropout_prob_, dropout_state_data, state_size, 0));
    }
    return desc_;
  }
341
  inline cudnnDropoutDescriptor_t desc() { return desc_; }
G
GaoWei8 已提交
342 343 344 345 346 347 348 349 350

 private:
  cudnnDropoutDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedDropoutDescriptor);
};

class ScopedRNNDescriptor {
 public:
  ScopedRNNDescriptor() {
351
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDescriptor(&desc_));
G
GaoWei8 已提交
352 353
  }
  ~ScopedRNNDescriptor() PADDLE_MAY_THROW {
354
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_));
G
GaoWei8 已提交
355 356
  }

357
  inline cudnnRNNDescriptor_t desc() { return desc_; }
G
GaoWei8 已提交
358 359 360 361 362 363

 private:
  cudnnRNNDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedRNNDescriptor);
};

D
dangqingqing 已提交
364 365 366
class ScopedFilterDescriptor {
 public:
  ScopedFilterDescriptor() {
367
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateFilterDescriptor(&desc_));
D
dangqingqing 已提交
368
  }
Z
Zeng Jinle 已提交
369
  ~ScopedFilterDescriptor() PADDLE_MAY_THROW {
370
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyFilterDescriptor(desc_));
D
dangqingqing 已提交
371 372 373 374
  }

  inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
                                            const cudnnDataType_t type,
武毅 已提交
375 376
                                            const std::vector<int>& kernel,
                                            const int groups = 1) {
C
chengduoZH 已提交
377
    // filter layout: MCHW(MCDHW), where M is the number of
武毅 已提交
378
    // output image channels, C is the number of input image channels,
C
chengduoZH 已提交
379 380
    // D is the depth of the filter, H is the height of the filter, and W is the
    // width of the filter.
武毅 已提交
381 382 383 384 385
    std::vector<int> kernel_with_group(kernel.begin(), kernel.end());
    if (groups > 1) {
      kernel_with_group[0] /= groups;
      // NOTE: input filter(C) of the filter is already asserted to be C/groups.
    }
386
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetFilterNdDescriptor(
武毅 已提交
387 388
        desc_, type, format, kernel_with_group.size(),
        kernel_with_group.data()));
D
dangqingqing 已提交
389 390 391 392 393
    return desc_;
  }

  template <typename T>
  inline cudnnFilterDescriptor_t descriptor(const DataLayout& order,
武毅 已提交
394 395
                                            const std::vector<int>& kernel,
                                            const int groups = 1) {
D
dangqingqing 已提交
396
    return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type,
武毅 已提交
397
                      kernel, groups);
D
dangqingqing 已提交
398 399
  }

400 401
  inline cudnnFilterDescriptor_t desc() { return desc_; }

D
dangqingqing 已提交
402 403 404 405 406 407 408 409
 private:
  cudnnFilterDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor);
};

class ScopedConvolutionDescriptor {
 public:
  ScopedConvolutionDescriptor() {
410
    PADDLE_ENFORCE_GPU_SUCCESS(
411
        dynload::cudnnCreateConvolutionDescriptor(&desc_));
D
dangqingqing 已提交
412
  }
Z
Zeng Jinle 已提交
413
  ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW {
414
    PADDLE_ENFORCE_GPU_SUCCESS(
415
        dynload::cudnnDestroyConvolutionDescriptor(desc_));
D
dangqingqing 已提交
416 417 418 419 420
  }

  inline cudnnConvolutionDescriptor_t descriptor(
      cudnnDataType_t type, const std::vector<int>& pads,
      const std::vector<int>& strides, const std::vector<int>& dilations) {
G
GaoWei8 已提交
421 422 423 424 425 426 427 428 429 430 431
    PADDLE_ENFORCE_EQ(pads.size(), strides.size(),
                      platform::errors::InvalidArgument(
                          "The size of pads and strides should be equal. But "
                          "received size of pads is %d, size of strides is %d.",
                          pads.size(), strides.size()));
    PADDLE_ENFORCE_EQ(
        pads.size(), dilations.size(),
        platform::errors::InvalidArgument(
            "The size of pads and dilations should be equal. But received size "
            "of pads is %d, size of dilations is %d.",
            pads.size(), dilations.size()));
432

K
Kexin Zhao 已提交
433 434
    cudnnDataType_t compute_type =
        (type == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
435
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetConvolutionNdDescriptor(
D
dangqingqing 已提交
436
        desc_, pads.size(), pads.data(), strides.data(), dilations.data(),
K
Kexin Zhao 已提交
437
        CUDNN_CROSS_CORRELATION, compute_type));
438
    return desc_;
D
dangqingqing 已提交
439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
  }

  template <typename T>
  inline cudnnConvolutionDescriptor_t descriptor(
      const std::vector<int>& pads, const std::vector<int>& strides,
      const std::vector<int>& dilations) {
    return descriptor(CudnnDataType<T>::type, pads, strides, dilations);
  }

 private:
  cudnnConvolutionDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedConvolutionDescriptor);
};

class ScopedPoolingDescriptor {
 public:
  ScopedPoolingDescriptor() {
456
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreatePoolingDescriptor(&desc_));
D
dangqingqing 已提交
457
  }
Z
Zeng Jinle 已提交
458
  ~ScopedPoolingDescriptor() PADDLE_MAY_THROW {
459
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyPoolingDescriptor(desc_));
D
dangqingqing 已提交
460 461 462 463 464 465
  }

  inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
                                             const std::vector<int>& kernel,
                                             const std::vector<int>& pads,
                                             const std::vector<int>& strides) {
G
GaoWei8 已提交
466 467 468 469 470 471 472 473 474 475 476
    PADDLE_ENFORCE_EQ(kernel.size(), pads.size(),
                      platform::errors::InvalidArgument(
                          "The size of kernel and pads should be equal. But "
                          "received size of kernel is %d, size of pads is %d.",
                          kernel.size(), pads.size()));
    PADDLE_ENFORCE_EQ(
        kernel.size(), strides.size(),
        platform::errors::InvalidArgument(
            "The size of kernel and strides should be equal. But "
            "received size of kernel is %d, size of strides is %d.",
            kernel.size(), strides.size()));
477
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetPoolingNdDescriptor(
D
dzhwinter 已提交
478
        desc_, (GetPoolingMode(mode)),
D
dangqingqing 已提交
479 480
        CUDNN_PROPAGATE_NAN,  // Always propagate nans.
        kernel.size(), kernel.data(), pads.data(), strides.data()));
481
    return desc_;
D
dangqingqing 已提交
482 483 484 485 486 487 488
  }

 private:
  cudnnPoolingDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedPoolingDescriptor);
};

W
whs 已提交
489 490 491
class ScopedSpatialTransformerDescriptor {
 public:
  ScopedSpatialTransformerDescriptor() {
492
    PADDLE_ENFORCE_GPU_SUCCESS(
493
        dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
W
whs 已提交
494
  }
Z
Zeng Jinle 已提交
495
  ~ScopedSpatialTransformerDescriptor() PADDLE_MAY_THROW {
496
    PADDLE_ENFORCE_GPU_SUCCESS(
497
        dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
W
whs 已提交
498 499 500 501 502
  }

  template <typename T>
  inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims,
                                                        const int dimA[]) {
503
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetSpatialTransformerNdDescriptor(
W
whs 已提交
504 505 506 507 508 509 510 511 512
        desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
    return desc_;
  }

 private:
  cudnnSpatialTransformerDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedSpatialTransformerDescriptor);
};

Q
qingqing01 已提交
513 514 515
class ScopedActivationDescriptor {
 public:
  ScopedActivationDescriptor() {
516
    PADDLE_ENFORCE_GPU_SUCCESS(
517
        dynload::cudnnCreateActivationDescriptor(&desc_));
Q
qingqing01 已提交
518
  }
Z
Zeng Jinle 已提交
519
  ~ScopedActivationDescriptor() PADDLE_MAY_THROW {
520
    PADDLE_ENFORCE_GPU_SUCCESS(
521
        dynload::cudnnDestroyActivationDescriptor(desc_));
Q
qingqing01 已提交
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553
  }

  template <typename T>
  inline cudnnActivationDescriptor_t descriptor(
      const std::string& act, double value_max = static_cast<double>(0.)) {
    double relu_ceiling = 0.0;
    ActivationMode activation_mode = StringToActivationMode(act);
    cudnnActivationMode_t mode;
    switch (activation_mode) {
#if CUDNN_VERSION >= 7100
      case ActivationMode::kNone:
        mode = CUDNN_ACTIVATION_IDENTITY;
        break;
#endif
      case ActivationMode::kRelu6:
        relu_ceiling = 6.0;
        mode = CUDNN_ACTIVATION_CLIPPED_RELU;
        break;
      case ActivationMode::kReluX:
        relu_ceiling = value_max;
        mode = CUDNN_ACTIVATION_CLIPPED_RELU;
        break;
      case ActivationMode::kRelu:
        mode = CUDNN_ACTIVATION_RELU;
        break;
      case ActivationMode::kSigmoid:
        mode = CUDNN_ACTIVATION_SIGMOID;
        break;
      case ActivationMode::kTanh:
        mode = CUDNN_ACTIVATION_TANH;
        break;
      default:
G
GaoWei8 已提交
554 555 556
        PADDLE_THROW(platform::errors::Unimplemented(
            "Unrecognized CUDNN activation mode: %d.",
            static_cast<int>(activation_mode)));
Q
qingqing01 已提交
557
    }
558
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetActivationDescriptor(
Q
qingqing01 已提交
559 560 561 562 563 564 565 566 567
        desc_, mode, CUDNN_NOT_PROPAGATE_NAN, relu_ceiling));
    return desc_;
  }

 private:
  cudnnActivationDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedActivationDescriptor);
};

568 569 570 571 572
inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
  use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace());
#ifdef PADDLE_WITH_CUDA
  if (use_cudnn) {
573
    auto& dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
574 575 576 577 578 579
    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
  }
#endif
  return use_cudnn;
}

W
Wu Yi 已提交
580 581 582 583
#if CUDNN_VERSION >= 7001
class ScopedCTCLossDescriptor {
 public:
  ScopedCTCLossDescriptor() {
584
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateCTCLossDescriptor(&desc_));
W
Wu Yi 已提交
585
  }
Z
Zeng Jinle 已提交
586
  ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW {
587
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyCTCLossDescriptor(desc_));
W
Wu Yi 已提交
588 589 590 591
  }

  template <typename T>
  inline cudnnCTCLossDescriptor_t descriptor() {
592
    PADDLE_ENFORCE_GPU_SUCCESS(
W
Wu Yi 已提交
593 594 595 596 597 598 599 600 601 602
        dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType<T>::type));
    return desc_;
  }

 private:
  cudnnCTCLossDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedCTCLossDescriptor);
};
#endif

D
dangqingqing 已提交
603 604
}  // namespace platform
}  // namespace paddle