cudnn_helper.h 21.4 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
D
dangqingqing 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

Q
qingqing01 已提交
17
#include <string>
Y
Pass CI  
Yu Yang 已提交
18
#include <vector>
19 20

#include "paddle/fluid/framework/operator.h"
Y
Yi Wang 已提交
21 22
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/enforce.h"
K
Kexin Zhao 已提交
23
#include "paddle/fluid/platform/float16.h"
Y
Yi Wang 已提交
24
#include "paddle/fluid/platform/macros.h"
D
dangqingqing 已提交
25

D
dzhwinter 已提交
26 27
DECLARE_bool(cudnn_deterministic);

D
dangqingqing 已提交
28 29 30
namespace paddle {
namespace platform {

Q
Qiao Longfei 已提交
31 32 33
#define CUDNN_VERSION_MIN(major, minor, patch) \
  (CUDNN_VERSION >= ((major)*1000 + (minor)*100 + (patch)))

D
"fix"  
dzhwinter 已提交
34 35 36 37
enum class DataLayout {  // Not use
  kNHWC,
  kNCHW,
  kNCDHW,
38
  kNDHWC,  // add, liyamei
D
"fix"  
dzhwinter 已提交
39 40 41 42 43 44
  kNCHW_VECT_C,
};

enum class PoolingMode {
  kMaximum,
  kMaximumDeterministic,
45 46
  kAverageExclusive,
  kAverageInclusive,
D
"fix"  
dzhwinter 已提交
47 48
};

49
enum class ActivationMode {
Q
qingqing01 已提交
50 51 52 53 54 55 56 57 58
  kNone,  // activation identity
  kSigmoid,
  kRelu,
  kRelu6,
  kReluX,
  kTanh,
  kBandPass,
};

D
dzhwinter 已提交
59 60 61 62
inline cudnnPoolingMode_t GetPoolingMode(const PoolingMode& mode) {
  switch (mode) {
    case PoolingMode::kMaximumDeterministic:
      return CUDNN_POOLING_MAX_DETERMINISTIC;
63
    case PoolingMode::kAverageExclusive:
D
dzhwinter 已提交
64
      return CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
65 66
    case PoolingMode::kAverageInclusive:
      return CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
D
dzhwinter 已提交
67 68 69
    case PoolingMode::kMaximum:
      return CUDNN_POOLING_MAX;
    default:
G
GaoWei8 已提交
70 71
      PADDLE_THROW(
          platform::errors::Unimplemented("Unexpected CUDNN pooling mode."));
D
dzhwinter 已提交
72 73
  }
}
D
dzhwinter 已提交
74

Q
qingqing01 已提交
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
inline ActivationMode StringToActivationMode(const std::string& str) {
  if (str == "identity") {
    return ActivationMode::kNone;
  } else if (str == "sigmoid") {
    return ActivationMode::kSigmoid;
  } else if (str == "relu") {
    return ActivationMode::kRelu;
  } else if (str == "relu6") {
    return ActivationMode::kRelu6;
  } else if (str == "relux") {
    return ActivationMode::kReluX;
  } else if (str == "tanh") {
    return ActivationMode::kTanh;
  } else if (str == "bandpass") {
    return ActivationMode::kBandPass;
  } else {
G
GaoWei8 已提交
91 92
    PADDLE_THROW(platform::errors::Unimplemented(
        "Unknown CUDNN activation string: %s.", str));
Q
qingqing01 已提交
93 94 95
  }
}

D
dangqingqing 已提交
96 97 98
template <typename T>
class CudnnDataType;

W
wuhuanzhou 已提交
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
// CUDNN_DATA_BFLOAT16 is not valid before cudnn8.1
#if CUDNN_VERSION_MIN(8, 1, 0)
template <>
class CudnnDataType<bfloat16> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_BFLOAT16;
  using ScalingParamType = const float;
  using BatchNormParamType = float;
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0;
    return &v;
  }
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0;
    return &v;
  }
};
#endif

K
Kexin Zhao 已提交
118 119 120 121
template <>
class CudnnDataType<float16> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_HALF;
K
Kexin Zhao 已提交
122
  // The scaling param type is float for HALF and FLOAT tensors
K
update  
Kexin Zhao 已提交
123 124
  using ScalingParamType = const float;
  using BatchNormParamType = float;
K
Kexin Zhao 已提交
125
  static ScalingParamType* kOne() {
K
Kexin Zhao 已提交
126
    static ScalingParamType v = 1.0;
K
Kexin Zhao 已提交
127 128 129
    return &v;
  }
  static ScalingParamType* kZero() {
K
Kexin Zhao 已提交
130
    static ScalingParamType v = 0.0;
K
Kexin Zhao 已提交
131 132 133 134
    return &v;
  }
};

D
dangqingqing 已提交
135 136 137 138
template <>
class CudnnDataType<float> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
K
update  
Kexin Zhao 已提交
139 140
  using ScalingParamType = const float;
  using BatchNormParamType = float;
Q
Qiao Longfei 已提交
141 142 143 144 145 146 147 148
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0;
    return &v;
  }
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0;
    return &v;
  }
D
dangqingqing 已提交
149 150 151 152 153 154
};

template <>
class CudnnDataType<double> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
K
update  
Kexin Zhao 已提交
155 156
  using ScalingParamType = const double;
  using BatchNormParamType = double;
Q
Qiao Longfei 已提交
157 158 159 160 161 162 163 164
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0;
    return &v;
  }
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0;
    return &v;
  }
D
dangqingqing 已提交
165 166
};

C
chengduoZH 已提交
167 168
inline cudnnTensorFormat_t GetCudnnTensorFormat(
    const DataLayout& order) {  // Not use
D
dangqingqing 已提交
169 170 171 172 173
  switch (order) {
    case DataLayout::kNHWC:
      return CUDNN_TENSOR_NHWC;
    case DataLayout::kNCHW:
      return CUDNN_TENSOR_NCHW;
C
chengduoZH 已提交
174
    case DataLayout::kNCDHW:
武毅 已提交
175
      return CUDNN_TENSOR_NCHW;  // NOTE: cudnn treat NdTensor as the same
176 177
    case DataLayout::kNDHWC:
      return CUDNN_TENSOR_NHWC;  // add, liyamei
D
dangqingqing 已提交
178
    default:
G
GaoWei8 已提交
179 180
      PADDLE_THROW(platform::errors::Unimplemented(
          "CUDNN has no equivalent dataLayout for input order."));
D
dangqingqing 已提交
181 182 183 184 185 186 187
  }
  return CUDNN_TENSOR_NCHW;
}

class ScopedTensorDescriptor {
 public:
  ScopedTensorDescriptor() {
188
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateTensorDescriptor(&desc_));
D
dangqingqing 已提交
189
  }
Z
Zeng Jinle 已提交
190
  ~ScopedTensorDescriptor() PADDLE_MAY_THROW {
191
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyTensorDescriptor(desc_));
D
dangqingqing 已提交
192 193 194 195
  }

  inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
                                            const cudnnDataType_t type,
武毅 已提交
196 197 198
                                            const std::vector<int>& dims,
                                            const int groups = 1) {
    // the format is not used now, will add later
D
dangqingqing 已提交
199 200
    std::vector<int> strides(dims.size());
    strides[dims.size() - 1] = 1;
201 202
    for (int i = dims.size() - 2; i >= 0; i--) {
      strides[i] = dims[i + 1] * strides[i + 1];
D
dangqingqing 已提交
203
    }
武毅 已提交
204
    // Update tensor descriptor dims setting if groups > 1
205 206
    // NOTE: Here, Assume using NCHW or NCDHW order
    std::vector<int> dims_with_group(dims.begin(), dims.end());
武毅 已提交
207 208 209
    if (groups > 1) {
      dims_with_group[1] = dims_with_group[1] / groups;
    }
210 211 212

    if (dims.size() == 4) {
      if (format == CUDNN_TENSOR_NCHW) {
213 214 215 216 217 218
        PADDLE_ENFORCE_GPU_SUCCESS(
            dynload::cudnnSetTensorNdDescriptor(desc_,
                                                type,
                                                dims_with_group.size(),
                                                dims_with_group.data(),
                                                strides.data()));
219
      } else {  // CUDNN_TENSOR_NHWC
220
        PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensor4dDescriptor(
221 222 223 224
            desc_, format, type, dims[0], dims[3], dims[1], dims[2]));
      }
    } else if (dims.size() == 5) {
      if (format == CUDNN_TENSOR_NCHW) {
225 226 227 228 229 230
        PADDLE_ENFORCE_GPU_SUCCESS(
            dynload::cudnnSetTensorNdDescriptor(desc_,
                                                type,
                                                dims_with_group.size(),
                                                dims_with_group.data(),
                                                strides.data()));
231
      } else {  // CUDNN_TENSOR_NHWC
232
        PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptorEx(
233 234 235
            desc_, format, type, dims.size(), dims.data()));
      }
    }
D
dangqingqing 已提交
236 237 238 239 240
    return desc_;
  }

  template <typename T>
  inline cudnnTensorDescriptor_t descriptor(const DataLayout& order,
武毅 已提交
241 242
                                            const std::vector<int>& dims,
                                            const int groups = 1) {
243 244
    return descriptor(
        GetCudnnTensorFormat(order), CudnnDataType<T>::type, dims, groups);
D
dangqingqing 已提交
245 246
  }

G
GaoWei8 已提交
247 248 249
  inline cudnnTensorDescriptor_t descriptor(const cudnnDataType_t cudnn_type,
                                            const std::vector<int>& dim,
                                            const std::vector<int>& stride) {
250
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
G
GaoWei8 已提交
251 252 253 254 255 256 257 258 259 260
        desc_, cudnn_type, dim.size(), dim.data(), stride.data()));
    return desc_;
  }

  template <typename T>
  inline cudnnTensorDescriptor_t descriptor(const std::vector<int>& dim,
                                            const std::vector<int>& stride) {
    return descriptor(CudnnDataType<T>::type, dim, stride);
  }

261 262
  inline cudnnTensorDescriptor_t desc() { return desc_; }

D
dangqingqing 已提交
263 264 265 266 267
 private:
  cudnnTensorDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor);
};

G
GaoWei8 已提交
268
#if CUDNN_VERSION >= 7201
G
GaoWei8 已提交
269 270 271
class ScopedRNNTensorDescriptor {
 public:
  ScopedRNNTensorDescriptor() {
272
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDataDescriptor(&desc_));
G
GaoWei8 已提交
273 274 275
  }

  ~ScopedRNNTensorDescriptor() PADDLE_MAY_THROW {
276
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDataDescriptor(desc_));
G
GaoWei8 已提交
277 278 279
  }

  inline cudnnRNNDataDescriptor_t descriptor(
280 281 282 283 284 285
      const cudnnDataType_t cudnn_type,
      int max_seq_length,
      int batch_size,
      int input_size,
      bool time_major,
      const std::vector<int>& seq_length) {
286
    static double padding_fill = 0.0f;
G
GaoWei8 已提交
287 288 289 290 291 292 293 294
    cudnnRNNDataLayout_t layout;

    if (time_major) {
      layout = CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED;
    } else {
      layout = CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED;
    }

295 296 297 298 299 300 301 302 303
    PADDLE_ENFORCE_GPU_SUCCESS(
        dynload::cudnnSetRNNDataDescriptor(desc_,
                                           cudnn_type,
                                           layout,
                                           max_seq_length,
                                           batch_size,
                                           input_size,
                                           seq_length.data(),
                                           static_cast<void*>(&padding_fill)));
G
GaoWei8 已提交
304 305 306 307 308 309

    return desc_;
  }

  template <typename T>
  inline cudnnRNNDataDescriptor_t descriptor(
310 311 312 313
      int max_length,
      int batch_size,
      int input_size,
      bool time_major,
G
GaoWei8 已提交
314
      const std::vector<int>& seq_length) {
315 316 317 318 319 320
    return descriptor(CudnnDataType<T>::type,
                      max_length,
                      batch_size,
                      input_size,
                      time_major,
                      seq_length);
G
GaoWei8 已提交
321 322
  }

323 324
  inline cudnnRNNDataDescriptor_t desc() { return desc_; }

G
GaoWei8 已提交
325 326 327 328
 private:
  cudnnRNNDataDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedRNNTensorDescriptor);
};
G
GaoWei8 已提交
329
#endif
G
GaoWei8 已提交
330 331 332 333

class ScopedDropoutDescriptor {
 public:
  ScopedDropoutDescriptor() {
334
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateDropoutDescriptor(&desc_));
G
GaoWei8 已提交
335 336
  }
  ~ScopedDropoutDescriptor() PADDLE_MAY_THROW {
337
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyDropoutDescriptor(desc_));
G
GaoWei8 已提交
338 339 340 341 342 343
  }

  inline cudnnDropoutDescriptor_t descriptor(const cudnnHandle_t& handle,
                                             const platform::Place& place,
                                             bool initialized,
                                             float dropout_prob_,
344
                                             phi::DenseTensor* dropout_state_,
345 346
                                             int seed,
                                             size_t state_size) {
G
Guo Sheng 已提交
347
    if (dropout_state_ == nullptr) {  // for no dropout or test
348 349 350 351 352 353 354
      PADDLE_ENFORCE_GPU_SUCCESS(
          dynload::cudnnSetDropoutDescriptor(desc_,
                                             handle,
                                             0 /* dropout */,
                                             nullptr,
                                             0 /* state_size */,
                                             0 /* seed */));
G
Guo Sheng 已提交
355 356
      return desc_;
    }
G
GaoWei8 已提交
357 358
    auto* dropout_state_data = dropout_state_->data<uint8_t>();
    if (!initialized) {
359
      PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetDropoutDescriptor(
G
GaoWei8 已提交
360 361 362 363
          desc_, handle, dropout_prob_, dropout_state_data, state_size, seed));
    } else {
      auto dropout_state_dims = dropout_state_->dims();
      state_size = dropout_state_dims[0];
364
      PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnRestoreDropoutDescriptor(
G
GaoWei8 已提交
365 366 367 368
          desc_, handle, dropout_prob_, dropout_state_data, state_size, 0));
    }
    return desc_;
  }
369
  inline cudnnDropoutDescriptor_t desc() { return desc_; }
G
GaoWei8 已提交
370 371 372 373 374 375 376 377 378

 private:
  cudnnDropoutDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedDropoutDescriptor);
};

class ScopedRNNDescriptor {
 public:
  ScopedRNNDescriptor() {
379
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDescriptor(&desc_));
G
GaoWei8 已提交
380 381
  }
  ~ScopedRNNDescriptor() PADDLE_MAY_THROW {
382
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_));
G
GaoWei8 已提交
383 384
  }

385
  inline cudnnRNNDescriptor_t desc() { return desc_; }
G
GaoWei8 已提交
386 387 388 389 390 391

 private:
  cudnnRNNDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedRNNDescriptor);
};

D
dangqingqing 已提交
392 393 394
class ScopedFilterDescriptor {
 public:
  ScopedFilterDescriptor() {
395
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateFilterDescriptor(&desc_));
D
dangqingqing 已提交
396
  }
Z
Zeng Jinle 已提交
397
  ~ScopedFilterDescriptor() PADDLE_MAY_THROW {
398
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyFilterDescriptor(desc_));
D
dangqingqing 已提交
399 400 401 402
  }

  inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
                                            const cudnnDataType_t type,
武毅 已提交
403 404
                                            const std::vector<int>& kernel,
                                            const int groups = 1) {
C
chengduoZH 已提交
405
    // filter layout: MCHW(MCDHW), where M is the number of
武毅 已提交
406
    // output image channels, C is the number of input image channels,
C
chengduoZH 已提交
407 408
    // D is the depth of the filter, H is the height of the filter, and W is the
    // width of the filter.
武毅 已提交
409 410 411 412 413
    std::vector<int> kernel_with_group(kernel.begin(), kernel.end());
    if (groups > 1) {
      kernel_with_group[0] /= groups;
      // NOTE: input filter(C) of the filter is already asserted to be C/groups.
    }
414 415 416 417 418 419
    PADDLE_ENFORCE_GPU_SUCCESS(
        dynload::cudnnSetFilterNdDescriptor(desc_,
                                            type,
                                            format,
                                            kernel_with_group.size(),
                                            kernel_with_group.data()));
D
dangqingqing 已提交
420 421 422 423 424
    return desc_;
  }

  template <typename T>
  inline cudnnFilterDescriptor_t descriptor(const DataLayout& order,
武毅 已提交
425 426
                                            const std::vector<int>& kernel,
                                            const int groups = 1) {
427 428
    return descriptor(
        GetCudnnTensorFormat(order), CudnnDataType<T>::type, kernel, groups);
D
dangqingqing 已提交
429 430
  }

431 432
  inline cudnnFilterDescriptor_t desc() { return desc_; }

D
dangqingqing 已提交
433 434 435 436 437 438 439 440
 private:
  cudnnFilterDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor);
};

class ScopedConvolutionDescriptor {
 public:
  ScopedConvolutionDescriptor() {
441
    PADDLE_ENFORCE_GPU_SUCCESS(
442
        dynload::cudnnCreateConvolutionDescriptor(&desc_));
D
dangqingqing 已提交
443
  }
Z
Zeng Jinle 已提交
444
  ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW {
445
    PADDLE_ENFORCE_GPU_SUCCESS(
446
        dynload::cudnnDestroyConvolutionDescriptor(desc_));
D
dangqingqing 已提交
447 448 449
  }

  inline cudnnConvolutionDescriptor_t descriptor(
450 451 452 453 454 455
      cudnnDataType_t type,
      const std::vector<int>& pads,
      const std::vector<int>& strides,
      const std::vector<int>& dilations) {
    PADDLE_ENFORCE_EQ(pads.size(),
                      strides.size(),
G
GaoWei8 已提交
456 457 458
                      platform::errors::InvalidArgument(
                          "The size of pads and strides should be equal. But "
                          "received size of pads is %d, size of strides is %d.",
459 460
                          pads.size(),
                          strides.size()));
G
GaoWei8 已提交
461
    PADDLE_ENFORCE_EQ(
462 463
        pads.size(),
        dilations.size(),
G
GaoWei8 已提交
464 465 466
        platform::errors::InvalidArgument(
            "The size of pads and dilations should be equal. But received size "
            "of pads is %d, size of dilations is %d.",
467 468
            pads.size(),
            dilations.size()));
469

K
Kexin Zhao 已提交
470 471
    cudnnDataType_t compute_type =
        (type == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
472 473 474 475 476 477 478 479
    PADDLE_ENFORCE_GPU_SUCCESS(
        dynload::cudnnSetConvolutionNdDescriptor(desc_,
                                                 pads.size(),
                                                 pads.data(),
                                                 strides.data(),
                                                 dilations.data(),
                                                 CUDNN_CROSS_CORRELATION,
                                                 compute_type));
480
    return desc_;
D
dangqingqing 已提交
481 482 483 484
  }

  template <typename T>
  inline cudnnConvolutionDescriptor_t descriptor(
485 486
      const std::vector<int>& pads,
      const std::vector<int>& strides,
D
dangqingqing 已提交
487 488 489 490 491 492 493 494 495 496 497 498
      const std::vector<int>& dilations) {
    return descriptor(CudnnDataType<T>::type, pads, strides, dilations);
  }

 private:
  cudnnConvolutionDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedConvolutionDescriptor);
};

class ScopedPoolingDescriptor {
 public:
  ScopedPoolingDescriptor() {
499
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreatePoolingDescriptor(&desc_));
D
dangqingqing 已提交
500
  }
Z
Zeng Jinle 已提交
501
  ~ScopedPoolingDescriptor() PADDLE_MAY_THROW {
502
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyPoolingDescriptor(desc_));
D
dangqingqing 已提交
503 504 505 506 507 508
  }

  inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
                                             const std::vector<int>& kernel,
                                             const std::vector<int>& pads,
                                             const std::vector<int>& strides) {
509 510
    PADDLE_ENFORCE_EQ(kernel.size(),
                      pads.size(),
G
GaoWei8 已提交
511 512 513
                      platform::errors::InvalidArgument(
                          "The size of kernel and pads should be equal. But "
                          "received size of kernel is %d, size of pads is %d.",
514 515
                          kernel.size(),
                          pads.size()));
G
GaoWei8 已提交
516
    PADDLE_ENFORCE_EQ(
517 518
        kernel.size(),
        strides.size(),
G
GaoWei8 已提交
519 520 521
        platform::errors::InvalidArgument(
            "The size of kernel and strides should be equal. But "
            "received size of kernel is %d, size of strides is %d.",
522 523
            kernel.size(),
            strides.size()));
524
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetPoolingNdDescriptor(
525 526
        desc_,
        (GetPoolingMode(mode)),
D
dangqingqing 已提交
527
        CUDNN_PROPAGATE_NAN,  // Always propagate nans.
528 529 530 531
        kernel.size(),
        kernel.data(),
        pads.data(),
        strides.data()));
532
    return desc_;
D
dangqingqing 已提交
533 534 535 536 537 538 539
  }

 private:
  cudnnPoolingDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedPoolingDescriptor);
};

W
whs 已提交
540 541 542
class ScopedSpatialTransformerDescriptor {
 public:
  ScopedSpatialTransformerDescriptor() {
543
    PADDLE_ENFORCE_GPU_SUCCESS(
544
        dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
W
whs 已提交
545
  }
Z
Zeng Jinle 已提交
546
  ~ScopedSpatialTransformerDescriptor() PADDLE_MAY_THROW {
547
    PADDLE_ENFORCE_GPU_SUCCESS(
548
        dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
W
whs 已提交
549 550 551 552 553
  }

  template <typename T>
  inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims,
                                                        const int dimA[]) {
554
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetSpatialTransformerNdDescriptor(
W
whs 已提交
555 556 557 558 559 560 561 562 563
        desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
    return desc_;
  }

 private:
  cudnnSpatialTransformerDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedSpatialTransformerDescriptor);
};

Q
qingqing01 已提交
564 565 566
class ScopedActivationDescriptor {
 public:
  ScopedActivationDescriptor() {
567
    PADDLE_ENFORCE_GPU_SUCCESS(
568
        dynload::cudnnCreateActivationDescriptor(&desc_));
Q
qingqing01 已提交
569
  }
Z
Zeng Jinle 已提交
570
  ~ScopedActivationDescriptor() PADDLE_MAY_THROW {
571
    PADDLE_ENFORCE_GPU_SUCCESS(
572
        dynload::cudnnDestroyActivationDescriptor(desc_));
Q
qingqing01 已提交
573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604
  }

  template <typename T>
  inline cudnnActivationDescriptor_t descriptor(
      const std::string& act, double value_max = static_cast<double>(0.)) {
    double relu_ceiling = 0.0;
    ActivationMode activation_mode = StringToActivationMode(act);
    cudnnActivationMode_t mode;
    switch (activation_mode) {
#if CUDNN_VERSION >= 7100
      case ActivationMode::kNone:
        mode = CUDNN_ACTIVATION_IDENTITY;
        break;
#endif
      case ActivationMode::kRelu6:
        relu_ceiling = 6.0;
        mode = CUDNN_ACTIVATION_CLIPPED_RELU;
        break;
      case ActivationMode::kReluX:
        relu_ceiling = value_max;
        mode = CUDNN_ACTIVATION_CLIPPED_RELU;
        break;
      case ActivationMode::kRelu:
        mode = CUDNN_ACTIVATION_RELU;
        break;
      case ActivationMode::kSigmoid:
        mode = CUDNN_ACTIVATION_SIGMOID;
        break;
      case ActivationMode::kTanh:
        mode = CUDNN_ACTIVATION_TANH;
        break;
      default:
G
GaoWei8 已提交
605 606 607
        PADDLE_THROW(platform::errors::Unimplemented(
            "Unrecognized CUDNN activation mode: %d.",
            static_cast<int>(activation_mode)));
Q
qingqing01 已提交
608
    }
609
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetActivationDescriptor(
Q
qingqing01 已提交
610 611 612 613 614 615 616 617 618
        desc_, mode, CUDNN_NOT_PROPAGATE_NAN, relu_ceiling));
    return desc_;
  }

 private:
  cudnnActivationDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedActivationDescriptor);
};

619
inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
H
HongyuJia 已提交
620
  bool use_cudnn = ctx.HasAttr("use_cudnn") && ctx.Attr<bool>("use_cudnn");
621 622 623
  use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace());
#ifdef PADDLE_WITH_CUDA
  if (use_cudnn) {
L
Leo Chen 已提交
624
    auto& dev_ctx = ctx.device_context<phi::GPUContext>();
625 626 627 628 629 630
    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
  }
#endif
  return use_cudnn;
}

W
Wu Yi 已提交
631 632 633 634
#if CUDNN_VERSION >= 7001
class ScopedCTCLossDescriptor {
 public:
  ScopedCTCLossDescriptor() {
635
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateCTCLossDescriptor(&desc_));
W
Wu Yi 已提交
636
  }
Z
Zeng Jinle 已提交
637
  ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW {
638
    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyCTCLossDescriptor(desc_));
W
Wu Yi 已提交
639 640 641 642
  }

  template <typename T>
  inline cudnnCTCLossDescriptor_t descriptor() {
643
    PADDLE_ENFORCE_GPU_SUCCESS(
W
Wu Yi 已提交
644 645 646 647 648 649 650 651 652 653
        dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType<T>::type));
    return desc_;
  }

 private:
  cudnnCTCLossDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedCTCLossDescriptor);
};
#endif

D
dangqingqing 已提交
654 655
}  // namespace platform
}  // namespace paddle