cudnn_helper.h 19.9 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
D
dangqingqing 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

Q
qingqing01 已提交
17
#include <string>
Y
Pass CI  
Yu Yang 已提交
18
#include <vector>
19 20

#include "paddle/fluid/framework/operator.h"
Y
Yi Wang 已提交
21 22
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/enforce.h"
K
Kexin Zhao 已提交
23
#include "paddle/fluid/platform/float16.h"
Y
Yi Wang 已提交
24
#include "paddle/fluid/platform/macros.h"
D
dangqingqing 已提交
25

W
wanghuancoder 已提交
26 27 28 29 30 31
namespace paddle {
namespace platform {
struct float16;
}  // namespace platform
}  // namespace paddle

D
dzhwinter 已提交
32 33
DECLARE_bool(cudnn_deterministic);

D
dangqingqing 已提交
34 35 36
namespace paddle {
namespace platform {

Q
Qiao Longfei 已提交
37 38 39
#define CUDNN_VERSION_MIN(major, minor, patch) \
  (CUDNN_VERSION >= ((major)*1000 + (minor)*100 + (patch)))

D
"fix"  
dzhwinter 已提交
40 41 42 43
enum class DataLayout {  // Not use
  kNHWC,
  kNCHW,
  kNCDHW,
44
  kNDHWC,  // add, liyamei
D
"fix"  
dzhwinter 已提交
45 46 47 48 49 50
  kNCHW_VECT_C,
};

enum class PoolingMode {
  kMaximum,
  kMaximumDeterministic,
51 52
  kAverageExclusive,
  kAverageInclusive,
D
"fix"  
dzhwinter 已提交
53 54
};

55
enum class ActivationMode {
Q
qingqing01 已提交
56 57 58 59 60 61 62 63 64
  kNone,  // activation identity
  kSigmoid,
  kRelu,
  kRelu6,
  kReluX,
  kTanh,
  kBandPass,
};

D
dzhwinter 已提交
65 66 67 68
inline cudnnPoolingMode_t GetPoolingMode(const PoolingMode& mode) {
  switch (mode) {
    case PoolingMode::kMaximumDeterministic:
      return CUDNN_POOLING_MAX_DETERMINISTIC;
69
    case PoolingMode::kAverageExclusive:
D
dzhwinter 已提交
70
      return CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
71 72
    case PoolingMode::kAverageInclusive:
      return CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
D
dzhwinter 已提交
73 74 75
    case PoolingMode::kMaximum:
      return CUDNN_POOLING_MAX;
    default:
G
GaoWei8 已提交
76 77
      PADDLE_THROW(
          platform::errors::Unimplemented("Unexpected CUDNN pooling mode."));
D
dzhwinter 已提交
78 79
  }
}
D
dzhwinter 已提交
80

Q
qingqing01 已提交
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
inline ActivationMode StringToActivationMode(const std::string& str) {
  if (str == "identity") {
    return ActivationMode::kNone;
  } else if (str == "sigmoid") {
    return ActivationMode::kSigmoid;
  } else if (str == "relu") {
    return ActivationMode::kRelu;
  } else if (str == "relu6") {
    return ActivationMode::kRelu6;
  } else if (str == "relux") {
    return ActivationMode::kReluX;
  } else if (str == "tanh") {
    return ActivationMode::kTanh;
  } else if (str == "bandpass") {
    return ActivationMode::kBandPass;
  } else {
G
GaoWei8 已提交
97 98
    PADDLE_THROW(platform::errors::Unimplemented(
        "Unknown CUDNN activation string: %s.", str));
Q
qingqing01 已提交
99 100 101
  }
}

D
dangqingqing 已提交
102 103 104
template <typename T>
class CudnnDataType;

W
wuhuanzhou 已提交
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
// CUDNN_DATA_BFLOAT16 is not valid before cudnn8.1
#if CUDNN_VERSION_MIN(8, 1, 0)
template <>
class CudnnDataType<bfloat16> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_BFLOAT16;
  using ScalingParamType = const float;
  using BatchNormParamType = float;
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0;
    return &v;
  }
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0;
    return &v;
  }
};
#endif

K
Kexin Zhao 已提交
124 125 126 127
template <>
class CudnnDataType<float16> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_HALF;
K
Kexin Zhao 已提交
128
  // The scaling param type is float for HALF and FLOAT tensors
K
update  
Kexin Zhao 已提交
129 130
  using ScalingParamType = const float;
  using BatchNormParamType = float;
K
Kexin Zhao 已提交
131
  static ScalingParamType* kOne() {
K
Kexin Zhao 已提交
132
    static ScalingParamType v = 1.0;
K
Kexin Zhao 已提交
133 134 135
    return &v;
  }
  static ScalingParamType* kZero() {
K
Kexin Zhao 已提交
136
    static ScalingParamType v = 0.0;
K
Kexin Zhao 已提交
137 138 139 140
    return &v;
  }
};

D
dangqingqing 已提交
141 142 143 144
template <>
class CudnnDataType<float> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
K
update  
Kexin Zhao 已提交
145 146
  using ScalingParamType = const float;
  using BatchNormParamType = float;
Q
Qiao Longfei 已提交
147 148 149 150 151 152 153 154
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0;
    return &v;
  }
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0;
    return &v;
  }
D
dangqingqing 已提交
155 156 157 158 159 160
};

template <>
class CudnnDataType<double> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
K
update  
Kexin Zhao 已提交
161 162
  using ScalingParamType = const double;
  using BatchNormParamType = double;
Q
Qiao Longfei 已提交
163 164 165 166 167 168 169 170
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0;
    return &v;
  }
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0;
    return &v;
  }
D
dangqingqing 已提交
171 172
};

C
chengduoZH 已提交
173 174
inline cudnnTensorFormat_t GetCudnnTensorFormat(
    const DataLayout& order) {  // Not use
D
dangqingqing 已提交
175 176 177 178 179
  switch (order) {
    case DataLayout::kNHWC:
      return CUDNN_TENSOR_NHWC;
    case DataLayout::kNCHW:
      return CUDNN_TENSOR_NCHW;
C
chengduoZH 已提交
180
    case DataLayout::kNCDHW:
武毅 已提交
181
      return CUDNN_TENSOR_NCHW;  // NOTE: cudnn treat NdTensor as the same
182 183
    case DataLayout::kNDHWC:
      return CUDNN_TENSOR_NHWC;  // add, liyamei
D
dangqingqing 已提交
184
    default:
G
GaoWei8 已提交
185 186
      PADDLE_THROW(platform::errors::Unimplemented(
          "CUDNN has no equivalent dataLayout for input order."));
D
dangqingqing 已提交
187 188 189 190 191 192 193
  }
  return CUDNN_TENSOR_NCHW;
}

class ScopedTensorDescriptor {
 public:
  ScopedTensorDescriptor() {
194
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateTensorDescriptor(&desc_));
D
dangqingqing 已提交
195
  }
Z
Zeng Jinle 已提交
196
  ~ScopedTensorDescriptor() PADDLE_MAY_THROW {
197
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyTensorDescriptor(desc_));
D
dangqingqing 已提交
198 199 200 201
  }

  inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
                                            const cudnnDataType_t type,
武毅 已提交
202 203 204
                                            const std::vector<int>& dims,
                                            const int groups = 1) {
    // the format is not used now, will add later
D
dangqingqing 已提交
205 206
    std::vector<int> strides(dims.size());
    strides[dims.size() - 1] = 1;
207 208
    for (int i = dims.size() - 2; i >= 0; i--) {
      strides[i] = dims[i + 1] * strides[i + 1];
D
dangqingqing 已提交
209
    }
武毅 已提交
210
    // Update tensor descriptor dims setting if groups > 1
211 212
    // NOTE: Here, Assume using NCHW or NCDHW order
    std::vector<int> dims_with_group(dims.begin(), dims.end());
武毅 已提交
213 214 215
    if (groups > 1) {
      dims_with_group[1] = dims_with_group[1] / groups;
    }
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235

    if (dims.size() == 4) {
      if (format == CUDNN_TENSOR_NCHW) {
        PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
            desc_, type, dims_with_group.size(), dims_with_group.data(),
            strides.data()));
      } else {  // CUDNN_TENSOR_NHWC
        PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensor4dDescriptor(
            desc_, format, type, dims[0], dims[3], dims[1], dims[2]));
      }
    } else if (dims.size() == 5) {
      if (format == CUDNN_TENSOR_NCHW) {
        PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
            desc_, type, dims_with_group.size(), dims_with_group.data(),
            strides.data()));
      } else {  // CUDNN_TENSOR_NHWC
        PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptorEx(
            desc_, format, type, dims.size(), dims.data()));
      }
    }
D
dangqingqing 已提交
236 237 238 239 240
    return desc_;
  }

  template <typename T>
  inline cudnnTensorDescriptor_t descriptor(const DataLayout& order,
武毅 已提交
241 242 243 244
                                            const std::vector<int>& dims,
                                            const int groups = 1) {
    return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type, dims,
                      groups);
D
dangqingqing 已提交
245 246
  }

G
GaoWei8 已提交
247 248 249 250 251 252 253 254 255 256 257 258 259 260
  inline cudnnTensorDescriptor_t descriptor(const cudnnDataType_t cudnn_type,
                                            const std::vector<int>& dim,
                                            const std::vector<int>& stride) {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
        desc_, cudnn_type, dim.size(), dim.data(), stride.data()));
    return desc_;
  }

  template <typename T>
  inline cudnnTensorDescriptor_t descriptor(const std::vector<int>& dim,
                                            const std::vector<int>& stride) {
    return descriptor(CudnnDataType<T>::type, dim, stride);
  }

261 262
  inline cudnnTensorDescriptor_t desc() { return desc_; }

D
dangqingqing 已提交
263 264 265 266 267
 private:
  cudnnTensorDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor);
};

G
GaoWei8 已提交
268
#if CUDNN_VERSION >= 7201
G
GaoWei8 已提交
269 270 271 272 273 274 275 276 277 278 279 280 281
class ScopedRNNTensorDescriptor {
 public:
  ScopedRNNTensorDescriptor() {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateRNNDataDescriptor(&desc_));
  }

  ~ScopedRNNTensorDescriptor() PADDLE_MAY_THROW {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyRNNDataDescriptor(desc_));
  }

  inline cudnnRNNDataDescriptor_t descriptor(
      const cudnnDataType_t cudnn_type, int max_seq_length, int batch_size,
      int input_size, bool time_major, const std::vector<int>& seq_length) {
282
    static double padding_fill = 0.0f;
G
GaoWei8 已提交
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
    cudnnRNNDataLayout_t layout;

    if (time_major) {
      layout = CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED;
    } else {
      layout = CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED;
    }

    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetRNNDataDescriptor(
        desc_, cudnn_type, layout, max_seq_length, batch_size, input_size,
        seq_length.data(), static_cast<void*>(&padding_fill)));

    return desc_;
  }

  template <typename T>
  inline cudnnRNNDataDescriptor_t descriptor(
      int max_length, int batch_size, int input_size, bool time_major,
      const std::vector<int>& seq_length) {
    return descriptor(CudnnDataType<T>::type, max_length, batch_size,
                      input_size, time_major, seq_length);
  }

306 307
  inline cudnnRNNDataDescriptor_t desc() { return desc_; }

G
GaoWei8 已提交
308 309 310 311
 private:
  cudnnRNNDataDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedRNNTensorDescriptor);
};
G
GaoWei8 已提交
312
#endif
G
GaoWei8 已提交
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328

class ScopedDropoutDescriptor {
 public:
  ScopedDropoutDescriptor() {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateDropoutDescriptor(&desc_));
  }
  ~ScopedDropoutDescriptor() PADDLE_MAY_THROW {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyDropoutDescriptor(desc_));
  }

  inline cudnnDropoutDescriptor_t descriptor(const cudnnHandle_t& handle,
                                             const platform::Place& place,
                                             bool initialized,
                                             float dropout_prob_,
                                             framework::Tensor* dropout_state_,
                                             int seed, size_t state_size) {
G
Guo Sheng 已提交
329 330 331 332 333 334
    if (dropout_state_ == nullptr) {  // for no dropout or test
      PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetDropoutDescriptor(
          desc_, handle, 0 /* dropout */, nullptr, 0 /* state_size */,
          0 /* seed */));
      return desc_;
    }
G
GaoWei8 已提交
335 336 337 338 339 340 341 342 343 344 345 346
    auto* dropout_state_data = dropout_state_->data<uint8_t>();
    if (!initialized) {
      PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetDropoutDescriptor(
          desc_, handle, dropout_prob_, dropout_state_data, state_size, seed));
    } else {
      auto dropout_state_dims = dropout_state_->dims();
      state_size = dropout_state_dims[0];
      PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnRestoreDropoutDescriptor(
          desc_, handle, dropout_prob_, dropout_state_data, state_size, 0));
    }
    return desc_;
  }
347
  inline cudnnDropoutDescriptor_t desc() { return desc_; }
G
GaoWei8 已提交
348 349 350 351 352 353 354 355 356 357 358 359 360 361 362

 private:
  cudnnDropoutDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedDropoutDescriptor);
};

class ScopedRNNDescriptor {
 public:
  ScopedRNNDescriptor() {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateRNNDescriptor(&desc_));
  }
  ~ScopedRNNDescriptor() PADDLE_MAY_THROW {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_));
  }

363
  inline cudnnRNNDescriptor_t desc() { return desc_; }
G
GaoWei8 已提交
364 365 366 367 368 369

 private:
  cudnnRNNDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedRNNDescriptor);
};

D
dangqingqing 已提交
370 371 372
class ScopedFilterDescriptor {
 public:
  ScopedFilterDescriptor() {
373
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateFilterDescriptor(&desc_));
D
dangqingqing 已提交
374
  }
Z
Zeng Jinle 已提交
375
  ~ScopedFilterDescriptor() PADDLE_MAY_THROW {
376
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyFilterDescriptor(desc_));
D
dangqingqing 已提交
377 378 379 380
  }

  inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
                                            const cudnnDataType_t type,
武毅 已提交
381 382
                                            const std::vector<int>& kernel,
                                            const int groups = 1) {
C
chengduoZH 已提交
383
    // filter layout: MCHW(MCDHW), where M is the number of
武毅 已提交
384
    // output image channels, C is the number of input image channels,
C
chengduoZH 已提交
385 386
    // D is the depth of the filter, H is the height of the filter, and W is the
    // width of the filter.
武毅 已提交
387 388 389 390 391
    std::vector<int> kernel_with_group(kernel.begin(), kernel.end());
    if (groups > 1) {
      kernel_with_group[0] /= groups;
      // NOTE: input filter(C) of the filter is already asserted to be C/groups.
    }
392
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetFilterNdDescriptor(
武毅 已提交
393 394
        desc_, type, format, kernel_with_group.size(),
        kernel_with_group.data()));
D
dangqingqing 已提交
395 396 397 398 399
    return desc_;
  }

  template <typename T>
  inline cudnnFilterDescriptor_t descriptor(const DataLayout& order,
武毅 已提交
400 401
                                            const std::vector<int>& kernel,
                                            const int groups = 1) {
D
dangqingqing 已提交
402
    return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type,
武毅 已提交
403
                      kernel, groups);
D
dangqingqing 已提交
404 405
  }

406 407
  inline cudnnFilterDescriptor_t desc() { return desc_; }

D
dangqingqing 已提交
408 409 410 411 412 413 414 415
 private:
  cudnnFilterDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor);
};

class ScopedConvolutionDescriptor {
 public:
  ScopedConvolutionDescriptor() {
416 417
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnCreateConvolutionDescriptor(&desc_));
D
dangqingqing 已提交
418
  }
Z
Zeng Jinle 已提交
419
  ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW {
420 421
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnDestroyConvolutionDescriptor(desc_));
D
dangqingqing 已提交
422 423 424 425 426
  }

  inline cudnnConvolutionDescriptor_t descriptor(
      cudnnDataType_t type, const std::vector<int>& pads,
      const std::vector<int>& strides, const std::vector<int>& dilations) {
G
GaoWei8 已提交
427 428 429 430 431 432 433 434 435 436 437
    PADDLE_ENFORCE_EQ(pads.size(), strides.size(),
                      platform::errors::InvalidArgument(
                          "The size of pads and strides should be equal. But "
                          "received size of pads is %d, size of strides is %d.",
                          pads.size(), strides.size()));
    PADDLE_ENFORCE_EQ(
        pads.size(), dilations.size(),
        platform::errors::InvalidArgument(
            "The size of pads and dilations should be equal. But received size "
            "of pads is %d, size of dilations is %d.",
            pads.size(), dilations.size()));
438

K
Kexin Zhao 已提交
439 440
    cudnnDataType_t compute_type =
        (type == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
441
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetConvolutionNdDescriptor(
D
dangqingqing 已提交
442
        desc_, pads.size(), pads.data(), strides.data(), dilations.data(),
K
Kexin Zhao 已提交
443
        CUDNN_CROSS_CORRELATION, compute_type));
444
    return desc_;
D
dangqingqing 已提交
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
  }

  template <typename T>
  inline cudnnConvolutionDescriptor_t descriptor(
      const std::vector<int>& pads, const std::vector<int>& strides,
      const std::vector<int>& dilations) {
    return descriptor(CudnnDataType<T>::type, pads, strides, dilations);
  }

 private:
  cudnnConvolutionDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedConvolutionDescriptor);
};

class ScopedPoolingDescriptor {
 public:
  ScopedPoolingDescriptor() {
462
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreatePoolingDescriptor(&desc_));
D
dangqingqing 已提交
463
  }
Z
Zeng Jinle 已提交
464
  ~ScopedPoolingDescriptor() PADDLE_MAY_THROW {
465
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyPoolingDescriptor(desc_));
D
dangqingqing 已提交
466 467 468 469 470 471
  }

  inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
                                             const std::vector<int>& kernel,
                                             const std::vector<int>& pads,
                                             const std::vector<int>& strides) {
G
GaoWei8 已提交
472 473 474 475 476 477 478 479 480 481 482
    PADDLE_ENFORCE_EQ(kernel.size(), pads.size(),
                      platform::errors::InvalidArgument(
                          "The size of kernel and pads should be equal. But "
                          "received size of kernel is %d, size of pads is %d.",
                          kernel.size(), pads.size()));
    PADDLE_ENFORCE_EQ(
        kernel.size(), strides.size(),
        platform::errors::InvalidArgument(
            "The size of kernel and strides should be equal. But "
            "received size of kernel is %d, size of strides is %d.",
            kernel.size(), strides.size()));
483
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetPoolingNdDescriptor(
D
dzhwinter 已提交
484
        desc_, (GetPoolingMode(mode)),
D
dangqingqing 已提交
485 486
        CUDNN_PROPAGATE_NAN,  // Always propagate nans.
        kernel.size(), kernel.data(), pads.data(), strides.data()));
487
    return desc_;
D
dangqingqing 已提交
488 489 490 491 492 493 494
  }

 private:
  cudnnPoolingDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedPoolingDescriptor);
};

W
whs 已提交
495 496 497
class ScopedSpatialTransformerDescriptor {
 public:
  ScopedSpatialTransformerDescriptor() {
498 499
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
W
whs 已提交
500
  }
Z
Zeng Jinle 已提交
501
  ~ScopedSpatialTransformerDescriptor() PADDLE_MAY_THROW {
502 503
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
W
whs 已提交
504 505 506 507 508
  }

  template <typename T>
  inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims,
                                                        const int dimA[]) {
509
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetSpatialTransformerNdDescriptor(
W
whs 已提交
510 511 512 513 514 515 516 517 518
        desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
    return desc_;
  }

 private:
  cudnnSpatialTransformerDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedSpatialTransformerDescriptor);
};

Q
qingqing01 已提交
519 520 521
class ScopedActivationDescriptor {
 public:
  ScopedActivationDescriptor() {
522 523
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnCreateActivationDescriptor(&desc_));
Q
qingqing01 已提交
524
  }
Z
Zeng Jinle 已提交
525
  ~ScopedActivationDescriptor() PADDLE_MAY_THROW {
526 527
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnDestroyActivationDescriptor(desc_));
Q
qingqing01 已提交
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
  }

  template <typename T>
  inline cudnnActivationDescriptor_t descriptor(
      const std::string& act, double value_max = static_cast<double>(0.)) {
    double relu_ceiling = 0.0;
    ActivationMode activation_mode = StringToActivationMode(act);
    cudnnActivationMode_t mode;
    switch (activation_mode) {
#if CUDNN_VERSION >= 7100
      case ActivationMode::kNone:
        mode = CUDNN_ACTIVATION_IDENTITY;
        break;
#endif
      case ActivationMode::kRelu6:
        relu_ceiling = 6.0;
        mode = CUDNN_ACTIVATION_CLIPPED_RELU;
        break;
      case ActivationMode::kReluX:
        relu_ceiling = value_max;
        mode = CUDNN_ACTIVATION_CLIPPED_RELU;
        break;
      case ActivationMode::kRelu:
        mode = CUDNN_ACTIVATION_RELU;
        break;
      case ActivationMode::kSigmoid:
        mode = CUDNN_ACTIVATION_SIGMOID;
        break;
      case ActivationMode::kTanh:
        mode = CUDNN_ACTIVATION_TANH;
        break;
      default:
G
GaoWei8 已提交
560 561 562
        PADDLE_THROW(platform::errors::Unimplemented(
            "Unrecognized CUDNN activation mode: %d.",
            static_cast<int>(activation_mode)));
Q
qingqing01 已提交
563
    }
564
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetActivationDescriptor(
Q
qingqing01 已提交
565 566 567 568 569 570 571 572 573
        desc_, mode, CUDNN_NOT_PROPAGATE_NAN, relu_ceiling));
    return desc_;
  }

 private:
  cudnnActivationDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedActivationDescriptor);
};

574 575 576 577 578
inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
  use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace());
#ifdef PADDLE_WITH_CUDA
  if (use_cudnn) {
579
    auto& dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
580 581 582 583 584 585
    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
  }
#endif
  return use_cudnn;
}

W
Wu Yi 已提交
586 587 588 589
#if CUDNN_VERSION >= 7001
class ScopedCTCLossDescriptor {
 public:
  ScopedCTCLossDescriptor() {
590
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateCTCLossDescriptor(&desc_));
W
Wu Yi 已提交
591
  }
Z
Zeng Jinle 已提交
592
  ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW {
593
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyCTCLossDescriptor(desc_));
W
Wu Yi 已提交
594 595 596 597
  }

  template <typename T>
  inline cudnnCTCLossDescriptor_t descriptor() {
598
    PADDLE_ENFORCE_CUDA_SUCCESS(
W
Wu Yi 已提交
599 600 601 602 603 604 605 606 607 608
        dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType<T>::type));
    return desc_;
  }

 private:
  cudnnCTCLossDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedCTCLossDescriptor);
};
#endif

D
dangqingqing 已提交
609 610
}  // namespace platform
}  // namespace paddle