cudnn_helper.h 19.4 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
D
dangqingqing 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

Q
qingqing01 已提交
17
#include <string>
Y
Pass CI  
Yu Yang 已提交
18
#include <vector>
19 20

#include "paddle/fluid/framework/operator.h"
Y
Yi Wang 已提交
21 22
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/enforce.h"
K
Kexin Zhao 已提交
23
#include "paddle/fluid/platform/float16.h"
Y
Yi Wang 已提交
24
#include "paddle/fluid/platform/macros.h"
D
dangqingqing 已提交
25

W
wanghuancoder 已提交
26 27 28 29 30 31
namespace paddle {
namespace platform {
struct float16;
}  // namespace platform
}  // namespace paddle

D
dzhwinter 已提交
32 33
DECLARE_bool(cudnn_deterministic);

D
dangqingqing 已提交
34 35 36
namespace paddle {
namespace platform {

Q
Qiao Longfei 已提交
37 38 39
#define CUDNN_VERSION_MIN(major, minor, patch) \
  (CUDNN_VERSION >= ((major)*1000 + (minor)*100 + (patch)))

D
"fix"  
dzhwinter 已提交
40 41 42 43
enum class DataLayout {  // Not use
  kNHWC,
  kNCHW,
  kNCDHW,
44
  kNDHWC,  // add, liyamei
D
"fix"  
dzhwinter 已提交
45 46 47 48 49 50
  kNCHW_VECT_C,
};

enum class PoolingMode {
  kMaximum,
  kMaximumDeterministic,
51 52
  kAverageExclusive,
  kAverageInclusive,
D
"fix"  
dzhwinter 已提交
53 54
};

55
enum class ActivationMode {
Q
qingqing01 已提交
56 57 58 59 60 61 62 63 64
  kNone,  // activation identity
  kSigmoid,
  kRelu,
  kRelu6,
  kReluX,
  kTanh,
  kBandPass,
};

D
dzhwinter 已提交
65 66 67 68
inline cudnnPoolingMode_t GetPoolingMode(const PoolingMode& mode) {
  switch (mode) {
    case PoolingMode::kMaximumDeterministic:
      return CUDNN_POOLING_MAX_DETERMINISTIC;
69
    case PoolingMode::kAverageExclusive:
D
dzhwinter 已提交
70
      return CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
71 72
    case PoolingMode::kAverageInclusive:
      return CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
D
dzhwinter 已提交
73 74 75
    case PoolingMode::kMaximum:
      return CUDNN_POOLING_MAX;
    default:
G
GaoWei8 已提交
76 77
      PADDLE_THROW(
          platform::errors::Unimplemented("Unexpected CUDNN pooling mode."));
D
dzhwinter 已提交
78 79
  }
}
D
dzhwinter 已提交
80

Q
qingqing01 已提交
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
inline ActivationMode StringToActivationMode(const std::string& str) {
  if (str == "identity") {
    return ActivationMode::kNone;
  } else if (str == "sigmoid") {
    return ActivationMode::kSigmoid;
  } else if (str == "relu") {
    return ActivationMode::kRelu;
  } else if (str == "relu6") {
    return ActivationMode::kRelu6;
  } else if (str == "relux") {
    return ActivationMode::kReluX;
  } else if (str == "tanh") {
    return ActivationMode::kTanh;
  } else if (str == "bandpass") {
    return ActivationMode::kBandPass;
  } else {
G
GaoWei8 已提交
97 98
    PADDLE_THROW(platform::errors::Unimplemented(
        "Unknown CUDNN activation string: %s.", str));
Q
qingqing01 已提交
99 100 101
  }
}

D
dangqingqing 已提交
102 103 104
template <typename T>
class CudnnDataType;

K
Kexin Zhao 已提交
105 106 107 108
template <>
class CudnnDataType<float16> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_HALF;
K
Kexin Zhao 已提交
109
  // The scaling param type is float for HALF and FLOAT tensors
K
update  
Kexin Zhao 已提交
110 111
  using ScalingParamType = const float;
  using BatchNormParamType = float;
K
Kexin Zhao 已提交
112
  static ScalingParamType* kOne() {
K
Kexin Zhao 已提交
113
    static ScalingParamType v = 1.0;
K
Kexin Zhao 已提交
114 115 116
    return &v;
  }
  static ScalingParamType* kZero() {
K
Kexin Zhao 已提交
117
    static ScalingParamType v = 0.0;
K
Kexin Zhao 已提交
118 119 120 121
    return &v;
  }
};

D
dangqingqing 已提交
122 123 124 125
template <>
class CudnnDataType<float> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
K
update  
Kexin Zhao 已提交
126 127
  using ScalingParamType = const float;
  using BatchNormParamType = float;
Q
Qiao Longfei 已提交
128 129 130 131 132 133 134 135
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0;
    return &v;
  }
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0;
    return &v;
  }
D
dangqingqing 已提交
136 137 138 139 140 141
};

template <>
class CudnnDataType<double> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
K
update  
Kexin Zhao 已提交
142 143
  using ScalingParamType = const double;
  using BatchNormParamType = double;
Q
Qiao Longfei 已提交
144 145 146 147 148 149 150 151
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0;
    return &v;
  }
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0;
    return &v;
  }
D
dangqingqing 已提交
152 153
};

C
chengduoZH 已提交
154 155
inline cudnnTensorFormat_t GetCudnnTensorFormat(
    const DataLayout& order) {  // Not use
D
dangqingqing 已提交
156 157 158 159 160
  switch (order) {
    case DataLayout::kNHWC:
      return CUDNN_TENSOR_NHWC;
    case DataLayout::kNCHW:
      return CUDNN_TENSOR_NCHW;
C
chengduoZH 已提交
161
    case DataLayout::kNCDHW:
武毅 已提交
162
      return CUDNN_TENSOR_NCHW;  // NOTE: cudnn treat NdTensor as the same
163 164
    case DataLayout::kNDHWC:
      return CUDNN_TENSOR_NHWC;  // add, liyamei
D
dangqingqing 已提交
165
    default:
G
GaoWei8 已提交
166 167
      PADDLE_THROW(platform::errors::Unimplemented(
          "CUDNN has no equivalent dataLayout for input order."));
D
dangqingqing 已提交
168 169 170 171 172 173 174
  }
  return CUDNN_TENSOR_NCHW;
}

class ScopedTensorDescriptor {
 public:
  ScopedTensorDescriptor() {
175
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateTensorDescriptor(&desc_));
D
dangqingqing 已提交
176
  }
Z
Zeng Jinle 已提交
177
  ~ScopedTensorDescriptor() PADDLE_MAY_THROW {
178
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyTensorDescriptor(desc_));
D
dangqingqing 已提交
179 180 181 182
  }

  inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
                                            const cudnnDataType_t type,
武毅 已提交
183 184 185
                                            const std::vector<int>& dims,
                                            const int groups = 1) {
    // the format is not used now, will add later
D
dangqingqing 已提交
186 187
    std::vector<int> strides(dims.size());
    strides[dims.size() - 1] = 1;
188 189
    for (int i = dims.size() - 2; i >= 0; i--) {
      strides[i] = dims[i + 1] * strides[i + 1];
D
dangqingqing 已提交
190
    }
武毅 已提交
191
    // Update tensor descriptor dims setting if groups > 1
192 193
    // NOTE: Here, Assume using NCHW or NCDHW order
    std::vector<int> dims_with_group(dims.begin(), dims.end());
武毅 已提交
194 195 196
    if (groups > 1) {
      dims_with_group[1] = dims_with_group[1] / groups;
    }
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216

    if (dims.size() == 4) {
      if (format == CUDNN_TENSOR_NCHW) {
        PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
            desc_, type, dims_with_group.size(), dims_with_group.data(),
            strides.data()));
      } else {  // CUDNN_TENSOR_NHWC
        PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensor4dDescriptor(
            desc_, format, type, dims[0], dims[3], dims[1], dims[2]));
      }
    } else if (dims.size() == 5) {
      if (format == CUDNN_TENSOR_NCHW) {
        PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
            desc_, type, dims_with_group.size(), dims_with_group.data(),
            strides.data()));
      } else {  // CUDNN_TENSOR_NHWC
        PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptorEx(
            desc_, format, type, dims.size(), dims.data()));
      }
    }
D
dangqingqing 已提交
217 218 219 220 221
    return desc_;
  }

  template <typename T>
  inline cudnnTensorDescriptor_t descriptor(const DataLayout& order,
武毅 已提交
222 223 224 225
                                            const std::vector<int>& dims,
                                            const int groups = 1) {
    return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type, dims,
                      groups);
D
dangqingqing 已提交
226 227
  }

G
GaoWei8 已提交
228 229 230 231 232 233 234 235 236 237 238 239 240 241
  inline cudnnTensorDescriptor_t descriptor(const cudnnDataType_t cudnn_type,
                                            const std::vector<int>& dim,
                                            const std::vector<int>& stride) {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
        desc_, cudnn_type, dim.size(), dim.data(), stride.data()));
    return desc_;
  }

  template <typename T>
  inline cudnnTensorDescriptor_t descriptor(const std::vector<int>& dim,
                                            const std::vector<int>& stride) {
    return descriptor(CudnnDataType<T>::type, dim, stride);
  }

242 243
  inline cudnnTensorDescriptor_t desc() { return desc_; }

D
dangqingqing 已提交
244 245 246 247 248
 private:
  cudnnTensorDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor);
};

G
GaoWei8 已提交
249
#if CUDNN_VERSION >= 7201
G
GaoWei8 已提交
250 251 252 253 254 255 256 257 258 259 260 261 262
class ScopedRNNTensorDescriptor {
 public:
  ScopedRNNTensorDescriptor() {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateRNNDataDescriptor(&desc_));
  }

  ~ScopedRNNTensorDescriptor() PADDLE_MAY_THROW {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyRNNDataDescriptor(desc_));
  }

  inline cudnnRNNDataDescriptor_t descriptor(
      const cudnnDataType_t cudnn_type, int max_seq_length, int batch_size,
      int input_size, bool time_major, const std::vector<int>& seq_length) {
263
    static double padding_fill = 0.0f;
G
GaoWei8 已提交
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
    cudnnRNNDataLayout_t layout;

    if (time_major) {
      layout = CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED;
    } else {
      layout = CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED;
    }

    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetRNNDataDescriptor(
        desc_, cudnn_type, layout, max_seq_length, batch_size, input_size,
        seq_length.data(), static_cast<void*>(&padding_fill)));

    return desc_;
  }

  template <typename T>
  inline cudnnRNNDataDescriptor_t descriptor(
      int max_length, int batch_size, int input_size, bool time_major,
      const std::vector<int>& seq_length) {
    return descriptor(CudnnDataType<T>::type, max_length, batch_size,
                      input_size, time_major, seq_length);
  }

287 288
  inline cudnnRNNDataDescriptor_t desc() { return desc_; }

G
GaoWei8 已提交
289 290 291 292
 private:
  cudnnRNNDataDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedRNNTensorDescriptor);
};
G
GaoWei8 已提交
293
#endif
G
GaoWei8 已提交
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309

class ScopedDropoutDescriptor {
 public:
  ScopedDropoutDescriptor() {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateDropoutDescriptor(&desc_));
  }
  ~ScopedDropoutDescriptor() PADDLE_MAY_THROW {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyDropoutDescriptor(desc_));
  }

  inline cudnnDropoutDescriptor_t descriptor(const cudnnHandle_t& handle,
                                             const platform::Place& place,
                                             bool initialized,
                                             float dropout_prob_,
                                             framework::Tensor* dropout_state_,
                                             int seed, size_t state_size) {
G
Guo Sheng 已提交
310 311 312 313 314 315
    if (dropout_state_ == nullptr) {  // for no dropout or test
      PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetDropoutDescriptor(
          desc_, handle, 0 /* dropout */, nullptr, 0 /* state_size */,
          0 /* seed */));
      return desc_;
    }
G
GaoWei8 已提交
316 317 318 319 320 321 322 323 324 325 326 327
    auto* dropout_state_data = dropout_state_->data<uint8_t>();
    if (!initialized) {
      PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetDropoutDescriptor(
          desc_, handle, dropout_prob_, dropout_state_data, state_size, seed));
    } else {
      auto dropout_state_dims = dropout_state_->dims();
      state_size = dropout_state_dims[0];
      PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnRestoreDropoutDescriptor(
          desc_, handle, dropout_prob_, dropout_state_data, state_size, 0));
    }
    return desc_;
  }
328
  inline cudnnDropoutDescriptor_t desc() { return desc_; }
G
GaoWei8 已提交
329 330 331 332 333 334 335 336 337 338 339 340 341 342 343

 private:
  cudnnDropoutDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedDropoutDescriptor);
};

class ScopedRNNDescriptor {
 public:
  ScopedRNNDescriptor() {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateRNNDescriptor(&desc_));
  }
  ~ScopedRNNDescriptor() PADDLE_MAY_THROW {
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_));
  }

344
  inline cudnnRNNDescriptor_t desc() { return desc_; }
G
GaoWei8 已提交
345 346 347 348 349 350

 private:
  cudnnRNNDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedRNNDescriptor);
};

D
dangqingqing 已提交
351 352 353
class ScopedFilterDescriptor {
 public:
  ScopedFilterDescriptor() {
354
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateFilterDescriptor(&desc_));
D
dangqingqing 已提交
355
  }
Z
Zeng Jinle 已提交
356
  ~ScopedFilterDescriptor() PADDLE_MAY_THROW {
357
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyFilterDescriptor(desc_));
D
dangqingqing 已提交
358 359 360 361
  }

  inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
                                            const cudnnDataType_t type,
武毅 已提交
362 363
                                            const std::vector<int>& kernel,
                                            const int groups = 1) {
C
chengduoZH 已提交
364
    // filter layout: MCHW(MCDHW), where M is the number of
武毅 已提交
365
    // output image channels, C is the number of input image channels,
C
chengduoZH 已提交
366 367
    // D is the depth of the filter, H is the height of the filter, and W is the
    // width of the filter.
武毅 已提交
368 369 370 371 372
    std::vector<int> kernel_with_group(kernel.begin(), kernel.end());
    if (groups > 1) {
      kernel_with_group[0] /= groups;
      // NOTE: input filter(C) of the filter is already asserted to be C/groups.
    }
373
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetFilterNdDescriptor(
武毅 已提交
374 375
        desc_, type, format, kernel_with_group.size(),
        kernel_with_group.data()));
D
dangqingqing 已提交
376 377 378 379 380
    return desc_;
  }

  template <typename T>
  inline cudnnFilterDescriptor_t descriptor(const DataLayout& order,
武毅 已提交
381 382
                                            const std::vector<int>& kernel,
                                            const int groups = 1) {
D
dangqingqing 已提交
383
    return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type,
武毅 已提交
384
                      kernel, groups);
D
dangqingqing 已提交
385 386
  }

387 388
  inline cudnnFilterDescriptor_t desc() { return desc_; }

D
dangqingqing 已提交
389 390 391 392 393 394 395 396
 private:
  cudnnFilterDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor);
};

class ScopedConvolutionDescriptor {
 public:
  ScopedConvolutionDescriptor() {
397 398
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnCreateConvolutionDescriptor(&desc_));
D
dangqingqing 已提交
399
  }
Z
Zeng Jinle 已提交
400
  ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW {
401 402
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnDestroyConvolutionDescriptor(desc_));
D
dangqingqing 已提交
403 404 405 406 407
  }

  inline cudnnConvolutionDescriptor_t descriptor(
      cudnnDataType_t type, const std::vector<int>& pads,
      const std::vector<int>& strides, const std::vector<int>& dilations) {
G
GaoWei8 已提交
408 409 410 411 412 413 414 415 416 417 418
    PADDLE_ENFORCE_EQ(pads.size(), strides.size(),
                      platform::errors::InvalidArgument(
                          "The size of pads and strides should be equal. But "
                          "received size of pads is %d, size of strides is %d.",
                          pads.size(), strides.size()));
    PADDLE_ENFORCE_EQ(
        pads.size(), dilations.size(),
        platform::errors::InvalidArgument(
            "The size of pads and dilations should be equal. But received size "
            "of pads is %d, size of dilations is %d.",
            pads.size(), dilations.size()));
419

K
Kexin Zhao 已提交
420 421
    cudnnDataType_t compute_type =
        (type == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
422
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetConvolutionNdDescriptor(
D
dangqingqing 已提交
423
        desc_, pads.size(), pads.data(), strides.data(), dilations.data(),
K
Kexin Zhao 已提交
424
        CUDNN_CROSS_CORRELATION, compute_type));
425
    return desc_;
D
dangqingqing 已提交
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
  }

  template <typename T>
  inline cudnnConvolutionDescriptor_t descriptor(
      const std::vector<int>& pads, const std::vector<int>& strides,
      const std::vector<int>& dilations) {
    return descriptor(CudnnDataType<T>::type, pads, strides, dilations);
  }

 private:
  cudnnConvolutionDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedConvolutionDescriptor);
};

class ScopedPoolingDescriptor {
 public:
  ScopedPoolingDescriptor() {
443
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreatePoolingDescriptor(&desc_));
D
dangqingqing 已提交
444
  }
Z
Zeng Jinle 已提交
445
  ~ScopedPoolingDescriptor() PADDLE_MAY_THROW {
446
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyPoolingDescriptor(desc_));
D
dangqingqing 已提交
447 448 449 450 451 452
  }

  inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
                                             const std::vector<int>& kernel,
                                             const std::vector<int>& pads,
                                             const std::vector<int>& strides) {
G
GaoWei8 已提交
453 454 455 456 457 458 459 460 461 462 463
    PADDLE_ENFORCE_EQ(kernel.size(), pads.size(),
                      platform::errors::InvalidArgument(
                          "The size of kernel and pads should be equal. But "
                          "received size of kernel is %d, size of pads is %d.",
                          kernel.size(), pads.size()));
    PADDLE_ENFORCE_EQ(
        kernel.size(), strides.size(),
        platform::errors::InvalidArgument(
            "The size of kernel and strides should be equal. But "
            "received size of kernel is %d, size of strides is %d.",
            kernel.size(), strides.size()));
464
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetPoolingNdDescriptor(
D
dzhwinter 已提交
465
        desc_, (GetPoolingMode(mode)),
D
dangqingqing 已提交
466 467
        CUDNN_PROPAGATE_NAN,  // Always propagate nans.
        kernel.size(), kernel.data(), pads.data(), strides.data()));
468
    return desc_;
D
dangqingqing 已提交
469 470 471 472 473 474 475
  }

 private:
  cudnnPoolingDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedPoolingDescriptor);
};

W
whs 已提交
476 477 478
class ScopedSpatialTransformerDescriptor {
 public:
  ScopedSpatialTransformerDescriptor() {
479 480
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
W
whs 已提交
481
  }
Z
Zeng Jinle 已提交
482
  ~ScopedSpatialTransformerDescriptor() PADDLE_MAY_THROW {
483 484
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
W
whs 已提交
485 486 487 488 489
  }

  template <typename T>
  inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims,
                                                        const int dimA[]) {
490
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetSpatialTransformerNdDescriptor(
W
whs 已提交
491 492 493 494 495 496 497 498 499
        desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
    return desc_;
  }

 private:
  cudnnSpatialTransformerDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedSpatialTransformerDescriptor);
};

Q
qingqing01 已提交
500 501 502
class ScopedActivationDescriptor {
 public:
  ScopedActivationDescriptor() {
503 504
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnCreateActivationDescriptor(&desc_));
Q
qingqing01 已提交
505
  }
Z
Zeng Jinle 已提交
506
  ~ScopedActivationDescriptor() PADDLE_MAY_THROW {
507 508
    PADDLE_ENFORCE_CUDA_SUCCESS(
        dynload::cudnnDestroyActivationDescriptor(desc_));
Q
qingqing01 已提交
509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
  }

  template <typename T>
  inline cudnnActivationDescriptor_t descriptor(
      const std::string& act, double value_max = static_cast<double>(0.)) {
    double relu_ceiling = 0.0;
    ActivationMode activation_mode = StringToActivationMode(act);
    cudnnActivationMode_t mode;
    switch (activation_mode) {
#if CUDNN_VERSION >= 7100
      case ActivationMode::kNone:
        mode = CUDNN_ACTIVATION_IDENTITY;
        break;
#endif
      case ActivationMode::kRelu6:
        relu_ceiling = 6.0;
        mode = CUDNN_ACTIVATION_CLIPPED_RELU;
        break;
      case ActivationMode::kReluX:
        relu_ceiling = value_max;
        mode = CUDNN_ACTIVATION_CLIPPED_RELU;
        break;
      case ActivationMode::kRelu:
        mode = CUDNN_ACTIVATION_RELU;
        break;
      case ActivationMode::kSigmoid:
        mode = CUDNN_ACTIVATION_SIGMOID;
        break;
      case ActivationMode::kTanh:
        mode = CUDNN_ACTIVATION_TANH;
        break;
      default:
G
GaoWei8 已提交
541 542 543
        PADDLE_THROW(platform::errors::Unimplemented(
            "Unrecognized CUDNN activation mode: %d.",
            static_cast<int>(activation_mode)));
Q
qingqing01 已提交
544
    }
545
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetActivationDescriptor(
Q
qingqing01 已提交
546 547 548 549 550 551 552 553 554
        desc_, mode, CUDNN_NOT_PROPAGATE_NAN, relu_ceiling));
    return desc_;
  }

 private:
  cudnnActivationDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedActivationDescriptor);
};

555 556 557 558 559
inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
  use_cudnn &= paddle::platform::is_gpu_place(ctx.GetPlace());
#ifdef PADDLE_WITH_CUDA
  if (use_cudnn) {
560
    auto& dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
561 562 563 564 565 566
    use_cudnn &= dev_ctx.cudnn_handle() != nullptr;
  }
#endif
  return use_cudnn;
}

W
Wu Yi 已提交
567 568 569 570
#if CUDNN_VERSION >= 7001
class ScopedCTCLossDescriptor {
 public:
  ScopedCTCLossDescriptor() {
571
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateCTCLossDescriptor(&desc_));
W
Wu Yi 已提交
572
  }
Z
Zeng Jinle 已提交
573
  ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW {
574
    PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyCTCLossDescriptor(desc_));
W
Wu Yi 已提交
575 576 577 578
  }

  template <typename T>
  inline cudnnCTCLossDescriptor_t descriptor() {
579
    PADDLE_ENFORCE_CUDA_SUCCESS(
W
Wu Yi 已提交
580 581 582 583 584 585 586 587 588 589
        dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType<T>::type));
    return desc_;
  }

 private:
  cudnnCTCLossDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedCTCLossDescriptor);
};
#endif

D
dangqingqing 已提交
590 591
}  // namespace platform
}  // namespace paddle