cudnn_conv.h 4.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <cudnn.h>
#include <string>
#include <vector>
#include "lite/api/paddle_place.h"
#include "lite/backends/cuda/cuda_utils.h"
#include "lite/core/context.h"
#include "lite/core/target_wrapper.h"
#include "lite/operators/op_params.h"

namespace paddle {
namespace lite {
namespace cuda {
namespace math {

// Common state shared by the cuDNN 2-D convolution classes in this header.
//
// The class stores a cuDNN handle, the tensor/filter/convolution/activation
// descriptors, and a device workspace pointer. Every one of them starts out
// null; this class never creates them, it only releases whichever ones are
// non-null when the object is destroyed.
//
// Template parameter:
//   Ptype_out - precision tag of the convolution output (not used by any
//               member declared in this class itself).
template <PrecisionType Ptype_out>
class CudnnConv2DBase {
 public:
  // Starts with every handle, descriptor and pointer null and every size 0.
  // stream_ is initialised as well so that it never holds an indeterminate
  // value before it is assigned.
  CudnnConv2DBase()
      : stream_(nullptr),
        handle_(nullptr),
        fwd_algo_(static_cast<cudnnConvolutionFwdAlgo_t>(0)),
        input_desc_(nullptr),
        output_desc_(nullptr),
        bias_desc_(nullptr),
        filter_desc_(nullptr),
        conv_desc_(nullptr),
        act_desc_(nullptr),
        workspace_data_(nullptr),
        workspace_(nullptr),
        workspace_fwd_sizes_(0),
        workspace_size_inbytes_(0) {}

  // The object owns raw cuDNN/CUDA resources that the destructor releases.
  // A member-wise copy would release the same resources twice, so copying
  // is disabled.
  CudnnConv2DBase(const CudnnConv2DBase&) = delete;
  CudnnConv2DBase& operator=(const CudnnConv2DBase&) = delete;

  // Releases every non-null descriptor, then the cuDNN handle, then the
  // workspace buffer. Virtual because the derived classes in this header
  // are polymorphic, so destroying one through a base pointer must be well
  // defined.
  virtual ~CudnnConv2DBase() {
    if (conv_desc_) {
      CUDNN_CHECK(cudnnDestroyConvolutionDescriptor(conv_desc_));
    }
    if (input_desc_) {
      CUDNN_CHECK(cudnnDestroyTensorDescriptor(input_desc_));
    }
    if (output_desc_) {
      CUDNN_CHECK(cudnnDestroyTensorDescriptor(output_desc_));
    }
    if (act_desc_) {
      CUDNN_CHECK(cudnnDestroyActivationDescriptor(act_desc_));
    }
    if (bias_desc_) {
      CUDNN_CHECK(cudnnDestroyTensorDescriptor(bias_desc_));
    }
    if (filter_desc_) {
      CUDNN_CHECK(cudnnDestroyFilterDescriptor(filter_desc_));
    }
    if (handle_ != nullptr) {
      CUDNN_CHECK(cudnnDestroy(handle_));
    }
    ResetWorkSpace();
  }

 protected:
  // Frees the workspace buffer (if any) and nulls both workspace pointers.
  // workspace_ is cleared together with workspace_data_ so that no pointer
  // into the freed buffer survives the call. The recorded sizes are left
  // untouched on purpose: callers may have updated them before resetting.
  void ResetWorkSpace() {
    if (workspace_data_ != nullptr) {
      CUDA_CALL(cudaFree(workspace_data_));
    }
    workspace_data_ = nullptr;
    workspace_ = nullptr;
  }

 protected:
  cudaStream_t stream_;
  cudnnHandle_t handle_;
  cudnnConvolutionFwdAlgo_t fwd_algo_;
  cudnnTensorDescriptor_t input_desc_;
  cudnnTensorDescriptor_t output_desc_;
  cudnnTensorDescriptor_t bias_desc_;
  cudnnFilterDescriptor_t filter_desc_;
  cudnnConvolutionDescriptor_t conv_desc_;

  // activation descriptor
  cudnnActivationDescriptor_t act_desc_;
  bool with_relu_act_{true};

  void* workspace_data_;  // underlying storage, released with cudaFree
  void* workspace_;       // aliases into workspace_data_
  size_t workspace_fwd_sizes_;
  size_t workspace_size_inbytes_;  // size of underlying storage

  const bool use_tensor_core_ = true;
  const size_t workspace_limit_bytes_ = 4 * 1024 * 1024;  // 4 MiB
  const cudnnConvolutionFwdPreference_t preference_ =
      CUDNN_CONVOLUTION_FWD_PREFER_FASTEST;

  // For int8
  Tensor temp_tensor_;
  Tensor scale_;
};

109
template <typename T, PrecisionType Ptype_out>
110 111 112
class CudnnConv2D : public CudnnConv2DBase<Ptype_out> {
 public:
  CudnnConv2D() : CudnnConv2DBase<Ptype_out>() {}
113
  virtual ~CudnnConv2D() = default;
114 115 116 117 118 119 120 121 122 123 124 125 126
  virtual bool init(const operators::ConvParam& param,
                    Context<TARGET(kCUDA)>* ctx);

  virtual bool create(const operators::ConvParam& param,
                      Context<TARGET(kCUDA)>* ctx);

  virtual bool run(const operators::ConvParam& param);
};

// Int8 counterpart of CudnnConv2D, parameterised only on the output
// precision tag and layered on CudnnConv2DBase<Ptype_out>.
//
// The base class is inherited publicly, matching CudnnConv2D; without an
// access specifier a `class` inherits privately, which made the two sibling
// classes inconsistent.
//
// Only the declarations of init(), create() and run() live in this header;
// their definitions are provided elsewhere.
template <PrecisionType Ptype_out>
class CudnnConv2DInt8 : public CudnnConv2DBase<Ptype_out> {
 public:
  // All members are initialised by the base-class constructor.
  CudnnConv2DInt8() : CudnnConv2DBase<Ptype_out>() {}

  virtual ~CudnnConv2DInt8() = default;

  // Takes the convolution parameters and the CUDA context; returns a bool
  // status. NOTE(review): presumably true means success - confirm against
  // the out-of-line definition.
  virtual bool init(const operators::ConvParam& param,
                    Context<TARGET(kCUDA)>* ctx);

  // Same argument list and return type as init().
  virtual bool create(const operators::ConvParam& param,
                      Context<TARGET(kCUDA)>* ctx);

  // Takes only the convolution parameters; returns a bool status.
  virtual bool run(const operators::ConvParam& param);
};

}  // namespace math
}  // namespace cuda
}  // namespace lite
}  // namespace paddle