/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/fluid/operators/conv_base_helper.h"

namespace paddle {
namespace operators {

using ConvArgs = ConvArgsBase<miopenHandle_t, miopenDataType_t>;

template <typename DeviceContext, typename T, size_t D>
H
hong 已提交
25
static void RemovePaddingSlice(const phi::GPUContext& context,
26 27 28
                               const Tensor* input, Tensor* out,
                               const std::vector<int>& starts,
                               const std::vector<int>& axes) {
H
hong 已提交
29
  auto& place = *context.eigen_device();
30 31 32 33 34 35 36 37 38 39
  auto in_dims = input->dims();
  auto new_out_dims = out->dims();
  auto offsets = Eigen::array<int, D>();
  auto extents = Eigen::array<int, D>();
  for (size_t i = 0; i < D; ++i) {
    offsets[i] = 0;
    extents[i] = new_out_dims[i];
  }

  for (size_t i = 0; i < axes.size(); ++i) {
40
    int start = starts[i];
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
    if (start < 0) {
      start = (start + in_dims[axes[i]]);
    }
    start = std::max(start, 0);
    offsets[axes[i]] = start;
  }
  auto in_t =
      framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
          *input);

  auto out_t =
      framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
          *out, new_out_dims);
  out_t.device(place) = in_t.slice(offsets, extents);
}

template <>
struct SearchAlgorithm<miopenConvFwdAlgorithm_t> {
  using perf_t = miopenConvAlgoPerf_t;
  using algo_t = miopenConvFwdAlgorithm_t;

  template <typename T>
  static algo_t Find(const ConvArgs& args, bool exhaustive_search,
64
                     bool deterministic, size_t workspace_size,
H
hong 已提交
65
                     const phi::GPUContext& ctx) {
66 67
    algo_t algo;

H
hong 已提交
68
    auto workspace_handle = ctx.cudnn_workspace_handle();
69

70 71 72
    int find_count;
    miopenConvAlgoPerf_t find_result;
    auto cudnn_find_func = [&](void* cudnn_workspace_ptr) {
73
      PADDLE_ENFORCE_GPU_SUCCESS(
74 75 76 77 78 79 80 81
          platform::dynload::miopenFindConvolutionForwardAlgorithm(
              args.handle, args.idesc.desc(), args.x->data<T>(),
              args.wdesc.desc(), args.w->data<T>(), args.cdesc.desc(),
              args.odesc.desc(), const_cast<T*>(args.o->data<T>()),
              kNUM_CUDNN_FWD_ALGS, &find_count, &find_result,
              cudnn_workspace_ptr, workspace_size, false));
    };

R
ronnywang 已提交
82 83
    workspace_handle.RunFuncSync(cudnn_find_func, workspace_size);
    algo = find_result.fwd_algo;
84 85 86 87
    VLOG(3) << "choose algo " << algo;
    return algo;
  }

88
  static size_t GetWorkspaceSize(const ConvArgs& args) {
89
    size_t workspace_size = 0;
90
    PADDLE_ENFORCE_GPU_SUCCESS(
91 92 93 94 95 96 97 98 99 100 101 102 103 104
        platform::dynload::miopenConvolutionForwardGetWorkSpaceSize(
            args.handle, args.wdesc.desc(), args.idesc.desc(),
            args.cdesc.desc(), args.odesc.desc(), &workspace_size));
    return workspace_size;
  }
};

template <>
struct SearchAlgorithm<miopenConvBwdDataAlgorithm_t> {
  using perf_t = miopenConvAlgoPerf_t;
  using algo_t = miopenConvBwdDataAlgorithm_t;

  template <typename T>
  static algo_t Find(const ConvArgs& args, bool exhaustive_search,
105
                     bool deterministic, size_t workspace_size,
H
hong 已提交
106
                     const phi::GPUContext& ctx) {
107 108
    algo_t algo;

H
hong 已提交
109
    auto workspace_handle = ctx.cudnn_workspace_handle();
110

111 112 113
    int find_count;
    miopenConvAlgoPerf_t find_result;
    auto cudnn_find_func = [&](void* cudnn_workspace_ptr) {
114
      PADDLE_ENFORCE_GPU_SUCCESS(
115 116 117 118 119 120 121 122
          platform::dynload::miopenFindConvolutionBackwardDataAlgorithm(
              args.handle, args.odesc.desc(), args.o->data<T>(),
              args.wdesc.desc(), args.w->data<T>(), args.cdesc.desc(),
              args.idesc.desc(), const_cast<T*>(args.x->data<T>()),
              kNUM_CUDNN_BWD_DATA_ALGS, &find_count, &find_result,
              cudnn_workspace_ptr, workspace_size, false));
    };

R
ronnywang 已提交
123 124
    workspace_handle.RunFuncSync(cudnn_find_func, workspace_size);
    algo = find_result.bwd_data_algo;
125 126 127 128
    VLOG(3) << "choose algo " << algo;
    return algo;
  }

129
  static size_t GetWorkspaceSize(const ConvArgs& args) {
130
    size_t workspace_size = 0;
131
    PADDLE_ENFORCE_GPU_SUCCESS(
132 133 134 135 136 137 138 139 140 141 142 143 144 145
        platform::dynload::miopenConvolutionBackwardDataGetWorkSpaceSize(
            args.handle, args.odesc.desc(), args.wdesc.desc(),
            args.cdesc.desc(), args.idesc.desc(), &workspace_size));
    return workspace_size;
  }
};

template <>
struct SearchAlgorithm<miopenConvBwdWeightsAlgorithm_t> {
  using perf_t = miopenConvAlgoPerf_t;
  using algo_t = miopenConvBwdWeightsAlgorithm_t;

  template <typename T>
  static algo_t Find(const ConvArgs& args, bool exhaustive_search,
146
                     bool deterministic, size_t workspace_size,
H
hong 已提交
147
                     const phi::GPUContext& ctx) {
148 149
    algo_t algo;

H
hong 已提交
150
    auto workspace_handle = ctx.cudnn_workspace_handle();
151 152 153 154

    int find_count;
    miopenConvAlgoPerf_t find_result;
    auto cudnn_find_func = [&](void* cudnn_workspace_ptr) {
155
      PADDLE_ENFORCE_GPU_SUCCESS(
156 157 158 159 160 161 162 163
          platform::dynload::miopenFindConvolutionBackwardWeightsAlgorithm(
              args.handle, args.odesc.desc(), args.o->data<T>(),
              args.idesc.desc(), args.x->data<T>(), args.cdesc.desc(),
              args.wdesc.desc(), const_cast<T*>(args.w->data<T>()),
              kNUM_CUDNN_BWD_FILTER_ALGS, &find_count, &find_result,
              cudnn_workspace_ptr, workspace_size, false));
    };

R
ronnywang 已提交
164 165
    workspace_handle.RunFuncSync(cudnn_find_func, workspace_size);
    algo = find_result.bwd_weights_algo;
166 167 168 169
    VLOG(3) << "choose algo " << algo;
    return algo;
  }

170
  static size_t GetWorkspaceSize(const ConvArgs& args) {
171
    size_t workspace_size = 0;
172
    PADDLE_ENFORCE_GPU_SUCCESS(
173 174 175 176 177 178 179 180 181
        platform::dynload::miopenConvolutionBackwardWeightsGetWorkSpaceSize(
            args.handle, args.odesc.desc(), args.idesc.desc(),
            args.cdesc.desc(), args.wdesc.desc(), &workspace_size));
    return workspace_size;
  }
};

}  // namespace operators
}  // namespace paddle