// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <vector>
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/tensor.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h"
#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/profiler.h"
#endif
#include "lite/backends/opencl/cl_utility.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace opencl {
// Convolution kernel declaration. The class derives from KernelLite
// instantiated with TARGET(kOpenCL), PRECISION(kFP16) and
// DATALAYOUT(kImageDefault). Only the interface and cached state are declared
// here; apart from the inline profiling hook, every member function is
// defined outside this header.
class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
                                           PRECISION(kFP16),
                                           DATALAYOUT(kImageDefault)> {
 public:
  using param_t = operators::ConvParam;
  // Pointer-to-member type matching the private convolution routines declared
  // below (all of them take a single bool and return void).
  using kernel_t = void (ConvImageCompute::*)(bool);

  void PrepareForRun() override;

  void ReInitWhenNeeded() override;

  void Run() override;

  // NOTE(review): presumably runs the kernel `times` times and returns a
  // timing figure used for tuning -- confirm against the implementation file.
  double Tune(int times = 5);

#ifdef LITE_WITH_PROFILE
  // Copies the kernel name, work sizes and OpenCL event of this kernel into
  // the profiler record `ch`.
  void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
    // `kernel_func_names_` starts out empty; indexing element 0 of an empty
    // vector is undefined behaviour, so the name is only reported once at
    // least one kernel function name has been recorded.
    if (!kernel_func_names_.empty()) {
      ch->kernel_func_name = kernel_func_names_[0];
    }
    ch->global_work_size = ch->NDRangeToStr(global_work_size_);
    ch->local_work_size = ch->NDRangeToStr(local_work_size_);
    ch->cl_event =
        event_;  // `event_` defined in `kernel.h`, valid after kernel::Run
  }
#endif

 private:
  void PrintConvInfo();
  void GetGlobalWorkSize();

  // Convolution routines. Each one has the `kernel_t` signature, so any of
  // them can be stored in `impl_`.
  // NOTE(review): the names suggest per-filter-size / depthwise variants; the
  // selection logic is not visible in this header.
  void Conv2d1x1opt(bool enable_tune = false);
  void Conv2d3x3(bool enable_tune = false);
  void Conv2d3x3opt(bool enable_tune = false);
  void Conv2d5x5(bool enable_tune = false);
  void Conv2d5x5opt(bool enable_tune = false);
  void Conv2d7x7(bool enable_tune = false);
  void Conv2d7x7opt(bool enable_tune = false);
  void DepthwiseConv2d3x3s1(bool enable_tune = false);
  void DepthwiseConv2d3x3(bool enable_tune = false);
  void DepthwiseConv2d(bool enable_tune = false);

  param_t* conv_param_{nullptr};

  // Explicitly null so the pointer never holds an indeterminate value before
  // a routine has been assigned to it.
  kernel_t impl_{nullptr};
  std::vector<std::string> kernel_func_names_{};
  std::vector<std::string> kernel_func_paths_{};
  std::vector<std::string> build_options_{};
  std::string time_stamp_{GetTimeStamp()};

  std::unique_ptr<Tensor> filter_gpu_image_{nullptr};
  std::unique_ptr<Tensor> bias_gpu_image_{nullptr};
  std::unique_ptr<Tensor> tensor_hold_filter_image_{nullptr};
  std::unique_ptr<Tensor> tensor_hold_bias_image_{nullptr};
  cl::NDRange global_work_size_ = cl::NDRange{
      static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};

  // opencl kernel args
  int c_blk_ = 1;
  int w_blk_ = 1;
  int nh_blk_ = 1;

  const cl::Image2D* input_image_p_{nullptr};
  const cl::Image2D* filter_image_p_{nullptr};
  const cl::Image2D* bias_image_p_{nullptr};
  const cl::Image2D* output_image_p_{nullptr};

  // The integer fields below default to -1 until they are assigned.
  int stride_h_{-1};
  int stride_w_{-1};

  int dilation_h_{-1};
  int dilation_w_{-1};

  int pad_up_{-1};
  int pad_down_{-1};
  int pad_left_{-1};
  int pad_right_{-1};

  int offset_{-1};
  int groups_{-1};
  bool relu_fused_{false};
  bool has_bias_{false};

  int input_tensor_n_{-1};
  int input_tensor_c_{-1};
  int input_tensor_h_{-1};
  int input_tensor_w_{-1};
  int input_image_h_{-1};
  int input_image_w_{-1};
  int input_c_block_{-1};

  int output_tensor_n_{-1};
  int output_tensor_c_{-1};
  int output_tensor_h_{-1};
  int output_tensor_w_{-1};
  int output_image_h_{-1};
  int output_image_w_{-1};

  int filter_tensor_n_{-1};
  int filter_tensor_c_{-1};
  int filter_tensor_h_{-1};
  int filter_tensor_w_{-1};
  int filter_image_h_{-1};
  int filter_image_w_{-1};

  int bias_image_h_{-1};
  int bias_image_w_{-1};

  int default_c_blk_ = 1;
  int default_w_blk_ = 1;
  int default_nh_blk_ = 1;
  // =================

  DDim last_input_dims_{};
  bool is_first_epoch_for_run_{true};

  cl::Kernel kernel_;
  // Zero-initialized (0 is the value of CL_SUCCESS in the OpenCL headers) so
  // the status is never read while indeterminate.
  cl_int status_{0};
  cl::NDRange local_work_size_ = cl::NDRange{
      static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
  bool use_lws_{true};
  bool use_tune_{true};
};
}  // namespace opencl
}  // namespace kernels
}  // namespace lite
}  // namespace paddle