cl_image.h 9.7 KB
Newer Older
L
liuruilong 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

17 18 19
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

#include "CL/cl.h"
L
liuruilong 已提交
20

L
liuruilong 已提交
21
#include "framework/cl/cl_deleter.h"
L
liuruilong 已提交
22
#include "framework/cl/cl_engine.h"
L
liuruilong 已提交
23 24 25
#include "framework/cl/cl_half.h"
#include "framework/cl/cl_image_converter.h"
#include "framework/cl/cl_tool.h"
L
liuruilong 已提交
26 27 28 29 30 31 32 33
#include "framework/ddim.h"
#include "framework/tensor.h"

namespace paddle_mobile {
namespace framework {

class CLImage {
 public:
L
liuruilong 已提交
34 35
  // Creates an empty image: per the in-class member initializers there is no
  // host tensor copy, no converter, and initialized_ is false.
  CLImage() = default;

L
liuruilong 已提交
36 37 38 39 40 41 42 43 44
  // Frees the host-side tensor copy and the layout converter owned by this
  // object. The OpenCL handles live in unique_ptr members and are cleaned up
  // by their own deleters.
  ~CLImage() {
    // delete / delete[] on a null pointer is a no-op, so no guards are needed.
    delete[] tensor_data_;
    delete image_converter_;
  }
L
liuruilong 已提交
45 46 47 48 49 50 51
  /*
   * will not hold input tensor data, memcpy in this method
   * */
  void SetTensorData(float *tensorData, const DDim &dim) {
    int numel = product(dim);
    if (tensor_data_ != nullptr) {
      delete[](tensor_data_);
L
liuruilong 已提交
52
      tensor_data_ = nullptr;
L
liuruilong 已提交
53 54
    }
    tensor_data_ = new float[numel];
L
liuruilong 已提交
55
    memcpy(tensor_data_, tensorData, numel * sizeof(float));
L
liuruilong 已提交
56 57 58
    tensor_dims_ = dim;
  }

Y
yangfei 已提交
59
  bool isInit() { return initialized_; }
L
liuruilong 已提交
60 61
  /*
   * need call SetTensorData first
L
liuruilong 已提交
62 63
   *
   * folder when one dim or two dim
L
liuruilong 已提交
64
   * */
L
liuruilong 已提交
65
  void InitCLImage(cl_context context, cl_command_queue command_queue) {
L
liuruilong 已提交
66 67 68 69 70 71
    PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
                          " need call SetTensorData first");
    CLImageConverterFolder *folder_converter = new CLImageConverterFolder();
    InitCLImage(context, command_queue, folder_converter);
  }

Y
yangfei 已提交
72 73 74 75 76 77 78
  // Builds the device image from the stored host tensor copy using a
  // CLImageConverterNormal. SetTensorData must have been called first.
  void InitNormalCLImage(cl_context context, cl_command_queue command_queue) {
    PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
                          " need call SetTensorData first");
    // The three-argument overload stores the converter in image_converter_,
    // which the destructor deletes.
    InitCLImage(context, command_queue, new CLImageConverterNormal());
  }

L
liuruilong 已提交
79 80 81 82
  /*
   * Converts the stored host tensor copy into image layout with `converter`,
   * creates the device image from it, and then releases the host copy.
   *
   * context:       OpenCL context the image is created in.
   * command_queue: queue remembered for later use (see CommandQueue()).
   * converter:     layout converter. This object takes ownership of it
   *                immediately; it is freed if initialization fails and is
   *                otherwise kept until replaced or until destruction.
   *
   * SetTensorData must have been called first.
   * */
  void InitCLImage(cl_context context, cl_command_queue command_queue,
                   CLImageConverterBase *converter) {
    // Guard the incoming converter so it cannot leak if a step below throws.
    // If the caller passes the converter that is already installed, there is
    // nothing to take over (and deleting it first would be a use-after-free).
    std::unique_ptr<CLImageConverterBase> incoming(
        converter == image_converter_ ? nullptr : converter);

    PADDLE_MOBILE_ENFORCE(converter != nullptr, " converter is null");
    PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
                          " need call SetTensorData first");

    DLOG << " begin init cl image ";
    image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);

    // Four half-float channels (RGBA) per pixel; owned by a smart pointer so
    // the staging buffer is freed on every exit path.
    std::unique_ptr<half_t[]> image_data(
        new half_t[product(image_dims_) * 4]);

    DLOG << " convert to image";
    converter->NCHWToImage(tensor_data_, image_data.get(), tensor_dims_);
    DLOG << " end convert to image";

    InitCLImage(context, image_dims_[0], image_dims_[1], image_data.get());

    // The data now lives in the device image; drop the host copy.
    delete[] tensor_data_;
    tensor_data_ = nullptr;

    // Swap converters only after everything that can fail has succeeded, so
    // image_converter_ never points at freed memory.
    if (incoming) {
      delete image_converter_;
      image_converter_ = incoming.release();
    }

    command_queue_ = command_queue;
    initialized_ = true;
    DLOG << " end init cl image";
  }

L
liuruilong 已提交
109
  // Builds the device image from the stored host tensor copy using a
  // CLImageConverterNWBlock. SetTensorData must have been called first and
  // the tensor must have exactly four dimensions.
  void InitNImage(cl_context context, cl_command_queue command_queue) {
    if (tensor_data_ == nullptr) {
      PADDLE_MOBILE_THROW_EXCEPTION(" need call SetTensorData first");
    }
    // Check the rank before converting: the conversion is the step that
    // depends on it, so checking afterwards is too late.
    PADDLE_MOBILE_ENFORCE(tensor_dims_.size() == 4, " tensor dim is not 4");
    InitCLImage(context, command_queue, new CLImageConverterNWBlock());
  }
Y
yangfei 已提交
117 118 119 120 121 122 123 124
  // Builds the device image from the stored host tensor copy using a
  // CLImageConverterDWBlock. SetTensorData must have been called first and
  // the tensor must have exactly four dimensions.
  void InitDWImage(cl_context context, cl_command_queue command_queue) {
    if (tensor_data_ == nullptr) {
      PADDLE_MOBILE_THROW_EXCEPTION(" need call SetTensorData first");
    }
    // Check the rank before converting: the conversion is the step that
    // depends on it, so checking afterwards is too late.
    PADDLE_MOBILE_ENFORCE(tensor_dims_.size() == 4, " tensor dim is not 4");
    InitCLImage(context, command_queue, new CLImageConverterDWBlock());
  }
Y
yangfei 已提交
125

L
liuruilong 已提交
126 127
  /*
   * Creates a device image sized for a tensor of shape `dim` without
   * uploading any data (the image is created with a null host pointer).
   * Uses a CLImageConverterNormal to derive the image dims and creates a
   * fresh event for this image.
   *
   * Must not be called while host tensor data is set.
   * */
  void InitEmptyImage(cl_context context, cl_command_queue command_queue,
                      const DDim &dim) {
    PADDLE_MOBILE_ENFORCE(tensor_data_ == nullptr,
                          " empty image tensor data shouldn't have value");

    // Held by a smart pointer until installed so it cannot leak if image
    // creation throws.
    std::unique_ptr<CLImageConverterNormal> normal_converter(
        new CLImageConverterNormal());

    DLOG << " to get image dims ";
    image_dims_ = normal_converter->InitImageDimInfoWith(dim);
    DLOG << " end get image dims " << image_dims_;

    InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);

    tensor_dims_ = dim;
    command_queue_ = command_queue;

    // Free a converter left over from an earlier Init* call before replacing
    // it; overwriting the raw pointer alone would leak it.
    delete image_converter_;
    image_converter_ = normal_converter.release();

    cl_event_ = CLEngine::Instance()->CreateEvent(context);
    initialized_ = true;
    DLOG << " end init cl image";
  }
148 149 150 151 152
  /*
   * Creates a device image sized for `need_dims` while also recording the
   * dims derived from `real_dims` in real_image_dims / real_tensor_dims.
   * The original note describes this as a "fake size cl_mem for mem share".
   * No data is uploaded (the image is created with a null host pointer).
   *
   * Must not be called while host tensor data is set.
   * */
  void InitFakeSizeImage(cl_context context, cl_command_queue command_queue,
                         const DDim &need_dims, const DDim &real_dims) {
    PADDLE_MOBILE_ENFORCE(tensor_data_ == nullptr,
                          " empty image tensor data shouldn't have value");

    // Held by a smart pointer until installed so it cannot leak if image
    // creation throws.
    std::unique_ptr<CLImageConverterNormal> normal_converter(
        new CLImageConverterNormal());

    real_image_dims = normal_converter->InitImageDimInfoWith(real_dims);
    real_tensor_dims = real_dims;

    image_dims_ = normal_converter->InitImageDimInfoWith(need_dims);
    InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);

    tensor_dims_ = need_dims;
    command_queue_ = command_queue;

    // Free a converter left over from an earlier Init* call before replacing
    // it; overwriting the raw pointer alone would leak it.
    delete image_converter_;
    image_converter_ = normal_converter.release();

    cl_event_ = CLEngine::Instance()->CreateEvent(context);
    initialized_ = true;
    DLOG << " end init cl image";
  }

  /*
   * Initializes this image on top of the device memory already held by
   * `src` instead of creating a new cl_mem. image/tensor dims are derived
   * from `need_dims`; the dims derived from `src` are recorded in
   * real_image_dims / real_tensor_dims.
   *
   * The shared cl_mem is retained before being adopted so that both objects
   * hold their own reference.
   * NOTE(review): this relies on CLMemDeleter releasing the handle with
   * clReleaseMemObject -- confirm in cl_deleter.h.
   * */
  void InitWithExitedMem(cl_context context, cl_command_queue command_queue,
                         DDim need_dims, CLImage &src) {
    // Held by a smart pointer until installed so it cannot leak if a step
    // below throws.
    std::unique_ptr<CLImageConverterNormal> normal_converter(
        new CLImageConverterNormal());

    real_image_dims = normal_converter->InitImageDimInfoWith(src.dims());
    real_tensor_dims = src.dims();

    image_dims_ = normal_converter->InitImageDimInfoWith(need_dims);

    if (cl_image_ != src.cl_image_) {
      cl_mem shared = src.cl_image_.get();
      if (shared != nullptr) {
        // Two unique_ptrs are about to own the same handle; without an extra
        // reference the second deleter would release an already freed object.
        cl_int err = clRetainMemObject(shared);
        if (err != CL_SUCCESS) {
          CL_CHECK_ERRORS(err);
          PADDLE_MOBILE_THROW_EXCEPTION(" retain shared cl image error ");
        }
      }
      cl_image_.reset(shared);
    }

    tensor_dims_ = need_dims;
    command_queue_ = command_queue;

    // Free a converter left over from an earlier Init* call before replacing
    // it; overwriting the raw pointer alone would leak it.
    delete image_converter_;
    image_converter_ = normal_converter.release();

    cl_event_ = CLEngine::Instance()->CreateEvent(context);
    initialized_ = true;
    DLOG << " end init cl image";
  }

191 192 193 194 195 196 197 198 199 200 201 202
  /*!
   * Makes this image refer to the same device memory as `src` and returns
   * *this. Only the cl_mem handle is shared; dims, converter and event are
   * left untouched.
   *
   * The shared cl_mem is retained before being adopted so that both objects
   * hold their own reference.
   * NOTE(review): this relies on CLMemDeleter releasing the handle with
   * clReleaseMemObject -- confirm in cl_deleter.h.
   */
  inline CLImage &ShareHolderWith(const CLImage &src) {
    PADDLE_MOBILE_ENFORCE(
        src.cl_image_ != nullptr,
        "Tensor holds no memory. Call Tensor::mutable_data first.");

    if (cl_image_ != src.cl_image_) {
      cl_mem shared = src.cl_image_.get();
      if (shared != nullptr) {
        // Two unique_ptrs are about to own the same handle; without an extra
        // reference the second deleter would release an already freed object.
        cl_int err = clRetainMemObject(shared);
        if (err != CL_SUCCESS) {
          CL_CHECK_ERRORS(err);
          PADDLE_MOBILE_THROW_EXCEPTION(" retain shared cl image error ");
        }
      }
      cl_image_.reset(shared);
    }
    return *this;
  }

L
liuruilong 已提交
203
  cl_mem GetCLImage() const { return cl_image_.get(); }
L
liuruilong 已提交
204

Y
yangfei 已提交
205
  const DDim &ImageDims() const { return image_dims_; }
L
liuruilong 已提交
206

L
liuruilong 已提交
207
  inline size_t ImageWidth() const { return image_dims_[0]; }
L
liuruilong 已提交
208

L
liuruilong 已提交
209
  inline size_t ImageHeight() const { return image_dims_[1]; }
L
liuruilong 已提交
210

L
liuruilong 已提交
211
  inline cl_command_queue CommandQueue() const { return command_queue_; }
Y
yangfei 已提交
212

L
liuruilong 已提交
213 214 215 216 217 218 219 220 221 222 223 224
  /*
   *  Replaces the stored tensor dims with `dims` and returns *this so calls
   *  can be chained. Only tensor_dims_ is written; the image dims and the
   *  device image are left as they are.
   * */
  CLImage &Resize(const DDim &dims) {
    tensor_dims_ = dims;
    return *this;
  }

  // Returns the host-side tensor copy reinterpreted as T*. Once the image
  // has been initialized the Init* path has already released that copy, so
  // the call is reported as an error via PADDLE_MOBILE_THROW_EXCEPTION.
  template <typename T>
  T *data() const {
    const bool host_copy_released = initialized_;
    if (host_copy_released) {
      PADDLE_MOBILE_THROW_EXCEPTION(
          " cl image has initialized, tensor data has been deleted, can't use "
          "tensor data");
    }
    float *raw = tensor_data_;
    return reinterpret_cast<T *>(raw);
  }

  /*
   *  numel of tensor dim
   * */
  inline int64_t numel() const { return product(tensor_dims_); }

  /*
   *  original tensor dim
   * */
  const DDim &dims() const { return tensor_dims_; }

L
liuruilong 已提交
241 242
  cl_event GetClEvent() const { return cl_event_.get(); }

L
liuruilong 已提交
243
  CLImageConverterBase *Converter() const { return image_converter_; }
D
dolphin8 已提交
244

L
liuruilong 已提交
245
 private:
L
liuruilong 已提交
246
  /*
   * Creates a read-write 2D RGBA half-float image of width x height in
   * `context` and stores the handle in cl_image_.
   *
   * data: optional host buffer; when non-null its contents are copied into
   *       the new image (CL_MEM_COPY_HOST_PTR), otherwise the image is
   *       created without initial contents.
   *
   * Reports failure through CL_CHECK_ERRORS and
   * PADDLE_MOBILE_THROW_EXCEPTION.
   * */
  void InitCLImage(cl_context context, int width, int height, void *data) {
    // Value-initialize and assign field by field: designated initializers are
    // not standard C++ before C++20 and some compilers reject them, and the
    // int -> size_t conversions are made explicit instead of narrowing inside
    // a braced initializer.
    cl_image_format cf = {};
    cf.image_channel_order = CL_RGBA;
    cf.image_channel_data_type = CL_HALF_FLOAT;

    cl_image_desc cid = {};
    cid.image_type = CL_MEM_OBJECT_IMAGE2D;
    cid.image_width = static_cast<size_t>(width);
    cid.image_height = static_cast<size_t>(height);
    cid.image_depth = 1;
    cid.image_array_size = 1;
    cid.image_row_pitch = 0;
    cid.image_slice_pitch = 0;
    cid.num_mip_levels = 0;
    cid.num_samples = 0;
    cid.buffer = nullptr;

    cl_int err;
    cl_mem cl_image = clCreateImage(
        context, CL_MEM_READ_WRITE | (data ? CL_MEM_COPY_HOST_PTR : 0),
        &cf,   // const cl_image_format *image_format
        &cid,  // const cl_image_desc *image_desc
        data,  // void *host_ptr
        &err);
    cl_image_.reset(cl_image);
    if (err != CL_SUCCESS) {
      CL_CHECK_ERRORS(err);
      PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
    }
  }
L
liuruilong 已提交
275

L
liuruilong 已提交
276
  bool initialized_ = false;
L
liuruilong 已提交
277
  std::unique_ptr<_cl_mem, CLMemDeleter> cl_image_;
L
liuruilong 已提交
278
  std::unique_ptr<_cl_event, CLEventDeleter> cl_event_;
279
  DDim tensor_dims_;
L
liuruilong 已提交
280
  DDim image_dims_;
281 282 283 284
  // real image dims usually it is same as image_dims
  DDim real_image_dims;
  // real tensor dims usually it is same as tensor dims
  DDim real_tensor_dims;
L
liuruilong 已提交
285
  float *tensor_data_ = nullptr;
L
liuruilong 已提交
286
  cl_context context_;
Y
yangfei 已提交
287
  cl_command_queue command_queue_;
L
liuruilong 已提交
288
  CLImageConverterBase *image_converter_ = nullptr;
L
liuruilong 已提交
289 290
};

Y
yangfei 已提交
291 292
// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the name and parameters this presumably fills
// `image` from `tensor` by running `kernel` on `commandQueue` -- confirm
// against the definition.
void TensorToCLImage(Tensor *tensor, CLImage *image, cl_context context,
                     cl_command_queue commandQueue, cl_kernel kernel);
Y
yangfei 已提交
293

Y
yangfei 已提交
294 295
// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the name and parameters this presumably fills
// `tensor` from `image` by running `kernel` on `commandQueue` -- confirm
// against the definition.
void CLImageToTensor(CLImage *image, Tensor *tensor, cl_context context,
                     cl_command_queue commandQueue, cl_kernel kernel);
L
liuruilong 已提交
296

L
liuruilong 已提交
297 298 299 300
#ifdef PADDLE_MOBILE_DEBUG
// Stream operator for printing a CLImage; only declared when
// PADDLE_MOBILE_DEBUG is defined. The definition is not part of this header.
Print &operator<<(Print &printer, const CLImage &image);
#endif

301 302
}  // namespace framework
}  // namespace paddle_mobile