cl_image.h 11.5 KB
Newer Older
L
liuruilong 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

17
#include <memory>
18 19 20
#include <vector>

#include "CL/cl.h"
L
liuruilong 已提交
21

L
liuruilong 已提交
22
#include "framework/cl/cl_deleter.h"
L
liuruilong 已提交
23
#include "framework/cl/cl_engine.h"
L
liuruilong 已提交
24 25 26
#include "framework/cl/cl_half.h"
#include "framework/cl/cl_image_converter.h"
#include "framework/cl/cl_tool.h"
L
liuruilong 已提交
27 28 29 30 31 32 33 34
#include "framework/ddim.h"
#include "framework/tensor.h"

namespace paddle_mobile {
namespace framework {

class CLImage {
 public:
L
liuruilong 已提交
35 36
  // Default construction relies on the in-class member initializers: no
  // tensor data, no converter, not initialized, memory not shared.
  CLImage() = default;

L
liuruilong 已提交
37 38 39 40 41 42 43 44 45
  // Frees the host-side tensor copy and the converter held by this object.
  ~CLImage() {
    // delete / delete[] on a null pointer is a no-op, so no guards are needed.
    delete[] tensor_data_;
    delete image_converter_;
  }
L
liuruilong 已提交
46 47 48 49 50 51 52
  /*
   * will not hold input tensor data, memcpy in this method
   * */
  void SetTensorData(float *tensorData, const DDim &dim) {
    int numel = product(dim);
    if (tensor_data_ != nullptr) {
      delete[](tensor_data_);
L
liuruilong 已提交
53
      tensor_data_ = nullptr;
L
liuruilong 已提交
54 55
    }
    tensor_data_ = new float[numel];
L
liuruilong 已提交
56
    memcpy(tensor_data_, tensorData, numel * sizeof(float));
L
liuruilong 已提交
57 58 59
    tensor_dims_ = dim;
  }

Y
yangfei 已提交
60
  bool isInit() { return initialized_; }
L
liuruilong 已提交
61 62
  /*
   * need call SetTensorData first
L
liuruilong 已提交
63 64
   *
   * folder when one dim or two dim
L
liuruilong 已提交
65
   * */
L
liuruilong 已提交
66
  void InitCLImage(cl_context context, cl_command_queue command_queue) {
L
liuruilong 已提交
67 68 69 70 71 72
    PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
                          " need call SetTensorData first");
    CLImageConverterFolder *folder_converter = new CLImageConverterFolder();
    InitCLImage(context, command_queue, folder_converter);
  }

Y
yangfei 已提交
73 74 75 76 77 78 79
  /*
   * Initializes the cl image through a CLImageConverterNormal.
   * SetTensorData must have been called beforehand.
   * */
  void InitNormalCLImage(cl_context context, cl_command_queue command_queue) {
    PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
                          " need call SetTensorData first");
    // The three-argument overload stores the converter in image_converter_.
    InitCLImage(context, command_queue, new CLImageConverterNormal());
  }

L
liuruilong 已提交
80 81 82 83
  /*
   * Converts the tensor data stored by SetTensorData into half image data
   * with `converter`, creates the 2D cl image from it, and then frees the
   * host tensor buffer.
   *
   * context       : OpenCL context the image is created in.
   * command_queue : stored and later returned by CommandQueue().
   * converter     : stored in image_converter_ on success; from then on this
   *                 object deletes it (destructor or a later Init* call).
   *                 If this method fails part-way, `converter` has not been
   *                 adopted and the previous converter is left untouched.
   * */
  void InitCLImage(cl_context context, cl_command_queue command_queue,
                   CLImageConverterBase *converter) {
    // Validate before mutating any state, so a failed precondition cannot
    // leave image_converter_ pointing at an already deleted object.
    PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
                          " need call SetTensorData first");

    DLOG << " begin init cl image ";
    image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);

    {
      // Four half values per image element. The staging buffer is held by a
      // unique_ptr so it is released even if conversion or creation throws.
      std::unique_ptr<half_t[]> image_data(
          new half_t[product(image_dims_) * 4]);

      DLOG << " convert to image";
      converter->NCHWToImage(tensor_data_, image_data.get(), tensor_dims_);
      DLOG << " end convert to image";

      InitCLImage(context, image_dims_[0], image_dims_[1], image_data.get());
    }

    // The host copy is no longer needed once the image exists.
    delete[] tensor_data_;
    tensor_data_ = nullptr;

    // Swap converters only after everything succeeded; the inequality guard
    // keeps a caller that passes the current converter back in from having
    // it deleted underneath itself.
    if (image_converter_ != converter) {
      delete image_converter_;
    }
    image_converter_ = converter;

    command_queue_ = command_queue;
    initialized_ = true;
    DLOG << " end init cl image";
  }

L
liuruilong 已提交
110
  /*
   * Initializes the cl image through a CLImageConverterNWBlock.
   * Requires SetTensorData to have been called and the tensor dims to have
   * exactly four entries.
   * */
  void InitNImage(cl_context context, cl_command_queue command_queue) {
    if (tensor_data_ == nullptr) {
      PADDLE_MOBILE_THROW_EXCEPTION(" need call SetTensorData first");
    }
    // Check the rank before converting: validating only after InitCLImage
    // would let a wrongly shaped tensor be converted (and the host data
    // freed) before the error is reported.
    PADDLE_MOBILE_ENFORCE(tensor_dims_.size() == 4, " tensor dim is not 4");
    CLImageConverterNWBlock *folder_converter = new CLImageConverterNWBlock();
    InitCLImage(context, command_queue, folder_converter);
  }
Y
yangfei 已提交
118 119 120 121 122 123 124 125
  /*
   * Initializes the cl image through a CLImageConverterDWBlock.
   * Requires SetTensorData to have been called and the tensor dims to have
   * exactly four entries.
   * */
  void InitDWImage(cl_context context, cl_command_queue command_queue) {
    if (tensor_data_ == nullptr) {
      PADDLE_MOBILE_THROW_EXCEPTION(" need call SetTensorData first");
    }
    // Check the rank before converting: validating only after InitCLImage
    // would let a wrongly shaped tensor be converted (and the host data
    // freed) before the error is reported.
    PADDLE_MOBILE_ENFORCE(tensor_dims_.size() == 4, " tensor dim is not 4");
    CLImageConverterDWBlock *dw_converter = new CLImageConverterDWBlock();
    InitCLImage(context, command_queue, dw_converter);
  }
Y
yangfei 已提交
126

L
liuruilong 已提交
127 128
  /*
   * Creates a cl image without initial contents, sized for a tensor of
   * shape `dim` as computed by a CLImageConverterNormal.
   *
   * Must not be called while tensor data is pending (SetTensorData without a
   * following Init*) or after the memory has been marked as shared.
   * Also creates the cl event returned by GetClEvent().
   * */
  void InitEmptyImage(cl_context context, cl_command_queue command_queue,
                      const DDim &dim) {
    // Validate everything first so a failed check leaves the object,
    // including the current converter, exactly as it was.
    PADDLE_MOBILE_ENFORCE(tensor_data_ == nullptr,
                          " empty image tensor data shouldn't have value");
    PADDLE_MOBILE_ENFORCE(!shared_mem_, "do not init mem after shared .");

    // Held by unique_ptr until handed to image_converter_, so it cannot leak
    // if image creation throws.
    std::unique_ptr<CLImageConverterNormal> normal_converter(
        new CLImageConverterNormal());

    DLOG << " to get image dims ";
    image_dims_ = normal_converter->InitImageDimInfoWith(dim);
    DLOG << " end get image dims " << image_dims_;

    InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);

    tensor_dims_ = dim;
    command_queue_ = command_queue;

    // Replace the previous converter only after the image exists.
    delete image_converter_;
    image_converter_ = normal_converter.release();

    cl_event_ = CLEngine::Instance()->CreateEvent(context);
    initialized_ = true;
    DLOG << " end init cl image";
  }
152 153 154
  /**
   *  create fake size cl_mem for mem share
   */
155
  void InitFakeSizeImage(cl_context context, cl_command_queue command_queue,
156
                         const DDim &need_dims, const DDim &real_image_dims) {
157 158
    PADDLE_MOBILE_ENFORCE(tensor_data_ == nullptr,
                          " empty image tensor data shouldn't have value");
159 160 161
    if (image_converter_ != nullptr) {
      delete image_converter_;
    }
162
    CLImageConverterNormal *normal_converter = new CLImageConverterNormal();
163 164
    // use real image dims to create mem
    real_image_dims_ = real_image_dims;
165 166 167
    // when init fake size image ,
    // reinit image is allow , it is disallowed after this..
    shared_mem_ = false;
168 169
    InitCLImage(context, real_image_dims_[0], real_image_dims_[1], nullptr);
    // cheat cl_image they got what they wanted
170
    image_dims_ = normal_converter->InitImageDimInfoWith(need_dims);
171 172 173 174 175 176
    DLOG << "InitFakeSizeImage ... ";
    DLOG << "real_image_dims:  " << real_image_dims_;
    DLOG << "image_dims_:  " << image_dims_;
    PADDLE_MOBILE_ENFORCE(real_image_dims_[0] >= image_dims_[0] &&
                              real_image_dims_[1] >= image_dims_[1],
                          "real image is not enough");
177 178 179 180 181
    tensor_dims_ = need_dims;
    command_queue_ = command_queue;
    image_converter_ = normal_converter;
    cl_event_ = CLEngine::Instance()->CreateEvent(context);
    initialized_ = true;
182 183 184
    shared_mem_ = true;

    DLOG << " end init FakeSizeImage";
185
  }
186 187 188 189
  /**
   * init cl mem with a exist cl mem
   */
  void InitWithExistMem(cl_context context, cl_command_queue command_queue,
190
                        DDim need_dims, const CLImage &src) {
191 192 193
    if (image_converter_ != nullptr) {
      delete image_converter_;
    }
194 195
    CLImageConverterNormal *normal_converter = new CLImageConverterNormal();

196
    real_image_dims_ = src.real_image_dims_;
197
    image_dims_ = normal_converter->InitImageDimInfoWith(need_dims);
198 199 200 201

    DLOG << "InitWithExistMem ... ";
    DLOG << "real_image_dims:  " << real_image_dims_;
    DLOG << "image_dims_:  " << image_dims_;
202

203 204 205 206 207 208
    if (real_image_dims_[0] < image_dims_[0] ||
        real_image_dims_[1] < image_dims_[1]) {
      DLOG << "real image is not enough!";
      DLOG << "real_image_dims:  " << real_image_dims_;
      DLOG << "image_dims_:  " << image_dims_;
    }
209 210 211
    PADDLE_MOBILE_ENFORCE(real_image_dims_[0] >= image_dims_[0] &&
                              real_image_dims_[1] >= image_dims_[1],
                          "real image is not enough!");
212
    if (cl_image_ != src.cl_image_) {
213
      cl_image_.reset(src.cl_image_.get(), CLMemDeleter());
214 215 216
    }

    tensor_dims_ = need_dims;
217
    command_queue_ = command_queue;
218
    image_converter_ = normal_converter;
219 220
    cl_event_ = CLEngine::Instance()->CreateEvent(context);
    initialized_ = true;
221 222 223
    shared_mem_ = true;

    DLOG << " end init WithExistMem";
224 225
  }

226 227 228 229 230 231 232 233 234
  /*
   * Initializes the cl image through a
   * CLImageConverterConv2dTransposeTransWeight.
   * SetTensorData must have been called beforehand.
   * */
  void InitConv2dTransposeFilterCLImage(cl_context context,
                                        cl_command_queue command_queue) {
    PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
                          " need call SetTensorData first");
    // The three-argument overload stores the converter in image_converter_.
    auto *weight_converter = new CLImageConverterConv2dTransposeTransWeight();
    InitCLImage(context, command_queue, weight_converter);
  }

235 236 237 238 239 240 241
  /*!
   * Makes this image refer to the same cl_mem as `src`.
   *
   * Only the memory handle is shared; dims, converter and flags of this
   * object are not modified. `src` must already hold a cl_mem.
   * Returns *this.
   */
  inline CLImage &ShareHolderWith(const CLImage &src) {
    PADDLE_MOBILE_ENFORCE(
        src.cl_image_ != nullptr,
        "Tensor holds no memory. Call Tensor::mutable_data first.");

    // Copy the shared_ptr so both objects use one control block. Building a
    // second shared_ptr from src.cl_image_.get() would create an independent
    // owner, and the deleter would then run twice on the same cl_mem.
    cl_image_ = src.cl_image_;
    return *this;
  }

L
liuruilong 已提交
247
  // Raw cl_mem handle held by cl_image_ (null if none was set). Ownership
  // stays with the shared_ptr; the returned handle is not retained here.
  cl_mem GetCLImage() const { return cl_image_.get(); }
L
liuruilong 已提交
248

Y
yangfei 已提交
249
  // Image dims as computed by the converter in the Init* methods;
  // index 0 is used as width and index 1 as height when the image is created.
  const DDim &ImageDims() const { return image_dims_; }
L
liuruilong 已提交
250

L
liuruilong 已提交
251
  // Image width: entry 0 of image_dims_.
  inline size_t ImageWidth() const { return image_dims_[0]; }
L
liuruilong 已提交
252

L
liuruilong 已提交
253
  // Image height: entry 1 of image_dims_.
  inline size_t ImageHeight() const { return image_dims_[1]; }
L
liuruilong 已提交
254

L
liuruilong 已提交
255
  // Command queue recorded by the most recent Init* call.
  inline cl_command_queue CommandQueue() const { return command_queue_; }
Y
yangfei 已提交
256

L
liuruilong 已提交
257 258 259 260 261 262 263 264 265 266 267 268
  /*
   *  Overwrites the recorded original tensor dims with `dims` and returns
   *  *this. Only tensor_dims_ is assigned here; image dims and the cl_mem
   *  are left as they were.
   * */
  inline CLImage &Resize(const DDim &dims) {
    tensor_dims_ = dims;
    return *this;
  }

  /*
   *  Host tensor buffer stored by SetTensorData, viewed as T*.
   *  Only usable before the image has been initialized: the Init* path frees
   *  the host buffer, so calling this afterwards raises an exception.
   * */
  template <typename T>
  T *data() const {
    T *const typed_data = reinterpret_cast<T *>(tensor_data_);
    if (initialized_) {
      PADDLE_MOBILE_THROW_EXCEPTION(
          " cl image has initialized, tensor data has been deleted, can't use "
          "tensor data");
    }
    return typed_data;
  }

  /*
   *  Number of elements of the original tensor: product(tensor_dims_).
   * */
  inline int64_t numel() const { return product(tensor_dims_); }

  /*
   *  Original tensor dims, as recorded by SetTensorData, Resize or one of
   *  the Init* methods that take dims.
   * */
  const DDim &dims() const { return tensor_dims_; }

L
liuruilong 已提交
285 286
  // Raw cl_event handle held by cl_event_ (null until an Init* method that
  // creates the event has run). Ownership stays with this object.
  cl_event GetClEvent() const { return cl_event_.get(); }

L
liuruilong 已提交
287
  // Converter currently stored by this image (nullptr before any Init*).
  // The pointer is deleted by this object, so callers must not delete it.
  CLImageConverterBase *Converter() const { return image_converter_; }
D
dolphin8 已提交
288

L
liuruilong 已提交
289
 private:
290 291
  /*
   * Creates the 2D OpenCL image (CL_RGBA, CL_HALF_FLOAT) of width x height
   * and stores it in cl_image_.
   *
   * data : optional host pointer; when non-null its contents are copied into
   *        the image at creation (CL_MEM_COPY_HOST_PTR), when null the image
   *        is created without initial contents.
   *
   * Refused once the memory has been marked as shared. On a creation error
   * cl_image_ is left empty and an exception is raised.
   * */
  void InitCLImage(cl_context context, size_t width, size_t height,
                   void *data) {
    PADDLE_MOBILE_ENFORCE(!shared_mem_, "do not init mem after shared .");

    // Plain member assignment instead of designated initializers, which are
    // not standard C++ before C++20.
    cl_image_format cf = {};
    cf.image_channel_order = CL_RGBA;
    cf.image_channel_data_type = CL_HALF_FLOAT;

    cl_image_desc cid = {};
    cid.image_type = CL_MEM_OBJECT_IMAGE2D;
    cid.image_width = width;
    cid.image_height = height;
    cid.image_depth = 1;
    cid.image_array_size = 1;
    cid.image_row_pitch = 0;
    cid.image_slice_pitch = 0;
    cid.num_mip_levels = 0;
    cid.num_samples = 0;
    cid.buffer = nullptr;

    cl_int err = CL_SUCCESS;
    cl_mem cl_image = clCreateImage(
        context, CL_MEM_READ_WRITE | (data ? CL_MEM_COPY_HOST_PTR : 0),
        &cf,   // const cl_image_format *image_format
        &cid,  // const cl_image_desc *image_desc
        data,  // void *host_ptr
        &err);

    if (err != CL_SUCCESS) {
      // Drop any previous image, but do not wrap the failed handle: a
      // shared_ptr built from a pointer plus deleter invokes the deleter
      // even when that pointer is null.
      cl_image_.reset();
      CL_CHECK_ERRORS(err);
      PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
      return;
    }
    cl_image_.reset(cl_image, CLMemDeleter());
  }
L
liuruilong 已提交
322

L
liuruilong 已提交
323
  bool initialized_ = false;
324
  std::shared_ptr<_cl_mem> cl_image_;
L
liuruilong 已提交
325
  std::unique_ptr<_cl_event, CLEventDeleter> cl_event_;
326
  DDim tensor_dims_;
L
liuruilong 已提交
327
  DDim image_dims_;
328
  // real image dims usually it is same as image_dims
329
  DDim real_image_dims_;
L
liuruilong 已提交
330
  float *tensor_data_ = nullptr;
L
liuruilong 已提交
331
  cl_context context_;
Y
yangfei 已提交
332
  cl_command_queue command_queue_;
L
liuruilong 已提交
333
  CLImageConverterBase *image_converter_ = nullptr;
334
  bool shared_mem_ = false;
L
liuruilong 已提交
335 336
};

Y
yangfei 已提交
337 338
// Declaration only; the definition is not in this header.
// NOTE(review): presumably runs `kernel` on `commandQueue` to transfer the
// contents of `tensor` into `image` - confirm against the implementation.
void TensorToCLImage(Tensor *tensor, CLImage *image, cl_context context,
                     cl_command_queue commandQueue, cl_kernel kernel);
Y
yangfei 已提交
339

Y
yangfei 已提交
340 341
// Declaration only; the definition is not in this header.
// NOTE(review): presumably runs `kernel` on `commandQueue` to transfer the
// contents of `image` back into `tensor` - confirm against the implementation.
void CLImageToTensor(CLImage *image, Tensor *tensor, cl_context context,
                     cl_command_queue commandQueue, cl_kernel kernel);
L
liuruilong 已提交
342

L
liuruilong 已提交
343 344 345 346
// Stream output of a CLImage to the debug printer; only declared when
// PADDLE_MOBILE_DEBUG is defined. The definition is not in this header.
#ifdef PADDLE_MOBILE_DEBUG
Print &operator<<(Print &printer, const CLImage &image);
#endif

347 348
}  // namespace framework
}  // namespace paddle_mobile