未验证 提交 6135fd4a 编写于 作者: X xiebaiyuan 提交者: GitHub

[mobile][opencl]opt cl mem (#2736)

* [mobile][opencl] fix a leak in CLEngine, fix loader.load's segfault, move the context/command-queue pointers out of CLScope, test=mobile

* [mobile][opencl] suit CPU mode (skip GPU runtime setup when running on CPU), test=mobile

* [mobile][opencl] share the cl_mem pointer with others in CLImage via shared_ptr, test=mobile
上级 08afd3aa
......@@ -15,45 +15,51 @@ limitations under the License. */
#pragma once
#include "CL/cl.h"
#include "common/log.h"
// Deleter for smart pointers owning a cl_kernel: releases the kernel
// exactly once and logs the OpenCL status code for debugging.
struct CLKernelDeleter {
  template <class T>
  void operator()(T *clKernelObj) {
    // Release only once — a second clReleaseKernel on the same object
    // would over-decrement its reference count (double release).
    const cl_int status = clReleaseKernel(clKernelObj);
    LOG(paddle_mobile::kNO_LOG) << "clReleaseKernel status: " << status;
  }
};
// Deleter for smart pointers owning a cl_mem: releases the memory
// object exactly once and logs the OpenCL status code.
struct CLMemDeleter {
  template <class T>
  void operator()(T *clMemObj) {
    // Single release — duplicating the clReleaseMemObject call would
    // over-decrement the object's reference count.
    const cl_int status = clReleaseMemObject(clMemObj);
    LOG(paddle_mobile::kNO_LOG) << "CLMemDeleter status: " << status;
  }
};
// Deleter for smart pointers owning a cl_event: releases the event
// exactly once and logs the OpenCL status code.
struct CLEventDeleter {
  template <class T>
  void operator()(T *clEventObj) {
    // Single release — a duplicate clReleaseEvent would double-release.
    const cl_int status = clReleaseEvent(clEventObj);
    LOG(paddle_mobile::kNO_LOG) << "CLEventDeleter status: " << status;
  }
};
// Deleter for smart pointers owning a cl_command_queue: releases the
// queue exactly once and logs the OpenCL status code.
struct CLCommQueueDeleter {
  template <class T>
  void operator()(T *clQueueObj) {
    // Single release — a duplicate clReleaseCommandQueue would
    // double-release the queue.
    const cl_int status = clReleaseCommandQueue(clQueueObj);
    LOG(paddle_mobile::kNO_LOG) << "CLCommQueueDeleter status: " << status;
  }
};
// Deleter for smart pointers owning a cl_context: releases the context
// exactly once and logs the OpenCL status code.
struct CLContextDeleter {
  template <class T>
  void operator()(T *clContextObj) {
    // Single release — a duplicate clReleaseContext would double-release.
    const cl_int status = clReleaseContext(clContextObj);
    LOG(paddle_mobile::kNO_LOG) << "CLContextDeleter status: " << status;
  }
};
// Deleter for smart pointers owning a cl_program: releases the program
// exactly once and logs the OpenCL status code.
struct CLProgramDeleter {
  template <class T>
  void operator()(T *clProgramObj) {
    // Single release — a duplicate clReleaseProgram would double-release.
    const cl_int status = clReleaseProgram(clProgramObj);
    LOG(paddle_mobile::kNO_LOG) << "CLProgramDeleter status: " << status;
  }
};
......@@ -23,9 +23,11 @@ namespace paddle_mobile {
namespace framework {
bool CLEngine::Init() {
LOG(paddle_mobile::kNO_LOG) << "CLEngine::Init()";
if (initialized_) {
return true;
}
LOG(paddle_mobile::kNO_LOG) << "CLEngine::Init() ...";
cl_int status;
bool is_setplatform_success = SetPlatform();
bool is_setcldeviceid_success = SetClDeviceId();
......@@ -53,12 +55,14 @@ bool CLEngine::SetPlatform() {
return false;
}
/**For clarity, choose the first available platform. */
LOG(paddle_mobile::kNO_LOG) << "numPlatforms: " << numPlatforms;
if (numPlatforms > 0) {
cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>(
malloc(numPlatforms * sizeof(cl_platform_id)));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
platform_ = platforms[0];
free(platforms);
LOG(paddle_mobile::kNO_LOG) << "platform: " << platform_;
return status == CL_SUCCESS;
}
......@@ -67,70 +71,21 @@ bool CLEngine::SetPlatform() {
// Enumerates the GPU devices of the previously selected platform_ and
// stores their ids in devices_. Returns true only when at least one GPU
// device id was retrieved successfully; false on query failure or when
// no GPU device exists.
bool CLEngine::SetClDeviceId() {
  cl_uint numDevices = 0;
  devices_ = NULL;
  LOG(paddle_mobile::kNO_LOG) << "platform: " << platform_;
  // First query: ask only for the number of GPU devices.
  cl_int status =
      clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
  if (status != CL_SUCCESS) {
    return false;
  }
  LOG(paddle_mobile::kNO_LOG) << "numDevices: " << numDevices;
  if (numDevices > 0) {
    // NOTE(review): this malloc'ed list is never freed — presumably
    // acceptable because the engine is a process-lifetime singleton;
    // confirm against CLEngine's destructor.
    devices_ = reinterpret_cast<cl_device_id *>(
        malloc(numDevices * sizeof(cl_device_id)));
    // Second query: fill in the actual device ids.
    status = clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, numDevices, devices_,
                            NULL);
    LOG(paddle_mobile::kNO_LOG) << "devices_[0]" << devices_[0];
    return status == CL_SUCCESS;
  }
  return false;
}
// std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel(
// const std::string &kernel_name) {
// std::unique_ptr<_cl_kernel, clKernel_deleter> kernel(
// clCreateKernel(program_.get(), kernel_name.c_str(), NULL));
// return std::move(kernel);
//}
//
// bool CLEngine::SetClCommandQueue() {
// cl_int status;
// command_queue_.reset(
// clCreateCommandQueue(context_.get(), devices_[0], 0, &status));
// return true;
//}
// bool CLEngine::SetClContext() {
// context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL));
// return true;
//}
// bool CLEngine::LoadKernelFromFile(const char *kernel_file) {
// size_t size;
// char *str;
// std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
//
// if (!f.is_open()) {
// return false;
// }
//
// size_t fileSize;
// f.seekg(0, std::fstream::end);
// size = fileSize = (size_t)f.tellg();
// f.seekg(0, std::fstream::beg);
// str = new char[size + 1];
// if (!str) {
// f.close();
// return 0;
// }
//
// f.read(str, fileSize);
// f.close();
// str[size] = '\0';
// const char *source = str;
// size_t sourceSize[] = {strlen(source)};
// program_.reset(
// clCreateProgramWithSource(context_.get(), 1, &source, sourceSize,
// NULL));
// return true;
//}
} // namespace framework
} // namespace paddle_mobile
......@@ -57,19 +57,27 @@ class CLLocalWorkSizeInfo {
// max number of work-items in local_work_size in dim 2
size_t max_work_item_size2;
};
// OpenCL context error callback (pfn_notify signature); registered in
// CreateContext() via clCreateContext. Prints the driver-reported error
// string to stderr. private_info/cb/user_data are part of the required
// callback signature and intentionally unused here.
// NOTE(review): the CL runtime may invoke this asynchronously — confirm
// stderr logging is acceptable from arbitrary threads.
inline void ctx_info(const char *errinfo, const void *private_info, size_t cb,
                     void *user_data) {
  fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
}
class CLEngine {
public:
static CLEngine *Instance();
bool Init();
bool isInitSuccess();
std::unique_ptr<_cl_context, CLContextDeleter> CreateContext() {
std::shared_ptr<_cl_context> CreateContext() {
DLOG << "CreateContext ---";
DLOG << "platform: " << platform_;
DLOG << "devices_[0]: " << devices_[0];
cl_int status;
cl_context c = clCreateContext(NULL, 1, devices_, NULL, NULL, &status);
std::unique_ptr<_cl_context, CLContextDeleter> context_ptr(c);
cl_context c = clCreateContext(NULL, 1, devices_, &ctx_info, NULL, &status);
std::shared_ptr<_cl_context> context(c, CLContextDeleter());
CL_CHECK_ERRORS(status);
return std::move(context_ptr);
return std::move(context);
}
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> CreateClCommandQueue(
......@@ -84,14 +92,14 @@ class CLEngine {
}
// Lazily creates the context on first use and caches it in context_.
// Returns the raw (non-owning) cl_context handle.
// NOTE(review): first-call initialization is not synchronized —
// confirm this is only reached from a single thread.
cl_context getContext() {
  if (context_ == nullptr) {
    context_ = CreateContext();
  }
  return context_.get();
}
cl_command_queue getClCommandQueue() {
if (command_queue_ == nullptr) {
if (command_queue_.get() == nullptr) {
command_queue_ = CreateClCommandQueue(getContext());
}
return command_queue_.get();
......@@ -216,11 +224,7 @@ class CLEngine {
DLOG << " program build error: " << log;
}
if (status == CL_SUCCESS) {
return true;
} else {
return false;
}
return status == CL_SUCCESS;
}
cl_device_id DeviceID(int index = 0) { return devices_[index]; }
......@@ -239,28 +243,13 @@ class CLEngine {
CLLocalWorkSizeInfo localWorkSizeInfo_;
cl_platform_id platform_;
cl_device_id *devices_;
cl_int status_;
std::string cl_path_;
std::unique_ptr<_cl_program, CLProgramDeleter> program_;
std::unique_ptr<_cl_context, CLContextDeleter> context_ = nullptr;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ =
nullptr;
// bool SetClContext();
// bool SetClCommandQueue();
// bool LoadKernelFromFile(const char *kernel_file);
// bool BuildProgram();
bool is_init_success_ = false;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
std::shared_ptr<_cl_context> context_;
cl_device_id devices_[10];
cl_platform_id platform_;
};
} // namespace framework
......
......@@ -211,7 +211,7 @@ class CLImage {
real_image_dims_[1] >= image_dims_[1],
"real image is not enough!");
if (cl_image_ != src.cl_image_) {
cl_image_.reset(src.cl_image_.get(), CLMemDeleter());
cl_image_ = src.cl_image_;
}
tensor_dims_ = need_dims;
......@@ -233,18 +233,6 @@ class CLImage {
InitCLImage(context, command_queue, converter);
}
/*! The internal of two tensors share the same memory block. */
// Shares src's cl_mem holder with this image. Copying the shared_ptr
// (rather than reset(src.get(), deleter)) is essential: reset with a
// raw get() would create a second, independent owner of the same
// cl_mem, and each owner's deleter would release it — a double release.
inline CLImage &ShareHolderWith(const CLImage &src) {
  PADDLE_MOBILE_ENFORCE(
      src.cl_image_ != nullptr,
      "Tensor holds no memory. Call Tensor::mutable_data first.")
  if (cl_image_ != src.cl_image_) {
    cl_image_ = src.cl_image_;
  }
  return *this;
}
// Raw, non-owning handle to the underlying cl_mem image.
cl_mem GetCLImage() const { return cl_image_.get(); }
// Dimensions of the backing OpenCL image.
const DDim &ImageDims() const { return image_dims_; }
......
......@@ -35,14 +35,11 @@ namespace framework {
class CLScope {
public:
// Default-constructed scope caches no CL state; the context, command
// queue and work-size info are fetched from the CLEngine singleton on
// demand by the accessors below, so construction is safe even before
// the engine has been initialized.
CLScope() {}
// Command queue is owned by CLEngine; resolved lazily on each call
// rather than cached, so the scope never holds a stale handle.
cl_command_queue CommandQueue() {
  return CLEngine::Instance()->getClCommandQueue();
}
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
const std::string &kernel_name, const std::string &file_name,
......@@ -58,7 +55,7 @@ class CLScope {
return std::move(kernel);
}
// Context is owned by CLEngine; resolved lazily rather than cached.
cl_context Context() { return CLEngine::Instance()->getContext(); }
cl_program Program(const std::string &file_name,
const std::string &kernel_name,
......@@ -79,7 +76,7 @@ class CLScope {
std::string header(header_it->second.begin(), header_it->second.end());
source = header + "\n" + source;
auto program = CLEngine::Instance()->CreateProgramWithSource(
context_, source.c_str());
CLEngine::Instance()->getContext(), source.c_str());
LOG(kLOG_DEBUG3) << " --- begin build program -> " << program_key
<< " --- ";
......@@ -99,7 +96,7 @@ class CLScope {
return it->second.get();
}
auto program = CLEngine::Instance()->CreateProgramWith(
context_,
CLEngine::Instance()->getContext(),
CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name);
LOG(kLOG_DEBUG3) << " --- begin build program ele-> " << program_key
......@@ -113,7 +110,9 @@ class CLScope {
}
}
// Work-group size limits are queried from the engine on demand; no
// per-scope copy is kept.
CLLocalWorkSizeInfo LocalWorkSizeInfo() {
  return CLEngine::Instance()->getLocalWorkSizeInfo();
}
size_t KernelWorkSize(cl_kernel kernel) {
size_t kernel_work_size = CLEngine::Instance()->GetKernelWorkSize(kernel);
return kernel_work_size;
......@@ -121,12 +120,9 @@ class CLScope {
private:
cl_int status_;
cl_context context_;
cl_command_queue command_queue_;
std::unordered_map<std::string,
std::unique_ptr<_cl_program, CLProgramDeleter>>
programs_;
CLLocalWorkSizeInfo localWorkSizeInfo_;
};
} // namespace framework
......
......@@ -28,8 +28,26 @@ cl_command_queue getClCommandQueue() {
}
// Reports whether the OpenCL engine initialized successfully.
// Warms up the CL runtime first (best-effort, see
// prepareOpenclRuntime) so the engine can be probed safely.
bool isInitSuccess() {
  prepareOpenclRuntime();
  return framework::CLEngine::Instance()->isInitSuccess();
}
// Best-effort warm-up of the OpenCL runtime: queries (and discards)
// the platform list so the vendor driver gets loaded early. Compiles
// to a no-op unless PREPARE_OPENCL_RUNTIME is defined. Always returns
// true; query failures are deliberately ignored.
bool prepareOpenclRuntime() {
#ifdef PREPARE_OPENCL_RUNTIME
  DLOG << "cl runtime prepared. ";
  cl_uint numPlatforms;  // the NO. of platforms
  cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
  if (status == CL_SUCCESS) {
    if (numPlatforms > 0) {
      cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>(
          malloc(numPlatforms * sizeof(cl_platform_id)));
      // Result of the second query is intentionally unused; the point
      // is only to touch the driver once.
      status = clGetPlatformIDs(numPlatforms, platforms, NULL);
      free(platforms);
    }
  }
#endif
  return true;
}
} // namespace paddle_mobile
#endif
......@@ -21,6 +21,7 @@ namespace paddle_mobile {
cl_context getContext();
cl_command_queue getClCommandQueue();
bool isInitSuccess();
bool prepareOpenclRuntime();
} // namespace paddle_mobile
......
......@@ -26,6 +26,7 @@ limitations under the License. */
#include "io/paddle_inference_api.h"
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_engine.h"
#include "io/opencl_interface.h"
#endif
namespace paddle_mobile {
......@@ -34,16 +35,24 @@ template <typename Device, typename T = float>
class PaddleMobile {
public:
// Constructs the engine with an explicit internal config. On an
// OpenCL-enabled build, a GPU device request warms up the CL runtime
// immediately; on a CPU-only build, requesting the GPU is a hard error.
explicit PaddleMobile(PaddleMobileConfigInternal config) : config_(config) {
  bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
#ifndef PADDLE_MOBILE_CL
  PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
#else
  if (is_gpu) {
    prepareOpenclRuntime();
  }
#endif
}
// Default constructor. Mirrors the config constructor: on an
// OpenCL-enabled build, a GPU device request warms up the CL runtime;
// on a CPU-only build, requesting the GPU is a hard error. The CL-build
// check is still needed when running CPU inference within an
// OpenCL-enabled binary.
PaddleMobile() {
  bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
#ifndef PADDLE_MOBILE_CL
  PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
#else
  if (is_gpu) {
    prepareOpenclRuntime();
  }
#endif
}
// Virtual: PaddleMobile may be deleted through a base pointer.
// Releases engine state via Clear().
virtual ~PaddleMobile() { Clear(); }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册