From 6135fd4a79d2f757c54124e794088fe2fa294c56 Mon Sep 17 00:00:00 2001
From: xiebaiyuan
Date: Wed, 8 Jan 2020 13:48:48 +0800
Subject: [PATCH] [mobile][opencl]opt cl mem (#2736)

* [mobile][opencl] fix a leak in clengine, fix loader.load's segfault, move the context/command queue ptr out of clscope, test=mobile

* [mobile][opencl] suit cpu mode, test=mobile

* [mobile][opencl] share ptr with others in cl image, test=mobile
---
 mobile/src/framework/cl/cl_deleter.h  | 20 ++++++---
 mobile/src/framework/cl/cl_engine.cpp | 61 ++++-----------------------
 mobile/src/framework/cl/cl_engine.h   | 51 +++++++++-------------
 mobile/src/framework/cl/cl_image.h    | 14 +-----
 mobile/src/framework/cl/cl_scope.h    | 24 +++++------
 mobile/src/io/opencl_interface.cpp    | 18 ++++++++
 mobile/src/io/opencl_interface.h      |  1 +
 mobile/src/io/paddle_mobile.h         | 13 +++++-
 8 files changed, 82 insertions(+), 120 deletions(-)

diff --git a/mobile/src/framework/cl/cl_deleter.h b/mobile/src/framework/cl/cl_deleter.h
index 55af631174..731e5de663 100644
--- a/mobile/src/framework/cl/cl_deleter.h
+++ b/mobile/src/framework/cl/cl_deleter.h
@@ -15,45 +15,51 @@ limitations under the License. */
 #pragma once
 
 #include "CL/cl.h"
-
+#include "common/log.h"
 struct CLKernelDeleter {
   template <typename T>
   void operator()(T *clKernelObj) {
-    clReleaseKernel(clKernelObj);
+    const cl_int status = clReleaseKernel(clKernelObj);
+    LOG(paddle_mobile::kNO_LOG) << "clReleaseKernel status: " << status;
   }
 };
 
 struct CLMemDeleter {
   template <typename T>
   void operator()(T *clMemObj) {
-    clReleaseMemObject(clMemObj);
+    const cl_int status = clReleaseMemObject(clMemObj);
+    LOG(paddle_mobile::kNO_LOG) << "CLMemDeleter status: " << status;
   }
 };
 
 struct CLEventDeleter {
   template <typename T>
   void operator()(T *clEventObj) {
-    clReleaseEvent(clEventObj);
+    const cl_int status = clReleaseEvent(clEventObj);
+    LOG(paddle_mobile::kNO_LOG) << "CLEventDeleter status: " << status;
   }
 };
 
 struct CLCommQueueDeleter {
   template <typename T>
   void operator()(T *clQueueObj) {
-    clReleaseCommandQueue(clQueueObj);
+    const cl_int status = clReleaseCommandQueue(clQueueObj);
+    LOG(paddle_mobile::kNO_LOG) << "CLCommQueueDeleter status: " << status;
  }
 };
 
 struct CLContextDeleter {
   template <typename T>
   void operator()(T *clContextObj) {
-    clReleaseContext(clContextObj);
+    const cl_int status = clReleaseContext(clContextObj);
+    LOG(paddle_mobile::kNO_LOG) << "CLContextDeleter status: " << status;
   }
 };
 
 struct CLProgramDeleter {
   template <typename T>
   void operator()(T *clProgramObj) {
-    clReleaseProgram(clProgramObj);
+    const cl_int status = clReleaseProgram(clProgramObj);
+    LOG(paddle_mobile::kNO_LOG) << "CLProgramDeleter status: " << status;
   }
 };
diff --git a/mobile/src/framework/cl/cl_engine.cpp b/mobile/src/framework/cl/cl_engine.cpp
index c39ae00b00..e8a8361eac 100644
--- a/mobile/src/framework/cl/cl_engine.cpp
+++ b/mobile/src/framework/cl/cl_engine.cpp
@@ -23,9 +23,11 @@ namespace paddle_mobile {
 namespace framework {
 
 bool CLEngine::Init() {
+  LOG(paddle_mobile::kNO_LOG) << "CLEngine::Init()";
   if (initialized_) {
     return true;
   }
+  LOG(paddle_mobile::kNO_LOG) << "CLEngine::Init() ...";
   cl_int status;
   bool is_setplatform_success = SetPlatform();
   bool is_setcldeviceid_success = SetClDeviceId();
@@ -53,12 +55,14 @@ bool CLEngine::SetPlatform() {
     return false;
   }
   /**For clarity, choose the first available platform. */
+  LOG(paddle_mobile::kNO_LOG) << "numPlatforms: " << numPlatforms;
   if (numPlatforms > 0) {
     cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>(
         malloc(numPlatforms * sizeof(cl_platform_id)));
     status = clGetPlatformIDs(numPlatforms, platforms, NULL);
     platform_ = platforms[0];
     free(platforms);
+    LOG(paddle_mobile::kNO_LOG) << "platform: " << platform_;
     return status == CL_SUCCESS;
   }
 
@@ -67,70 +71,21 @@ bool CLEngine::SetClDeviceId() {
   cl_uint numDevices = 0;
-  devices_ = NULL;
+  LOG(paddle_mobile::kNO_LOG) << "platform: " << platform_;
   cl_int status =
       clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
   if (status != CL_SUCCESS) {
     return false;
   }
+  LOG(paddle_mobile::kNO_LOG) << "numDevices: " << numDevices;
+
   if (numDevices > 0) {
-    devices_ = reinterpret_cast<cl_device_id *>(
-        malloc(numDevices * sizeof(cl_device_id)));
     status = clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, numDevices,
                             devices_, NULL);
+    LOG(paddle_mobile::kNO_LOG) << "devices_[0]" << devices_[0];
     return status == CL_SUCCESS;
   }
   return false;
 }
-
-// std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel(
-//     const std::string &kernel_name) {
-//   std::unique_ptr<_cl_kernel, clKernel_deleter> kernel(
-//       clCreateKernel(program_.get(), kernel_name.c_str(), NULL));
-//   return std::move(kernel);
-//}
-//
-// bool CLEngine::SetClCommandQueue() {
-//   cl_int status;
-//   command_queue_.reset(
-//       clCreateCommandQueue(context_.get(), devices_[0], 0, &status));
-//   return true;
-//}
-
-// bool CLEngine::SetClContext() {
-//   context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL));
-//   return true;
-//}
-
-// bool CLEngine::LoadKernelFromFile(const char *kernel_file) {
-//   size_t size;
-//   char *str;
-//   std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
-//
-//   if (!f.is_open()) {
-//     return false;
-//   }
-//
-//   size_t fileSize;
-//   f.seekg(0, std::fstream::end);
-//   size = fileSize = (size_t)f.tellg();
-//   f.seekg(0, std::fstream::beg);
-//   str = new char[size + 1];
-//   if (!str) {
-//     f.close();
-//     return 0;
-//   }
-//
-//   f.read(str, fileSize);
-//   f.close();
-//   str[size] = '\0';
-//   const char *source = str;
-//   size_t sourceSize[] = {strlen(source)};
-//   program_.reset(
-//       clCreateProgramWithSource(context_.get(), 1, &source, sourceSize,
-//       NULL));
-//   return true;
-//}
-
 }  // namespace framework
 }  // namespace paddle_mobile
diff --git a/mobile/src/framework/cl/cl_engine.h b/mobile/src/framework/cl/cl_engine.h
index cf758f8328..2a6362ebc0 100644
--- a/mobile/src/framework/cl/cl_engine.h
+++ b/mobile/src/framework/cl/cl_engine.h
@@ -57,19 +57,27 @@ class CLLocalWorkSizeInfo {
   // max number of work-items in local_work_size in dim 2
   size_t max_work_item_size2;
 };
-
+inline void ctx_info(const char *errinfo, const void *private_info, size_t cb,
+                     void *user_data) {
+  fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
+}
 class CLEngine {
  public:
   static CLEngine *Instance();
 
   bool Init();
   bool isInitSuccess();
-  std::unique_ptr<_cl_context, CLContextDeleter> CreateContext() {
+
+  std::shared_ptr<_cl_context> CreateContext() {
+    DLOG << "CreateContext ---";
+    DLOG << "platform: " << platform_;
+    DLOG << "devices_[0]: " << devices_[0];
+
     cl_int status;
-    cl_context c = clCreateContext(NULL, 1, devices_, NULL, NULL, &status);
-    std::unique_ptr<_cl_context, CLContextDeleter> context_ptr(c);
+    cl_context c = clCreateContext(NULL, 1, devices_, &ctx_info, NULL, &status);
+    std::shared_ptr<_cl_context> context(c, CLContextDeleter());
     CL_CHECK_ERRORS(status);
-    return std::move(context_ptr);
+    return std::move(context);
   }
 
   std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> CreateClCommandQueue(
@@ -84,14 +92,14 @@ class CLEngine {
   }
 
   cl_context getContext() {
-    if (context_ == nullptr) {
+    if (context_.get() == nullptr) {
       context_ = CreateContext();
     }
     return context_.get();
   }
 
   cl_command_queue getClCommandQueue() {
-    if (command_queue_ == nullptr) {
+    if (command_queue_.get() == nullptr) {
       command_queue_ = CreateClCommandQueue(getContext());
     }
     return command_queue_.get();
@@ -216,11 +224,7 @@ class CLEngine {
       DLOG << " program build error: " << log;
     }
 
-    if (status == CL_SUCCESS) {
-      return true;
-    } else {
-      return false;
-    }
+    return status == CL_SUCCESS;
   }
 
   cl_device_id DeviceID(int index = 0) { return devices_[index]; }
@@ -239,28 +243,13 @@ class CLEngine {
 
   CLLocalWorkSizeInfo localWorkSizeInfo_;
 
-  cl_platform_id platform_;
-
-  cl_device_id *devices_;
-
   cl_int status_;
-
   std::string cl_path_;
-  std::unique_ptr<_cl_program, CLProgramDeleter> program_;
-
-  std::unique_ptr<_cl_context, CLContextDeleter> context_ = nullptr;
-
-  std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ =
-      nullptr;
-
-  // bool SetClContext();
-
-  // bool SetClCommandQueue();
-
-  // bool LoadKernelFromFile(const char *kernel_file);
-
-  // bool BuildProgram();
   bool is_init_success_ = false;
+  std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
+  std::shared_ptr<_cl_context> context_;
+  cl_device_id devices_[10];
+  cl_platform_id platform_;
 };
 
 }  // namespace framework
diff --git a/mobile/src/framework/cl/cl_image.h b/mobile/src/framework/cl/cl_image.h
index f891e41a6a..57656c3c6d 100644
--- a/mobile/src/framework/cl/cl_image.h
+++ b/mobile/src/framework/cl/cl_image.h
@@ -211,7 +211,7 @@ class CLImage {
                           real_image_dims_[1] >= image_dims_[1],
                           "real image is not enough!");
     if (cl_image_ != src.cl_image_) {
-      cl_image_.reset(src.cl_image_.get(), CLMemDeleter());
+      cl_image_ = src.cl_image_;
     }
 
     tensor_dims_ = need_dims;
@@ -233,18 +233,6 @@ class CLImage {
     InitCLImage(context, command_queue, converter);
   }
 
-  /*! The internal of two tensors share the same memory block. */
-  inline CLImage &ShareHolderWith(const CLImage &src) {
-    PADDLE_MOBILE_ENFORCE(
-        src.cl_image_ != nullptr,
-        "Tensor holds no memory. Call Tensor::mutable_data first.")
-
-    if (cl_image_ != src.cl_image_) {
-      cl_image_.reset(src.cl_image_.get(), CLMemDeleter());
-    }
-    return *this;
-  }
-
   cl_mem GetCLImage() const { return cl_image_.get(); }
 
   const DDim &ImageDims() const { return image_dims_; }
diff --git a/mobile/src/framework/cl/cl_scope.h b/mobile/src/framework/cl/cl_scope.h
index 2555216564..49e705e5a0 100644
--- a/mobile/src/framework/cl/cl_scope.h
+++ b/mobile/src/framework/cl/cl_scope.h
@@ -35,14 +35,11 @@ namespace framework {
 
 class CLScope {
  public:
-  CLScope() {
-    CLEngine *engine = CLEngine::Instance();
-    context_ = engine->getContext();
-    command_queue_ = engine->getClCommandQueue();
-    localWorkSizeInfo_ = engine->getLocalWorkSizeInfo();
-  }
+  CLScope() {}
 
-  cl_command_queue CommandQueue() { return command_queue_; }
+  cl_command_queue CommandQueue() {
+    return CLEngine::Instance()->getClCommandQueue();
+  }
 
   std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
       const std::string &kernel_name, const std::string &file_name,
@@ -58,7 +55,7 @@ class CLScope {
     return std::move(kernel);
   }
 
-  cl_context Context() { return context_; }
+  cl_context Context() { return CLEngine::Instance()->getContext(); }
 
   cl_program Program(const std::string &file_name,
                      const std::string &kernel_name,
@@ -79,7 +76,7 @@ class CLScope {
     std::string header(header_it->second.begin(), header_it->second.end());
     source = header + "\n" + source;
     auto program = CLEngine::Instance()->CreateProgramWithSource(
-        context_, source.c_str());
+        CLEngine::Instance()->getContext(), source.c_str());
 
     LOG(kLOG_DEBUG3) << " --- begin build program -> " << program_key
                      << " --- ";
@@ -99,7 +96,7 @@ class CLScope {
       return it->second.get();
     }
     auto program = CLEngine::Instance()->CreateProgramWith(
-        context_,
+        CLEngine::Instance()->getContext(),
         CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name);
 
     LOG(kLOG_DEBUG3) << " --- begin build program ele-> " << program_key
@@ -113,7 +110,9 @@ class CLScope {
     }
   }
 
-  CLLocalWorkSizeInfo LocalWorkSizeInfo() { return localWorkSizeInfo_; }
+  CLLocalWorkSizeInfo LocalWorkSizeInfo() {
+    return CLEngine::Instance()->getLocalWorkSizeInfo();
+  }
   size_t KernelWorkSize(cl_kernel kernel) {
     size_t kernel_work_size = CLEngine::Instance()->GetKernelWorkSize(kernel);
     return kernel_work_size;
@@ -121,12 +120,9 @@ class CLScope {
 
  private:
   cl_int status_;
-  cl_context context_;
-  cl_command_queue command_queue_;
   std::unordered_map<std::string,
                      std::unique_ptr<_cl_program, CLProgramDeleter>>
      programs_;
-  CLLocalWorkSizeInfo localWorkSizeInfo_;
 };
 
 }  // namespace framework
diff --git a/mobile/src/io/opencl_interface.cpp b/mobile/src/io/opencl_interface.cpp
index 1df5b48339..636cd1b760 100644
--- a/mobile/src/io/opencl_interface.cpp
+++ b/mobile/src/io/opencl_interface.cpp
@@ -28,8 +28,26 @@ cl_command_queue getClCommandQueue() {
 }
 
 bool isInitSuccess() {
+  prepareOpenclRuntime();
   return framework::CLEngine::Instance()->isInitSuccess();
 }
 
+bool prepareOpenclRuntime() {
+#ifdef PREPARE_OPENCL_RUNTIME
+  DLOG << "cl runtime prepared. ";
+  cl_uint numPlatforms;  // the NO. of platforms
+  cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
+  if (status == CL_SUCCESS) {
+    if (numPlatforms > 0) {
+      cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>(
+          malloc(numPlatforms * sizeof(cl_platform_id)));
+      status = clGetPlatformIDs(numPlatforms, platforms, NULL);
+      free(platforms);
+    }
+  }
+#endif
+  return true;
+}
+
 }  // namespace paddle_mobile
 
 #endif
diff --git a/mobile/src/io/opencl_interface.h b/mobile/src/io/opencl_interface.h
index f1039f1373..6a3608790a 100644
--- a/mobile/src/io/opencl_interface.h
+++ b/mobile/src/io/opencl_interface.h
@@ -21,6 +21,7 @@ namespace paddle_mobile {
 cl_context getContext();
 cl_command_queue getClCommandQueue();
 bool isInitSuccess();
+bool prepareOpenclRuntime();
 
 }  // namespace paddle_mobile
 
diff --git a/mobile/src/io/paddle_mobile.h b/mobile/src/io/paddle_mobile.h
index 8b8f0683ab..8c40b0696a 100644
--- a/mobile/src/io/paddle_mobile.h
+++ b/mobile/src/io/paddle_mobile.h
@@ -26,6 +26,7 @@ limitations under the License. */
 #include "io/paddle_inference_api.h"
 #ifdef PADDLE_MOBILE_CL
 #include "framework/cl/cl_engine.h"
+#include "io/opencl_interface.h"
 #endif
 
 namespace paddle_mobile {
@@ -34,16 +35,24 @@ template <typename Device, typename T = float>
 class PaddleMobile {
  public:
  explicit PaddleMobile(PaddleMobileConfigInternal config) : config_(config) {
-#ifndef PADDLE_MOBILE_CL
     bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
+#ifndef PADDLE_MOBILE_CL
     PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
+#else
+    if (is_gpu) {
+      prepareOpenclRuntime();
+    }
 #endif
   }
 
   PaddleMobile() {
-#ifndef PADDLE_MOBILE_CL
     bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
+#ifndef PADDLE_MOBILE_CL
     PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
+#else
+    if (is_gpu) {  // recheck when run cpu in with opencl.
+      prepareOpenclRuntime();
+    }
 #endif
   }
   virtual ~PaddleMobile() { Clear(); }
-- 
GitLab
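
Illustrative note (not part of the patch above): the core idea of these changes is to hold OpenCL handles such as cl_context in a std::shared_ptr with a releasing deleter, so CLImage can share a handle by assigning the shared_ptr (cl_image_ = src.cl_image_) instead of re-wrapping the raw pointer, and the matching clRelease* call runs exactly once, when the last owner goes away. The stand-alone C++ sketch below shows that pattern under stated assumptions: ContextReleaser is a made-up name for this example (the patch's own deleters live in cl_deleter.h), and it assumes an OpenCL SDK with at least one GPU platform is installed.

// shared_context_sketch.cpp -- minimal sketch of shared ownership of an
// OpenCL context handle, assuming an OpenCL SDK and one GPU platform.
#include <cstdio>
#include <memory>

#include "CL/cl.h"

// Hypothetical deleter for this example: release the context once, when the
// last shared_ptr owner drops it, and report the release status.
struct ContextReleaser {
  void operator()(_cl_context *ctx) const {
    const cl_int status = clReleaseContext(ctx);
    std::printf("clReleaseContext status: %d\n", static_cast<int>(status));
  }
};

int main() {
  cl_platform_id platform = nullptr;
  cl_device_id device = nullptr;
  if (clGetPlatformIDs(1, &platform, nullptr) != CL_SUCCESS ||
      clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr) !=
          CL_SUCCESS) {
    std::printf("no OpenCL GPU device available\n");
    return 0;
  }

  cl_int status = CL_SUCCESS;
  cl_context raw =
      clCreateContext(nullptr, 1, &device, nullptr, nullptr, &status);
  if (status != CL_SUCCESS) {
    std::printf("clCreateContext failed: %d\n", static_cast<int>(status));
    return 0;
  }

  // First owner wraps the raw handle; the deleter replaces a manual release.
  std::shared_ptr<_cl_context> context(raw, ContextReleaser());

  // A second owner (like another object sharing the same backing handle)
  // only bumps the reference count; no extra release is issued for it.
  std::shared_ptr<_cl_context> another_owner = context;
  std::printf("owners: %ld\n", static_cast<long>(another_owner.use_count()));

  return 0;  // ContextReleaser fires exactly once here, after both owners drop.
}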