未验证 提交 6135fd4a 编写于 作者: X xiebaiyuan 提交者: GitHub

[mobile][opencl]opt cl mem (#2736)

* [mobile][opencl] fix a leak in CLEngine, fix Loader.Load's segfault, move context/command queue's ptr out of CLScope, test=mobile

* [mobile][opencl] suit CPU mode, test=mobile

* [mobile][opencl] share ptr (shared_ptr) with others in CLImage, test=mobile
上级 08afd3aa
...@@ -15,45 +15,51 @@ limitations under the License. */ ...@@ -15,45 +15,51 @@ limitations under the License. */
#pragma once #pragma once
#include "CL/cl.h" #include "CL/cl.h"
#include "common/log.h"
struct CLKernelDeleter { struct CLKernelDeleter {
template <class T> template <class T>
void operator()(T *clKernelObj) { void operator()(T *clKernelObj) {
clReleaseKernel(clKernelObj); const cl_int status = clReleaseKernel(clKernelObj);
LOG(paddle_mobile::kNO_LOG) << "clReleaseKernel status: " << status;
} }
}; };
struct CLMemDeleter { struct CLMemDeleter {
template <class T> template <class T>
void operator()(T *clMemObj) { void operator()(T *clMemObj) {
clReleaseMemObject(clMemObj); const cl_int status = clReleaseMemObject(clMemObj);
LOG(paddle_mobile::kNO_LOG) << "CLMemDeleter status: " << status;
} }
}; };
struct CLEventDeleter { struct CLEventDeleter {
template <class T> template <class T>
void operator()(T *clEventObj) { void operator()(T *clEventObj) {
clReleaseEvent(clEventObj); const cl_int status = clReleaseEvent(clEventObj);
LOG(paddle_mobile::kNO_LOG) << "CLEventDeleter status: " << status;
} }
}; };
struct CLCommQueueDeleter { struct CLCommQueueDeleter {
template <class T> template <class T>
void operator()(T *clQueueObj) { void operator()(T *clQueueObj) {
clReleaseCommandQueue(clQueueObj); const cl_int status = clReleaseCommandQueue(clQueueObj);
LOG(paddle_mobile::kNO_LOG) << "CLCommQueueDeleter status: " << status;
} }
}; };
struct CLContextDeleter { struct CLContextDeleter {
template <class T> template <class T>
void operator()(T *clContextObj) { void operator()(T *clContextObj) {
clReleaseContext(clContextObj); const cl_int status = clReleaseContext(clContextObj);
LOG(paddle_mobile::kNO_LOG) << "CLContextDeleter status: " << status;
} }
}; };
struct CLProgramDeleter { struct CLProgramDeleter {
template <class T> template <class T>
void operator()(T *clProgramObj) { void operator()(T *clProgramObj) {
clReleaseProgram(clProgramObj); const cl_int status = clReleaseProgram(clProgramObj);
LOG(paddle_mobile::kNO_LOG) << "CLProgramDeleter status: " << status;
} }
}; };
...@@ -23,9 +23,11 @@ namespace paddle_mobile { ...@@ -23,9 +23,11 @@ namespace paddle_mobile {
namespace framework { namespace framework {
bool CLEngine::Init() { bool CLEngine::Init() {
LOG(paddle_mobile::kNO_LOG) << "CLEngine::Init()";
if (initialized_) { if (initialized_) {
return true; return true;
} }
LOG(paddle_mobile::kNO_LOG) << "CLEngine::Init() ...";
cl_int status; cl_int status;
bool is_setplatform_success = SetPlatform(); bool is_setplatform_success = SetPlatform();
bool is_setcldeviceid_success = SetClDeviceId(); bool is_setcldeviceid_success = SetClDeviceId();
...@@ -53,12 +55,14 @@ bool CLEngine::SetPlatform() { ...@@ -53,12 +55,14 @@ bool CLEngine::SetPlatform() {
return false; return false;
} }
/**For clarity, choose the first available platform. */ /**For clarity, choose the first available platform. */
LOG(paddle_mobile::kNO_LOG) << "numPlatforms: " << numPlatforms;
if (numPlatforms > 0) { if (numPlatforms > 0) {
cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>( cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>(
malloc(numPlatforms * sizeof(cl_platform_id))); malloc(numPlatforms * sizeof(cl_platform_id)));
status = clGetPlatformIDs(numPlatforms, platforms, NULL); status = clGetPlatformIDs(numPlatforms, platforms, NULL);
platform_ = platforms[0]; platform_ = platforms[0];
free(platforms); free(platforms);
LOG(paddle_mobile::kNO_LOG) << "platform: " << platform_;
return status == CL_SUCCESS; return status == CL_SUCCESS;
} }
...@@ -67,70 +71,21 @@ bool CLEngine::SetPlatform() { ...@@ -67,70 +71,21 @@ bool CLEngine::SetPlatform() {
bool CLEngine::SetClDeviceId() { bool CLEngine::SetClDeviceId() {
cl_uint numDevices = 0; cl_uint numDevices = 0;
devices_ = NULL; LOG(paddle_mobile::kNO_LOG) << "platform: " << platform_;
cl_int status = cl_int status =
clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
return false; return false;
} }
LOG(paddle_mobile::kNO_LOG) << "numDevices: " << numDevices;
if (numDevices > 0) { if (numDevices > 0) {
devices_ = reinterpret_cast<cl_device_id *>(
malloc(numDevices * sizeof(cl_device_id)));
status = clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, numDevices, devices_, status = clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, numDevices, devices_,
NULL); NULL);
LOG(paddle_mobile::kNO_LOG) << "devices_[0]" << devices_[0];
return status == CL_SUCCESS; return status == CL_SUCCESS;
} }
return false; return false;
} }
// std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel(
// const std::string &kernel_name) {
// std::unique_ptr<_cl_kernel, clKernel_deleter> kernel(
// clCreateKernel(program_.get(), kernel_name.c_str(), NULL));
// return std::move(kernel);
//}
//
// bool CLEngine::SetClCommandQueue() {
// cl_int status;
// command_queue_.reset(
// clCreateCommandQueue(context_.get(), devices_[0], 0, &status));
// return true;
//}
// bool CLEngine::SetClContext() {
// context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL));
// return true;
//}
// bool CLEngine::LoadKernelFromFile(const char *kernel_file) {
// size_t size;
// char *str;
// std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
//
// if (!f.is_open()) {
// return false;
// }
//
// size_t fileSize;
// f.seekg(0, std::fstream::end);
// size = fileSize = (size_t)f.tellg();
// f.seekg(0, std::fstream::beg);
// str = new char[size + 1];
// if (!str) {
// f.close();
// return 0;
// }
//
// f.read(str, fileSize);
// f.close();
// str[size] = '\0';
// const char *source = str;
// size_t sourceSize[] = {strlen(source)};
// program_.reset(
// clCreateProgramWithSource(context_.get(), 1, &source, sourceSize,
// NULL));
// return true;
//}
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -57,19 +57,27 @@ class CLLocalWorkSizeInfo { ...@@ -57,19 +57,27 @@ class CLLocalWorkSizeInfo {
// max number of work-items in local_work_size in dim 2 // max number of work-items in local_work_size in dim 2
size_t max_work_item_size2; size_t max_work_item_size2;
}; };
inline void ctx_info(const char *errinfo, const void *private_info, size_t cb,
void *user_data) {
fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
}
class CLEngine { class CLEngine {
public: public:
static CLEngine *Instance(); static CLEngine *Instance();
bool Init(); bool Init();
bool isInitSuccess(); bool isInitSuccess();
std::unique_ptr<_cl_context, CLContextDeleter> CreateContext() {
std::shared_ptr<_cl_context> CreateContext() {
DLOG << "CreateContext ---";
DLOG << "platform: " << platform_;
DLOG << "devices_[0]: " << devices_[0];
cl_int status; cl_int status;
cl_context c = clCreateContext(NULL, 1, devices_, NULL, NULL, &status); cl_context c = clCreateContext(NULL, 1, devices_, &ctx_info, NULL, &status);
std::unique_ptr<_cl_context, CLContextDeleter> context_ptr(c); std::shared_ptr<_cl_context> context(c, CLContextDeleter());
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
return std::move(context_ptr); return std::move(context);
} }
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> CreateClCommandQueue( std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> CreateClCommandQueue(
...@@ -84,14 +92,14 @@ class CLEngine { ...@@ -84,14 +92,14 @@ class CLEngine {
} }
cl_context getContext() { cl_context getContext() {
if (context_ == nullptr) { if (context_.get() == nullptr) {
context_ = CreateContext(); context_ = CreateContext();
} }
return context_.get(); return context_.get();
} }
cl_command_queue getClCommandQueue() { cl_command_queue getClCommandQueue() {
if (command_queue_ == nullptr) { if (command_queue_.get() == nullptr) {
command_queue_ = CreateClCommandQueue(getContext()); command_queue_ = CreateClCommandQueue(getContext());
} }
return command_queue_.get(); return command_queue_.get();
...@@ -216,11 +224,7 @@ class CLEngine { ...@@ -216,11 +224,7 @@ class CLEngine {
DLOG << " program build error: " << log; DLOG << " program build error: " << log;
} }
if (status == CL_SUCCESS) { return status == CL_SUCCESS;
return true;
} else {
return false;
}
} }
cl_device_id DeviceID(int index = 0) { return devices_[index]; } cl_device_id DeviceID(int index = 0) { return devices_[index]; }
...@@ -239,28 +243,13 @@ class CLEngine { ...@@ -239,28 +243,13 @@ class CLEngine {
CLLocalWorkSizeInfo localWorkSizeInfo_; CLLocalWorkSizeInfo localWorkSizeInfo_;
cl_platform_id platform_;
cl_device_id *devices_;
cl_int status_; cl_int status_;
std::string cl_path_; std::string cl_path_;
std::unique_ptr<_cl_program, CLProgramDeleter> program_;
std::unique_ptr<_cl_context, CLContextDeleter> context_ = nullptr;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ =
nullptr;
// bool SetClContext();
// bool SetClCommandQueue();
// bool LoadKernelFromFile(const char *kernel_file);
// bool BuildProgram();
bool is_init_success_ = false; bool is_init_success_ = false;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
std::shared_ptr<_cl_context> context_;
cl_device_id devices_[10];
cl_platform_id platform_;
}; };
} // namespace framework } // namespace framework
......
...@@ -211,7 +211,7 @@ class CLImage { ...@@ -211,7 +211,7 @@ class CLImage {
real_image_dims_[1] >= image_dims_[1], real_image_dims_[1] >= image_dims_[1],
"real image is not enough!"); "real image is not enough!");
if (cl_image_ != src.cl_image_) { if (cl_image_ != src.cl_image_) {
cl_image_.reset(src.cl_image_.get(), CLMemDeleter()); cl_image_ = src.cl_image_;
} }
tensor_dims_ = need_dims; tensor_dims_ = need_dims;
...@@ -233,18 +233,6 @@ class CLImage { ...@@ -233,18 +233,6 @@ class CLImage {
InitCLImage(context, command_queue, converter); InitCLImage(context, command_queue, converter);
} }
/*! The internal of two tensors share the same memory block. */
inline CLImage &ShareHolderWith(const CLImage &src) {
PADDLE_MOBILE_ENFORCE(
src.cl_image_ != nullptr,
"Tensor holds no memory. Call Tensor::mutable_data first.")
if (cl_image_ != src.cl_image_) {
cl_image_.reset(src.cl_image_.get(), CLMemDeleter());
}
return *this;
}
cl_mem GetCLImage() const { return cl_image_.get(); } cl_mem GetCLImage() const { return cl_image_.get(); }
const DDim &ImageDims() const { return image_dims_; } const DDim &ImageDims() const { return image_dims_; }
......
...@@ -35,14 +35,11 @@ namespace framework { ...@@ -35,14 +35,11 @@ namespace framework {
class CLScope { class CLScope {
public: public:
CLScope() { CLScope() {}
CLEngine *engine = CLEngine::Instance();
context_ = engine->getContext();
command_queue_ = engine->getClCommandQueue();
localWorkSizeInfo_ = engine->getLocalWorkSizeInfo();
}
cl_command_queue CommandQueue() { return command_queue_; } cl_command_queue CommandQueue() {
return CLEngine::Instance()->getClCommandQueue();
}
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel( std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
const std::string &kernel_name, const std::string &file_name, const std::string &kernel_name, const std::string &file_name,
...@@ -58,7 +55,7 @@ class CLScope { ...@@ -58,7 +55,7 @@ class CLScope {
return std::move(kernel); return std::move(kernel);
} }
cl_context Context() { return context_; } cl_context Context() { return CLEngine::Instance()->getContext(); }
cl_program Program(const std::string &file_name, cl_program Program(const std::string &file_name,
const std::string &kernel_name, const std::string &kernel_name,
...@@ -79,7 +76,7 @@ class CLScope { ...@@ -79,7 +76,7 @@ class CLScope {
std::string header(header_it->second.begin(), header_it->second.end()); std::string header(header_it->second.begin(), header_it->second.end());
source = header + "\n" + source; source = header + "\n" + source;
auto program = CLEngine::Instance()->CreateProgramWithSource( auto program = CLEngine::Instance()->CreateProgramWithSource(
context_, source.c_str()); CLEngine::Instance()->getContext(), source.c_str());
LOG(kLOG_DEBUG3) << " --- begin build program -> " << program_key LOG(kLOG_DEBUG3) << " --- begin build program -> " << program_key
<< " --- "; << " --- ";
...@@ -99,7 +96,7 @@ class CLScope { ...@@ -99,7 +96,7 @@ class CLScope {
return it->second.get(); return it->second.get();
} }
auto program = CLEngine::Instance()->CreateProgramWith( auto program = CLEngine::Instance()->CreateProgramWith(
context_, CLEngine::Instance()->getContext(),
CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name); CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name);
LOG(kLOG_DEBUG3) << " --- begin build program ele-> " << program_key LOG(kLOG_DEBUG3) << " --- begin build program ele-> " << program_key
...@@ -113,7 +110,9 @@ class CLScope { ...@@ -113,7 +110,9 @@ class CLScope {
} }
} }
CLLocalWorkSizeInfo LocalWorkSizeInfo() { return localWorkSizeInfo_; } CLLocalWorkSizeInfo LocalWorkSizeInfo() {
return CLEngine::Instance()->getLocalWorkSizeInfo();
}
size_t KernelWorkSize(cl_kernel kernel) { size_t KernelWorkSize(cl_kernel kernel) {
size_t kernel_work_size = CLEngine::Instance()->GetKernelWorkSize(kernel); size_t kernel_work_size = CLEngine::Instance()->GetKernelWorkSize(kernel);
return kernel_work_size; return kernel_work_size;
...@@ -121,12 +120,9 @@ class CLScope { ...@@ -121,12 +120,9 @@ class CLScope {
private: private:
cl_int status_; cl_int status_;
cl_context context_;
cl_command_queue command_queue_;
std::unordered_map<std::string, std::unordered_map<std::string,
std::unique_ptr<_cl_program, CLProgramDeleter>> std::unique_ptr<_cl_program, CLProgramDeleter>>
programs_; programs_;
CLLocalWorkSizeInfo localWorkSizeInfo_;
}; };
} // namespace framework } // namespace framework
......
...@@ -28,8 +28,26 @@ cl_command_queue getClCommandQueue() { ...@@ -28,8 +28,26 @@ cl_command_queue getClCommandQueue() {
} }
bool isInitSuccess() { bool isInitSuccess() {
prepareOpenclRuntime();
return framework::CLEngine::Instance()->isInitSuccess(); return framework::CLEngine::Instance()->isInitSuccess();
} }
bool prepareOpenclRuntime() {
#ifdef PREPARE_OPENCL_RUNTIME
DLOG << "cl runtime prepared. ";
cl_uint numPlatforms; // the NO. of platforms
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status == CL_SUCCESS) {
if (numPlatforms > 0) {
cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>(
malloc(numPlatforms * sizeof(cl_platform_id)));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
free(platforms);
}
}
#endif
return true;
}
} // namespace paddle_mobile } // namespace paddle_mobile
#endif #endif
...@@ -21,6 +21,7 @@ namespace paddle_mobile { ...@@ -21,6 +21,7 @@ namespace paddle_mobile {
cl_context getContext(); cl_context getContext();
cl_command_queue getClCommandQueue(); cl_command_queue getClCommandQueue();
bool isInitSuccess(); bool isInitSuccess();
bool prepareOpenclRuntime();
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -26,6 +26,7 @@ limitations under the License. */ ...@@ -26,6 +26,7 @@ limitations under the License. */
#include "io/paddle_inference_api.h" #include "io/paddle_inference_api.h"
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_engine.h" #include "framework/cl/cl_engine.h"
#include "io/opencl_interface.h"
#endif #endif
namespace paddle_mobile { namespace paddle_mobile {
...@@ -34,16 +35,24 @@ template <typename Device, typename T = float> ...@@ -34,16 +35,24 @@ template <typename Device, typename T = float>
class PaddleMobile { class PaddleMobile {
public: public:
explicit PaddleMobile(PaddleMobileConfigInternal config) : config_(config) { explicit PaddleMobile(PaddleMobileConfigInternal config) : config_(config) {
#ifndef PADDLE_MOBILE_CL
bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value; bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
#ifndef PADDLE_MOBILE_CL
PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on"); PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
#else
if (is_gpu) {
prepareOpenclRuntime();
}
#endif #endif
} }
PaddleMobile() { PaddleMobile() {
#ifndef PADDLE_MOBILE_CL
bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value; bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
#ifndef PADDLE_MOBILE_CL
PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on"); PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
#else
if (is_gpu) { // recheck when run cpu in with opencl.
prepareOpenclRuntime();
}
#endif #endif
} }
virtual ~PaddleMobile() { Clear(); } virtual ~PaddleMobile() { Clear(); }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册