未验证 提交 6135fd4a 编写于 作者: X xiebaiyuan 提交者: GitHub

[mobile][opencl]opt cl mem (#2736)

* [mobile][opencl] fix a leak in CLEngine, fix loader.load's segfault, move the context/command-queue pointers out of CLScope, test=mobile

* [mobile][opencl] suit CPU mode (skip GPU runtime setup when running on CPU), test=mobile

* [mobile][opencl] share the cl_mem pointer with others in CLImage via shared_ptr, test=mobile
上级 08afd3aa
......@@ -15,45 +15,51 @@ limitations under the License. */
#pragma once
#include "CL/cl.h"
#include "common/log.h"
// Deleter for smart pointers owning a cl_kernel: releases the kernel
// exactly once and logs the OpenCL status code for debugging.
struct CLKernelDeleter {
  template <class T>
  void operator()(T *clKernelObj) {
    // Release only once — a second clReleaseKernel on the same object
    // would over-decrement its reference count (double release).
    const cl_int status = clReleaseKernel(clKernelObj);
    LOG(paddle_mobile::kNO_LOG) << "clReleaseKernel status: " << status;
  }
};
// Deleter for smart pointers owning a cl_mem: releases the memory
// object exactly once and logs the OpenCL status code.
struct CLMemDeleter {
  template <class T>
  void operator()(T *clMemObj) {
    // Single release — duplicating the clReleaseMemObject call would
    // over-decrement the object's reference count.
    const cl_int status = clReleaseMemObject(clMemObj);
    LOG(paddle_mobile::kNO_LOG) << "CLMemDeleter status: " << status;
  }
};
// Deleter for smart pointers owning a cl_event: releases the event
// exactly once and logs the OpenCL status code.
struct CLEventDeleter {
  template <class T>
  void operator()(T *clEventObj) {
    // Single release — a duplicate clReleaseEvent would double-release.
    const cl_int status = clReleaseEvent(clEventObj);
    LOG(paddle_mobile::kNO_LOG) << "CLEventDeleter status: " << status;
  }
};
// Deleter for smart pointers owning a cl_command_queue: releases the
// queue exactly once and logs the OpenCL status code.
struct CLCommQueueDeleter {
  template <class T>
  void operator()(T *clQueueObj) {
    // Single release — a duplicate clReleaseCommandQueue would
    // double-release the queue.
    const cl_int status = clReleaseCommandQueue(clQueueObj);
    LOG(paddle_mobile::kNO_LOG) << "CLCommQueueDeleter status: " << status;
  }
};
// Deleter for smart pointers owning a cl_context: releases the context
// exactly once and logs the OpenCL status code.
struct CLContextDeleter {
  template <class T>
  void operator()(T *clContextObj) {
    // Single release — a duplicate clReleaseContext would double-release.
    const cl_int status = clReleaseContext(clContextObj);
    LOG(paddle_mobile::kNO_LOG) << "CLContextDeleter status: " << status;
  }
};
// Deleter for smart pointers owning a cl_program: releases the program
// exactly once and logs the OpenCL status code.
struct CLProgramDeleter {
  template <class T>
  void operator()(T *clProgramObj) {
    // Single release — a duplicate clReleaseProgram would double-release.
    const cl_int status = clReleaseProgram(clProgramObj);
    LOG(paddle_mobile::kNO_LOG) << "CLProgramDeleter status: " << status;
  }
};
......@@ -23,9 +23,11 @@ namespace paddle_mobile {
namespace framework {
bool CLEngine::Init() {
LOG(paddle_mobile::kNO_LOG) << "CLEngine::Init()";
if (initialized_) {
return true;
}
LOG(paddle_mobile::kNO_LOG) << "CLEngine::Init() ...";
cl_int status;
bool is_setplatform_success = SetPlatform();
bool is_setcldeviceid_success = SetClDeviceId();
......@@ -53,12 +55,14 @@ bool CLEngine::SetPlatform() {
return false;
}
/**For clarity, choose the first available platform. */
LOG(paddle_mobile::kNO_LOG) << "numPlatforms: " << numPlatforms;
if (numPlatforms > 0) {
cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>(
malloc(numPlatforms * sizeof(cl_platform_id)));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
platform_ = platforms[0];
free(platforms);
LOG(paddle_mobile::kNO_LOG) << "platform: " << platform_;
return status == CL_SUCCESS;
}
......@@ -67,70 +71,21 @@ bool CLEngine::SetPlatform() {
// Enumerates the GPU devices of the previously selected platform_ and
// stores their ids in devices_. Returns true only when at least one GPU
// device id was retrieved successfully; false on query failure or when
// no GPU device exists.
bool CLEngine::SetClDeviceId() {
  cl_uint numDevices = 0;
  devices_ = NULL;
  LOG(paddle_mobile::kNO_LOG) << "platform: " << platform_;
  // First query: ask only for the number of GPU devices.
  cl_int status =
      clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
  if (status != CL_SUCCESS) {
    return false;
  }
  LOG(paddle_mobile::kNO_LOG) << "numDevices: " << numDevices;
  if (numDevices > 0) {
    // NOTE(review): this malloc'ed list is never freed — presumably
    // acceptable because the engine is a process-lifetime singleton;
    // confirm against CLEngine's destructor.
    devices_ = reinterpret_cast<cl_device_id *>(
        malloc(numDevices * sizeof(cl_device_id)));
    // Second query: fill in the actual device ids.
    status = clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, numDevices, devices_,
                            NULL);
    LOG(paddle_mobile::kNO_LOG) << "devices_[0]" << devices_[0];
    return status == CL_SUCCESS;
  }
  return false;
}
// std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel(
// const std::string &kernel_name) {
// std::unique_ptr<_cl_kernel, clKernel_deleter> kernel(
// clCreateKernel(program_.get(), kernel_name.c_str(), NULL));
// return std::move(kernel);
//}
//
// bool CLEngine::SetClCommandQueue() {
// cl_int status;
// command_queue_.reset(
// clCreateCommandQueue(context_.get(), devices_[0], 0, &status));
// return true;
//}
// bool CLEngine::SetClContext() {
// context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL));
// return true;
//}
// bool CLEngine::LoadKernelFromFile(const char *kernel_file) {
// size_t size;
// char *str;
// std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
//
// if (!f.is_open()) {
// return false;
// }
//
// size_t fileSize;
// f.seekg(0, std::fstream::end);
// size = fileSize = (size_t)f.tellg();
// f.seekg(0, std::fstream::beg);
// str = new char[size + 1];
// if (!str) {
// f.close();
// return 0;
// }
//
// f.read(str, fileSize);
// f.close();
// str[size] = '\0';
// const char *source = str;
// size_t sourceSize[] = {strlen(source)};
// program_.reset(
// clCreateProgramWithSource(context_.get(), 1, &source, sourceSize,
// NULL));
// return true;
//}
} // namespace framework
} // namespace paddle_mobile
......@@ -57,19 +57,27 @@ class CLLocalWorkSizeInfo {
// max number of work-items in local_work_size in dim 2
size_t max_work_item_size2;
};
// OpenCL context error callback (pfn_notify signature); registered in
// CreateContext() via clCreateContext. Prints the driver-reported error
// string to stderr. private_info/cb/user_data are part of the required
// callback signature and intentionally unused here.
// NOTE(review): the CL runtime may invoke this asynchronously — confirm
// stderr logging is acceptable from arbitrary threads.
inline void ctx_info(const char *errinfo, const void *private_info, size_t cb,
                     void *user_data) {
  fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
}
class CLEngine {
public:
static CLEngine *Instance();
bool Init();
bool isInitSuccess();
std::unique_ptr<_cl_context, CLContextDeleter> CreateContext() {
std::shared_ptr<_cl_context> CreateContext() {
DLOG << "CreateContext ---";
DLOG << "platform: " << platform_;
DLOG << "devices_[0]: " << devices_[0];
cl_int status;
cl_context c = clCreateContext(NULL, 1, devices_, NULL, NULL, &status);
std::unique_ptr<_cl_context, CLContextDeleter> context_ptr(c);
cl_context c = clCreateContext(NULL, 1, devices_, &ctx_info, NULL, &status);
std::shared_ptr<_cl_context> context(c, CLContextDeleter());
CL_CHECK_ERRORS(status);
return std::move(context_ptr);
return std::move(context);
}
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> CreateClCommandQueue(
......@@ -84,14 +92,14 @@ class CLEngine {
}
// Lazily creates the context on first use and caches it in context_.
// Returns the raw (non-owning) cl_context handle.
// NOTE(review): first-call initialization is not synchronized —
// confirm this is only reached from a single thread.
cl_context getContext() {
  if (context_ == nullptr) {
    context_ = CreateContext();
  }
  return context_.get();
}
cl_command_queue getClCommandQueue() {
if (command_queue_ == nullptr) {
if (command_queue_.get() == nullptr) {
command_queue_ = CreateClCommandQueue(getContext());
}
return command_queue_.get();
......@@ -216,11 +224,7 @@ class CLEngine {
DLOG << " program build error: " << log;
}
if (status == CL_SUCCESS) {
return true;
} else {
return false;
}
return status == CL_SUCCESS;
}
cl_device_id DeviceID(int index = 0) { return devices_[index]; }
......@@ -239,28 +243,13 @@ class CLEngine {
CLLocalWorkSizeInfo localWorkSizeInfo_;
cl_platform_id platform_;
cl_device_id *devices_;
cl_int status_;
std::string cl_path_;
std::unique_ptr<_cl_program, CLProgramDeleter> program_;
std::unique_ptr<_cl_context, CLContextDeleter> context_ = nullptr;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ =
nullptr;
// bool SetClContext();
// bool SetClCommandQueue();
// bool LoadKernelFromFile(const char *kernel_file);
// bool BuildProgram();
bool is_init_success_ = false;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
std::shared_ptr<_cl_context> context_;
cl_device_id devices_[10];
cl_platform_id platform_;
};
} // namespace framework
......
......@@ -211,7 +211,7 @@ class CLImage {
real_image_dims_[1] >= image_dims_[1],
"real image is not enough!");
if (cl_image_ != src.cl_image_) {
cl_image_.reset(src.cl_image_.get(), CLMemDeleter());
cl_image_ = src.cl_image_;
}
tensor_dims_ = need_dims;
......@@ -233,18 +233,6 @@ class CLImage {
InitCLImage(context, command_queue, converter);
}
/*! The internal of two tensors share the same memory block. */
// Shares src's cl_mem holder with this image. Copying the shared_ptr
// (rather than reset(src.get(), deleter)) is essential: reset with a
// raw get() would create a second, independent owner of the same
// cl_mem, and each owner's deleter would release it — a double release.
inline CLImage &ShareHolderWith(const CLImage &src) {
  PADDLE_MOBILE_ENFORCE(
      src.cl_image_ != nullptr,
      "Tensor holds no memory. Call Tensor::mutable_data first.")
  if (cl_image_ != src.cl_image_) {
    cl_image_ = src.cl_image_;
  }
  return *this;
}
// Raw, non-owning handle to the underlying cl_mem image.
cl_mem GetCLImage() const { return cl_image_.get(); }
// Dimensions of the backing OpenCL image.
const DDim &ImageDims() const { return image_dims_; }
......
......@@ -35,14 +35,11 @@ namespace framework {
class CLScope {
public:
// Default-constructed scope caches no CL state; the context, command
// queue and work-size info are fetched from the CLEngine singleton on
// demand by the accessors below, so construction is safe even before
// the engine has been initialized.
CLScope() {}
// Command queue is owned by CLEngine; resolved lazily on each call
// rather than cached, so the scope never holds a stale handle.
cl_command_queue CommandQueue() {
  return CLEngine::Instance()->getClCommandQueue();
}
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
const std::string &kernel_name, const std::string &file_name,
......@@ -58,7 +55,7 @@ class CLScope {
return std::move(kernel);
}
// Context is owned by CLEngine; resolved lazily rather than cached.
cl_context Context() { return CLEngine::Instance()->getContext(); }
cl_program Program(const std::string &file_name,
const std::string &kernel_name,
......@@ -79,7 +76,7 @@ class CLScope {
std::string header(header_it->second.begin(), header_it->second.end());
source = header + "\n" + source;
auto program = CLEngine::Instance()->CreateProgramWithSource(
context_, source.c_str());
CLEngine::Instance()->getContext(), source.c_str());
LOG(kLOG_DEBUG3) << " --- begin build program -> " << program_key
<< " --- ";
......@@ -99,7 +96,7 @@ class CLScope {
return it->second.get();
}
auto program = CLEngine::Instance()->CreateProgramWith(
context_,
CLEngine::Instance()->getContext(),
CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name);
LOG(kLOG_DEBUG3) << " --- begin build program ele-> " << program_key
......@@ -113,7 +110,9 @@ class CLScope {
}
}
// Work-group size limits are queried from the engine on demand; no
// per-scope copy is kept.
CLLocalWorkSizeInfo LocalWorkSizeInfo() {
  return CLEngine::Instance()->getLocalWorkSizeInfo();
}
size_t KernelWorkSize(cl_kernel kernel) {
size_t kernel_work_size = CLEngine::Instance()->GetKernelWorkSize(kernel);
return kernel_work_size;
......@@ -121,12 +120,9 @@ class CLScope {
private:
cl_int status_;
cl_context context_;
cl_command_queue command_queue_;
std::unordered_map<std::string,
std::unique_ptr<_cl_program, CLProgramDeleter>>
programs_;
CLLocalWorkSizeInfo localWorkSizeInfo_;
};
} // namespace framework
......
......@@ -28,8 +28,26 @@ cl_command_queue getClCommandQueue() {
}
// Reports whether the OpenCL engine initialized successfully.
// Warms up the CL runtime first (best-effort, see
// prepareOpenclRuntime) so the engine can be probed safely.
bool isInitSuccess() {
  prepareOpenclRuntime();
  return framework::CLEngine::Instance()->isInitSuccess();
}
// Best-effort warm-up of the OpenCL runtime: queries (and discards)
// the platform list so the vendor driver gets loaded early. Compiles
// to a no-op unless PREPARE_OPENCL_RUNTIME is defined. Always returns
// true; query failures are deliberately ignored.
bool prepareOpenclRuntime() {
#ifdef PREPARE_OPENCL_RUNTIME
  DLOG << "cl runtime prepared. ";
  cl_uint numPlatforms;  // the NO. of platforms
  cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
  if (status == CL_SUCCESS) {
    if (numPlatforms > 0) {
      cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>(
          malloc(numPlatforms * sizeof(cl_platform_id)));
      // Result of the second query is intentionally unused; the point
      // is only to touch the driver once.
      status = clGetPlatformIDs(numPlatforms, platforms, NULL);
      free(platforms);
    }
  }
#endif
  return true;
}
} // namespace paddle_mobile
#endif
......@@ -21,6 +21,7 @@ namespace paddle_mobile {
cl_context getContext();
cl_command_queue getClCommandQueue();
bool isInitSuccess();
bool prepareOpenclRuntime();
} // namespace paddle_mobile
......
......@@ -26,6 +26,7 @@ limitations under the License. */
#include "io/paddle_inference_api.h"
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_engine.h"
#include "io/opencl_interface.h"
#endif
namespace paddle_mobile {
......@@ -34,16 +35,24 @@ template <typename Device, typename T = float>
class PaddleMobile {
public:
// Constructs the engine with an explicit internal config. On an
// OpenCL-enabled build, a GPU device request warms up the CL runtime
// immediately; on a CPU-only build, requesting the GPU is a hard error.
explicit PaddleMobile(PaddleMobileConfigInternal config) : config_(config) {
  bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
#ifndef PADDLE_MOBILE_CL
  PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
#else
  if (is_gpu) {
    prepareOpenclRuntime();
  }
#endif
}
// Default constructor. Mirrors the config constructor: on an
// OpenCL-enabled build, a GPU device request warms up the CL runtime;
// on a CPU-only build, requesting the GPU is a hard error. The CL-build
// check is still needed when running CPU inference within an
// OpenCL-enabled binary.
PaddleMobile() {
  bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
#ifndef PADDLE_MOBILE_CL
  PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
#else
  if (is_gpu) {
    prepareOpenclRuntime();
  }
#endif
}
// Virtual: PaddleMobile may be deleted through a base pointer.
// Releases engine state via Clear().
virtual ~PaddleMobile() { Clear(); }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册