未验证 提交 de299b9e 编写于 作者: Y ysh329 提交者: GitHub

[BugFix][OPENCL] Fix initialization sequence of opencl backend valid API. test=develop (#4003)

* fix opencl backend. test=develop
上级 1d0f70ae
...@@ -41,10 +41,26 @@ namespace lite_api { ...@@ -41,10 +41,26 @@ namespace lite_api {
// Reports whether the OpenCL backend can be used on the current device.
//
// Without LITE_WITH_OPENCL this always returns false. Otherwise the checks run
// in a fixed order and stop at the first failure:
//   1. the OpenCL library was found (CLWrapper::OpenclLibFound),
//   2. its symbols were resolved (CLWrapper::DlsymSuccess),
//   3. CLRuntime reports the device as available.
// CLRuntime::Global() is only reached after both CLWrapper checks pass.
// Note that the early returns skip the final "opencl_valid" log line.
bool IsOpenCLBackendValid() {
  bool opencl_valid = false;
#ifdef LITE_WITH_OPENCL
  const bool opencl_lib_found =
      paddle::lite::CLWrapper::Global()->OpenclLibFound();
#ifdef LITE_WITH_LOG
  LOG(INFO) << "opencl_lib_found:" << opencl_lib_found;
#endif
  if (!opencl_lib_found) return false;

  const bool dlsym_success = paddle::lite::CLWrapper::Global()->DlsymSuccess();
#ifdef LITE_WITH_LOG
  LOG(INFO) << "dlsym_success:" << dlsym_success;
#endif
  if (!dlsym_success) return false;

  opencl_valid = paddle::lite::CLRuntime::Global()->OpenCLAvaliableForDevice();
#endif
#ifdef LITE_WITH_LOG
  LOG(INFO) << "opencl_valid:" << opencl_valid;
#endif
  return opencl_valid;
}
......
...@@ -34,15 +34,20 @@ cl::Program &CLContext::GetProgram(const std::string &file_name, ...@@ -34,15 +34,20 @@ cl::Program &CLContext::GetProgram(const std::string &file_name,
std::string program_key = program_key_ss.str(); std::string program_key = program_key_ss.str();
auto it = programs_.find(program_key); auto it = programs_.find(program_key);
if (it != programs_.end()) { if (it != programs_.end()) {
#ifdef LITE_WITH_LOG
VLOG(3) << " --- program -> " << program_key << " has been built --- "; VLOG(3) << " --- program -> " << program_key << " has been built --- ";
#endif
return *(it->second); return *(it->second);
} }
auto program = CLRuntime::Global()->CreateProgram(GetContext(), file_name); auto program = CLRuntime::Global()->CreateProgram(GetContext(), file_name);
#ifdef LITE_WITH_LOG
VLOG(3) << " --- begin build program -> " << program_key << " --- "; VLOG(3) << " --- begin build program -> " << program_key << " --- ";
#endif
CLRuntime::Global()->BuildProgram(program.get(), options); CLRuntime::Global()->BuildProgram(program.get(), options);
#ifdef LITE_WITH_LOG
VLOG(3) << " --- end build program -> " << program_key << " --- "; VLOG(3) << " --- end build program -> " << program_key << " --- ";
#endif
programs_[program_key] = std::move(program); programs_[program_key] = std::move(program);
...@@ -54,14 +59,20 @@ void CLContext::AddKernel(const std::string &kernel_name, ...@@ -54,14 +59,20 @@ void CLContext::AddKernel(const std::string &kernel_name,
const std::string &options, const std::string &options,
const std::string &time_stamp) { const std::string &time_stamp) {
cl_int status{CL_SUCCESS}; cl_int status{CL_SUCCESS};
#ifdef LITE_WITH_LOG
VLOG(3) << " --- to get program " << file_name << " --- "; VLOG(3) << " --- to get program " << file_name << " --- ";
#endif
auto program = GetProgram(file_name, options); auto program = GetProgram(file_name, options);
#ifdef LITE_WITH_LOG
VLOG(3) << " --- end get program --- "; VLOG(3) << " --- end get program --- ";
VLOG(3) << " --- to create kernel: " << kernel_name << " --- "; VLOG(3) << " --- to create kernel: " << kernel_name << " --- ";
#endif
std::shared_ptr<cl::Kernel> kernel( std::shared_ptr<cl::Kernel> kernel(
new cl::Kernel(program, kernel_name.c_str(), &status)); new cl::Kernel(program, kernel_name.c_str(), &status));
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
#ifdef LITE_WITH_LOG
VLOG(3) << " --- end create kernel --- "; VLOG(3) << " --- end create kernel --- ";
#endif
kernels_.emplace_back(std::move(kernel)); kernels_.emplace_back(std::move(kernel));
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_name << options << time_stamp; kernel_key << kernel_name << options << time_stamp;
...@@ -69,7 +80,9 @@ void CLContext::AddKernel(const std::string &kernel_name, ...@@ -69,7 +80,9 @@ void CLContext::AddKernel(const std::string &kernel_name,
} }
cl::Kernel &CLContext::GetKernel(const int index) { cl::Kernel &CLContext::GetKernel(const int index) {
#ifdef LITE_WITH_LOG
VLOG(3) << " --- kernel count: " << kernels_.size() << " --- "; VLOG(3) << " --- kernel count: " << kernels_.size() << " --- ";
#endif
CHECK(static_cast<size_t>(index) < kernels_.size()) CHECK(static_cast<size_t>(index) < kernels_.size())
<< "The index must be less than the size of kernels."; << "The index must be less than the size of kernels.";
CHECK(kernels_[index] != nullptr) CHECK(kernels_[index] != nullptr)
......
...@@ -25,6 +25,13 @@ CLRuntime* CLRuntime::Global() { ...@@ -25,6 +25,13 @@ CLRuntime* CLRuntime::Global() {
} }
CLRuntime::~CLRuntime() { CLRuntime::~CLRuntime() {
#ifdef LITE_WITH_LOG
LOG(INFO) << "is_cl_runtime_initialized_:" << is_cl_runtime_initialized_;
#endif
if (is_cl_runtime_initialized_ == false) {
return;
}
if (command_queue_ != nullptr) { if (command_queue_ != nullptr) {
command_queue_->flush(); command_queue_->flush();
command_queue_->finish(); command_queue_->finish();
...@@ -38,18 +45,53 @@ CLRuntime::~CLRuntime() { ...@@ -38,18 +45,53 @@ CLRuntime::~CLRuntime() {
} }
bool CLRuntime::Init() { bool CLRuntime::Init() {
#ifdef LITE_WITH_LOG
LOG(INFO) << "is_cl_runtime_initialized_:" << is_cl_runtime_initialized_;
#endif
if (is_cl_runtime_initialized_) { if (is_cl_runtime_initialized_) {
return true; return true;
} }
bool opencl_lib_found = paddle::lite::CLWrapper::Global()->OpenclLibFound();
#ifdef LITE_WITH_LOG
LOG(INFO) << "opencl_lib_found:" << opencl_lib_found;
#endif
if (opencl_lib_found == false) {
return false;
}
bool dlsym_success = paddle::lite::CLWrapper::Global()->DlsymSuccess();
#ifdef LITE_WITH_LOG
LOG(INFO) << "dlsym_success:" << dlsym_success;
#endif
if (dlsym_success == false) {
return false;
}
bool is_platform_init = InitializePlatform(); bool is_platform_init = InitializePlatform();
bool is_device_init = InitializeDevice(); #ifdef LITE_WITH_LOG
LOG(INFO) << "is_platform_init:" << is_platform_init; LOG(INFO) << "is_platform_init:" << is_platform_init;
#endif
if (is_platform_init == false) {
return false;
}
bool is_device_init = InitializeDevice();
#ifdef LITE_WITH_LOG
LOG(INFO) << "is_device_init:" << is_device_init; LOG(INFO) << "is_device_init:" << is_device_init;
#endif
if (is_device_init == false) {
return false;
}
if ((is_platform_init == true) && (is_device_init == true)) { if ((is_platform_init == true) && (is_device_init == true)) {
is_platform_device_init_success_ = true; is_platform_device_init_success_ = true;
context_ = CreateContext(); context_ = CreateContext();
command_queue_ = CreateCommandQueue(context()); command_queue_ = CreateCommandQueue(context());
is_cl_runtime_initialized_ = true; is_cl_runtime_initialized_ = true;
#ifdef LITE_WITH_LOG
LOG(INFO) << "set is_cl_runtime_initialized_ = true";
#endif
} }
return is_cl_runtime_initialized_; return is_cl_runtime_initialized_;
} }
...@@ -138,20 +180,24 @@ GpuType CLRuntime::ParseGpuTypeFromDeviceName(std::string device_name) { ...@@ -138,20 +180,24 @@ GpuType CLRuntime::ParseGpuTypeFromDeviceName(std::string device_name) {
const std::string kMALI_PATTERN_STR = "Mali"; const std::string kMALI_PATTERN_STR = "Mali";
const std::string kADRENO_PATTERN_STR = "QUALCOMM Adreno(TM)"; const std::string kADRENO_PATTERN_STR = "QUALCOMM Adreno(TM)";
const std::string kPOWERVR_PATTERN_STR = "PowerVR"; const std::string kPOWERVR_PATTERN_STR = "PowerVR";
std::string gpu_type_str = "";
if (device_name == kADRENO_PATTERN_STR) { if (device_name == kADRENO_PATTERN_STR) {
LOG(INFO) << "adreno gpu"; gpu_type_str = "adreno gpu";
return GpuType::QUALCOMM_ADRENO; return GpuType::QUALCOMM_ADRENO;
} else if (device_name.find(kMALI_PATTERN_STR) != std::string::npos) { } else if (device_name.find(kMALI_PATTERN_STR) != std::string::npos) {
LOG(INFO) << "mali gpu"; gpu_type_str = "mali gpu";
return GpuType::ARM_MALI; return GpuType::ARM_MALI;
} else if (device_name.find(kPOWERVR_PATTERN_STR) != std::string::npos) { } else if (device_name.find(kPOWERVR_PATTERN_STR) != std::string::npos) {
LOG(INFO) << "powerVR gpu"; gpu_type_str = "powerVR gpu";
return GpuType::IMAGINATION_POWERVR; return GpuType::IMAGINATION_POWERVR;
} else { } else {
LOG(INFO) << "others gpu"; gpu_type_str = "others gpu";
return GpuType::UNKNOWN; return GpuType::UNKNOWN;
} }
#ifdef LITE_WITH_LOG
LOG(INFO) << "gpu_type_str:" << gpu_type_str;
#endif
} }
bool CLRuntime::InitializeDevice() { bool CLRuntime::InitializeDevice() {
......
...@@ -70,24 +70,23 @@ class CLRuntime { ...@@ -70,24 +70,23 @@ class CLRuntime {
static CLRuntime* Global(); static CLRuntime* Global();
bool OpenCLAvaliableForDevice() { bool OpenCLAvaliableForDevice() {
bool opencl_lib_found = paddle::lite::CLWrapper::Global()->OpenclLibFound(); // note(ysh329): entered this func means:
LOG(INFO) << "opencl_lib_found:" << opencl_lib_found; // 1. opencl_lib_found must be true
if (opencl_lib_found == false) return false; // 2. dlsym_success must be true
bool dlsym_success = paddle::lite::CLWrapper::Global()->DlsymSuccess();
LOG(INFO) << "dlsym_success:" << dlsym_success;
if (opencl_lib_found == false) return false;
InitializeDevice(); InitializeDevice();
bool support_fp16 = bool support_fp16 =
static_cast<bool>(device_info_["CL_DEVICE_EXTENSIONS_FP16"]); static_cast<bool>(device_info_["CL_DEVICE_EXTENSIONS_FP16"]);
#ifdef LITE_WITH_LOG
LOG(INFO) << "support_fp16:" << support_fp16; LOG(INFO) << "support_fp16:" << support_fp16;
#endif
if (support_fp16 == false) return false; if (support_fp16 == false) return false;
is_device_avaliable_for_opencl_ = is_device_avaliable_for_opencl_ = support_fp16;
dlsym_success && opencl_lib_found && support_fp16; #ifdef LITE_WITH_LOG
LOG(INFO) << "is_device_avaliable_for_opencl_:" LOG(INFO) << "is_device_avaliable_for_opencl_:"
<< is_device_avaliable_for_opencl_; << is_device_avaliable_for_opencl_;
#endif
return is_device_avaliable_for_opencl_; return is_device_avaliable_for_opencl_;
} }
......
...@@ -347,18 +347,23 @@ class Context<TargetType::kX86> { ...@@ -347,18 +347,23 @@ class Context<TargetType::kX86> {
#ifdef LITE_WITH_OPENCL #ifdef LITE_WITH_OPENCL
template <> template <>
class Context<TargetType::kOpenCL> { class Context<TargetType::kOpenCL> {
std::shared_ptr<CLContext> cl_context_; std::shared_ptr<CLContext> cl_context_{nullptr};
public: public:
CLContext* cl_context() { return cl_context_.get(); } CLContext* cl_context() { return cl_context_.get(); }
void InitOnce() { void InitOnce() {
// Init cl runtime. if (CLRuntime::Global()->IsInitSuccess() == false) {
CHECK(CLRuntime::Global()->IsInitSuccess()) << "OpenCL runtime init failed"; LOG(ERROR) << "OpenCL runtime init failed";
}
cl_context_ = std::make_shared<CLContext>(); cl_context_ = std::make_shared<CLContext>();
} }
void CopySharedTo(OpenCLContext* ctx) { ctx->cl_context_ = cl_context_; } void CopySharedTo(OpenCLContext* ctx) {
if (ctx && cl_context_) {
ctx->cl_context_ = cl_context_;
}
}
}; };
#endif #endif
......
...@@ -159,9 +159,12 @@ RuntimeProgram::RuntimeProgram( ...@@ -159,9 +159,12 @@ RuntimeProgram::RuntimeProgram(
int block_idx) int block_idx)
: exec_scope_(exec_scope) { : exec_scope_(exec_scope) {
#ifdef LITE_WITH_OPENCL #ifdef LITE_WITH_OPENCL
bool opencl_valid = CLRuntime::Global()->OpenCLAvaliableForDevice();
using OpenCLContext = Context<TargetType::kOpenCL>; using OpenCLContext = Context<TargetType::kOpenCL>;
std::unique_ptr<KernelContext> local_ctx(new KernelContext()); std::unique_ptr<KernelContext> unique_opencl_ctx(new KernelContext());
local_ctx->As<OpenCLContext>().InitOnce(); if (opencl_valid) {
unique_opencl_ctx->As<OpenCLContext>().InitOnce();
}
#endif #endif
CHECK(program_desc); CHECK(program_desc);
auto block_size = program_desc->BlocksSize(); auto block_size = program_desc->BlocksSize();
...@@ -227,9 +230,15 @@ RuntimeProgram::RuntimeProgram( ...@@ -227,9 +230,15 @@ RuntimeProgram::RuntimeProgram(
} }
#ifdef LITE_WITH_OPENCL #ifdef LITE_WITH_OPENCL
if (kernel->target() == TARGET(kOpenCL)) { if (kernel->target() == TARGET(kOpenCL)) {
std::unique_ptr<KernelContext> ctx(new KernelContext()); if (opencl_valid) {
(*local_ctx).As<OpenCLContext>().CopySharedTo(&ctx->As<OpenCLContext>()); std::unique_ptr<KernelContext> ctx(new KernelContext());
kernel->SetContext(std::move(ctx)); (*unique_opencl_ctx)
.As<OpenCLContext>()
.CopySharedTo(&ctx->As<OpenCLContext>());
kernel->SetContext(std::move(ctx));
} else {
LOG(ERROR) << "opencl_valid:" << opencl_valid;
}
} else { } else {
kernel->SetContext( kernel->SetContext(
ContextScheduler::Global().NewContext(kernel->target())); ContextScheduler::Global().NewContext(kernel->target()));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册