From 84b08a9b1fde7209d30b8d3bb2d247e6ed936b0b Mon Sep 17 00:00:00 2001 From: Yuan Shuai Date: Thu, 9 Apr 2020 11:54:59 -0500 Subject: [PATCH] [LITE][OPENCL] Fix OpenCL global static resources of CXX API and Light API (#3373) * [LITE][OPENCL] fix OpenCL global static resources. test=develop * Fix Cxx and light api. test=develop --- lite/api/cxx_api.h | 11 +---- lite/api/light_api.cc | 22 +++++++++- lite/api/light_api.h | 2 - lite/api/light_api_impl.cc | 7 ---- lite/backends/opencl/cl_context.cc | 37 ++++++---------- lite/backends/opencl/cl_context.h | 18 ++++++++ lite/backends/opencl/cl_runtime.cc | 42 ++++--------------- lite/backends/opencl/cl_runtime.h | 22 +--------- lite/core/mir/runtime_context_assign_pass.cc | 20 +++++++++ lite/kernels/opencl/io_copy_buffer_compute.cc | 4 ++ 10 files changed, 86 insertions(+), 99 deletions(-) diff --git a/lite/api/cxx_api.h b/lite/api/cxx_api.h index 094ba5b8d7..146556756a 100644 --- a/lite/api/cxx_api.h +++ b/lite/api/cxx_api.h @@ -43,16 +43,7 @@ class LITE_API Predictor { public: // Create an empty predictor. Predictor() { scope_ = std::make_shared(); } - ~Predictor() { -#ifdef LITE_WITH_OPENCL - CLRuntime::Global()->ReleaseResources(); -#endif - scope_.reset(); - exec_scope_ = nullptr; - program_.reset(); - input_names_.clear(); - output_names_.clear(); - } + // Create a predictor with the weight variable scope set. explicit Predictor(const std::shared_ptr& root_scope) : scope_(root_scope) {} diff --git a/lite/api/light_api.cc b/lite/api/light_api.cc index f61e2f3524..d82869dbef 100644 --- a/lite/api/light_api.cc +++ b/lite/api/light_api.cc @@ -14,6 +14,7 @@ #include "lite/api/light_api.h" #include +#include #include "paddle_use_kernels.h" // NOLINT #include "paddle_use_ops.h" // NOLINT @@ -135,7 +136,15 @@ void LightPredictor::BuildRuntimeProgram(const cpp::ProgramDesc& prog) { // 1. Create op first Program program(prog, scope_, {}); - // 2. Create Instructs +// 2. Create Instructs +#ifdef LITE_WITH_OPENCL + using WaitListType = + std::unordered_map(nullptr)), + std::shared_ptr>; + using OpenCLContext = Context; + std::unique_ptr local_ctx(new KernelContext()); + local_ctx->As().InitOnce(); +#endif // Create the kernels of the target places, and filter out the specific // kernel with the target alias. @@ -151,7 +160,18 @@ void LightPredictor::BuildRuntimeProgram(const cpp::ProgramDesc& prog) { return it->alias() == alias; }); CHECK(it != kernels.end()); + +#ifdef LITE_WITH_OPENCL + if ((*it)->target() == TARGET(kOpenCL)) { + std::unique_ptr ctx(new KernelContext()); + (*local_ctx).As().CopySharedTo(&ctx->As()); + (*it)->SetContext(std::move(ctx)); + } else { + (*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target())); + } +#else (*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target())); +#endif insts.emplace_back(op, std::move(*it)); } diff --git a/lite/api/light_api.h b/lite/api/light_api.h index e21618449c..aa25ea81c7 100644 --- a/lite/api/light_api.h +++ b/lite/api/light_api.h @@ -107,8 +107,6 @@ class LightPredictorImpl : public lite_api::PaddlePredictor { public: LightPredictorImpl() = default; - ~LightPredictorImpl(); - std::unique_ptr GetInput(int i) override; std::unique_ptr GetOutput(int i) const override; diff --git a/lite/api/light_api_impl.cc b/lite/api/light_api_impl.cc index c5ec042dfa..cdf5b7fb06 100644 --- a/lite/api/light_api_impl.cc +++ b/lite/api/light_api_impl.cc @@ -21,13 +21,6 @@ namespace paddle { namespace lite { -LightPredictorImpl::~LightPredictorImpl() { - raw_predictor_.reset(); -#ifdef LITE_WITH_OPENCL - CLRuntime::Global()->ReleaseResources(); -#endif -} - void LightPredictorImpl::Init(const lite_api::MobileConfig& config) { // LightPredictor Only support NaiveBuffer backend in publish lib if (config.lite_model_file().empty()) { diff --git a/lite/backends/opencl/cl_context.cc b/lite/backends/opencl/cl_context.cc index 153c062003..0edb83acc4 100644 --- a/lite/backends/opencl/cl_context.cc +++ b/lite/backends/opencl/cl_context.cc @@ -1,11 +1,8 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,7 +10,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "lite/backends/opencl/cl_context.h" -#include #include #include #include @@ -36,10 +32,8 @@ cl::Program &CLContext::GetProgram(const std::string &file_name, STL::stringstream program_key_ss; program_key_ss << file_name << options; std::string program_key = program_key_ss.str(); - - auto &programs = CLRuntime::Global()->programs(); - auto it = programs.find(program_key); - if (it != programs.end()) { + auto it = programs_.find(program_key); + if (it != programs_.end()) { VLOG(3) << " --- program -> " << program_key << " has been built --- "; return *(it->second); } @@ -50,9 +44,9 @@ cl::Program &CLContext::GetProgram(const std::string &file_name, CLRuntime::Global()->BuildProgram(program.get(), options); VLOG(3) << " --- end build program -> " << program_key << " --- "; - programs[program_key] = std::move(program); + programs_[program_key] = std::move(program); - return *(programs[program_key]); + return *(programs_[program_key]); } void CLContext::AddKernel(const std::string &kernel_name, @@ -68,30 +62,25 @@ void CLContext::AddKernel(const std::string &kernel_name, new cl::Kernel(program, kernel_name.c_str(), &status)); CL_CHECK_FATAL(status); VLOG(3) << " --- end create kernel --- "; - - auto &kernels = CLRuntime::Global()->kernels(); - auto &kernel_offset_map = CLRuntime::Global()->kernel_offset(); - kernels.emplace_back(std::move(kernel)); + kernels_.emplace_back(std::move(kernel)); STL::stringstream kernel_key; kernel_key << kernel_name << options << time_stamp; - kernel_offset_map[kernel_key.str()] = kernels.size() - 1; + kernel_offset_[kernel_key.str()] = kernels_.size() - 1; } cl::Kernel &CLContext::GetKernel(const int index) { - auto &kernels = CLRuntime::Global()->kernels(); - VLOG(3) << " --- kernel count: " << kernels.size() << " --- "; - CHECK(static_cast(index) < kernels.size()) + VLOG(3) << " --- kernel count: " << kernels_.size() << " --- "; + CHECK(static_cast(index) < kernels_.size()) << "The index must be less than the size of kernels."; - CHECK(kernels[index] != nullptr) + CHECK(kernels_[index] != nullptr) << "The target kernel pointer cannot be null."; - return *(kernels[index]); + return *(kernels_[index]); } cl::Kernel &CLContext::GetKernel(const std::string &name) { - auto &kernel_offset_map = CLRuntime::Global()->kernel_offset(); - auto it = kernel_offset_map.find(name); - CHECK(it != kernel_offset_map.end()) << "Cannot find the kernel function: " - << name; + auto it = kernel_offset_.find(name); + CHECK(it != kernel_offset_.end()) << "Cannot find the kernel function: " + << name; return GetKernel(it->second); } diff --git a/lite/backends/opencl/cl_context.h b/lite/backends/opencl/cl_context.h index b12473ccf5..586dc3df12 100644 --- a/lite/backends/opencl/cl_context.h +++ b/lite/backends/opencl/cl_context.h @@ -27,6 +27,20 @@ namespace lite { class CLContext { public: + ~CLContext() { + for (size_t kidx = 0; kidx < kernels_.size(); ++kidx) { + clReleaseKernel(kernels_[kidx]->get()); + kernels_[kidx].reset(); + } + kernels_.clear(); + kernel_offset_.clear(); + for (auto &p : programs_) { + clReleaseProgram(p.second->get()); + } + programs_.clear(); + LOG(INFO) << "release cl::Program, cl::Kernel finished."; + } + cl::CommandQueue &GetCommandQueue(); cl::Context &GetContext(); @@ -52,6 +66,10 @@ class CLContext { int divitor = 2); // cl::NDRange LocalWorkSizeConv1x1(cl::NDRange global_work_size, // size_t max_work_size); + private: + std::unordered_map> programs_; + std::vector> kernels_; + std::map kernel_offset_; }; } // namespace lite diff --git a/lite/backends/opencl/cl_runtime.cc b/lite/backends/opencl/cl_runtime.cc index dc6a168612..8a6b026367 100644 --- a/lite/backends/opencl/cl_runtime.cc +++ b/lite/backends/opencl/cl_runtime.cc @@ -1,11 +1,8 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,7 +11,6 @@ limitations under the License. */ #include "lite/backends/opencl/cl_runtime.h" #include -#include #include #include #include "lite/utils/cp_logging.h" @@ -29,38 +25,16 @@ CLRuntime* CLRuntime::Global() { } CLRuntime::~CLRuntime() { - LOG(INFO) << "CLRuntime::~CLRuntime()"; - // Note: do ReleaseResources() in predictor - command_queue_&& clReleaseCommandQueue(command_queue_->get()); - command_queue_.reset(); - context_&& clReleaseContext(context_->get()); - context_.reset(); - device_.reset(); - platform_.reset(); - initialized_ = false; -} - -void CLRuntime::ReleaseResources() { - // if (is_resources_released_) { - // return; - // } - if (command_queue_ != nullptr) { command_queue_->flush(); command_queue_->finish(); } - for (size_t kidx = 0; kidx < kernels_.size(); ++kidx) { - clReleaseKernel(kernels_[kidx]->get()); - kernels_[kidx].reset(); - } - kernels_.clear(); - kernel_offset_.clear(); - for (auto& p : programs_) { - clReleaseProgram(p.second->get()); - } - programs_.clear(); - LOG(INFO) << "release resources finished."; - is_resources_released_ = true; + // For controlling the destruction order: + command_queue_.reset(); + context_.reset(); + device_.reset(); + platform_.reset(); + LOG(INFO) << "release ~CLRuntime() "; } bool CLRuntime::Init() { @@ -98,14 +72,14 @@ cl::CommandQueue& CLRuntime::command_queue() { return *command_queue_; } -std::shared_ptr CLRuntime::CreateProgram( +std::unique_ptr CLRuntime::CreateProgram( const cl::Context& context, std::string file_name) { auto cl_file = opencl_kernels_files.find(file_name); std::string content(cl_file->second.begin(), cl_file->second.end()); cl::Program::Sources sources; sources.push_back(content); auto prog = - std::shared_ptr(new cl::Program(context, sources, &status_)); + std::unique_ptr(new cl::Program(context, sources, &status_)); VLOG(4) << "OpenCL kernel file name: " << file_name; VLOG(4) << "Program source size: " << content.size(); CL_CHECK_FATAL(status_); diff --git a/lite/backends/opencl/cl_runtime.h b/lite/backends/opencl/cl_runtime.h index 69f9e3e371..2a8996b066 100644 --- a/lite/backends/opencl/cl_runtime.h +++ b/lite/backends/opencl/cl_runtime.h @@ -1,11 +1,8 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,7 +15,6 @@ limitations under the License. */ #include #include #include -#include #include #include "lite/backends/opencl/cl_include.h" #include "lite/backends/opencl/cl_utility.h" @@ -33,8 +29,6 @@ class CLRuntime { public: static CLRuntime* Global(); - void ReleaseResources(); - bool Init(); cl::Platform& platform(); @@ -45,7 +39,7 @@ class CLRuntime { cl::CommandQueue& command_queue(); - std::shared_ptr CreateProgram(const cl::Context& context, + std::unique_ptr CreateProgram(const cl::Context& context, std::string file_name); std::unique_ptr CreateEvent(const cl::Context& context); @@ -60,12 +54,6 @@ class CLRuntime { std::map& GetDeviceInfo(); - std::unordered_map>& programs() { - return programs_; - } - std::vector>& kernels() { return kernels_; } - std::map& kernel_offset() { return kernel_offset_; } - private: CLRuntime() = default; @@ -107,19 +95,11 @@ class CLRuntime { std::shared_ptr command_queue_{nullptr}; - std::unordered_map> programs_{}; - - std::vector> kernels_{}; - - std::map kernel_offset_{}; - cl_int status_{CL_SUCCESS}; bool initialized_{false}; bool is_init_success_{false}; - - bool is_resources_released_{false}; }; } // namespace lite diff --git a/lite/core/mir/runtime_context_assign_pass.cc b/lite/core/mir/runtime_context_assign_pass.cc index 97c4819eaf..3cbe602f31 100644 --- a/lite/core/mir/runtime_context_assign_pass.cc +++ b/lite/core/mir/runtime_context_assign_pass.cc @@ -24,11 +24,31 @@ class RuntimeContextAssignPass : public StmtPass { RuntimeContextAssignPass() {} void Apply(const std::unique_ptr& graph) override { +#ifdef LITE_WITH_OPENCL + using OpenCLContext = Context; + std::unique_ptr local_ctx(new KernelContext()); + local_ctx->As().InitOnce(); +#endif for (auto& node : graph->mutable_nodes()) { if (!node.IsStmt()) continue; auto& inst = node.AsStmt(); + +#ifdef LITE_WITH_OPENCL + if (inst.picked_kernel().target() == TARGET(kOpenCL)) { + std::unique_ptr ctx(new KernelContext()); + (*local_ctx) + .As() + .CopySharedTo(&ctx->As()); + inst.picked_kernel().SetContext(std::move(ctx)); + } else { + inst.picked_kernel().SetContext(ContextScheduler::Global().NewContext( + inst.picked_kernel().target())); + } +#else inst.picked_kernel().SetContext( ContextScheduler::Global().NewContext(inst.picked_kernel().target())); + +#endif } } }; diff --git a/lite/kernels/opencl/io_copy_buffer_compute.cc b/lite/kernels/opencl/io_copy_buffer_compute.cc index 6a49cc2577..f76f667923 100644 --- a/lite/kernels/opencl/io_copy_buffer_compute.cc +++ b/lite/kernels/opencl/io_copy_buffer_compute.cc @@ -106,6 +106,7 @@ class IoCopykOpenCLToHostCompute auto& context = ctx_->As(); auto* wait_list = context.cl_wait_list(); + auto it = wait_list->find(x_ptr); if (it != wait_list->end()) { #ifndef LITE_SHUTDOWN_LOG @@ -113,6 +114,9 @@ class IoCopykOpenCLToHostCompute #endif auto& event = *(it->second); event.wait(); + auto command_queue = CLRuntime::Global()->command_queue(); + command_queue.flush(); + command_queue.finish(); } else { LOG(FATAL) << "Could not find the sync event for the target cl tensor."; } -- GitLab