diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc index bf269120f2edf6cb55a7c5091f52ff3818e6a390..9b8eb7d6463cd043a18a9aaec06c960cef7be72f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc @@ -276,11 +276,6 @@ int SubGraphOpenCLKernel::UnInit() { delete op; } } - for (const auto parameter : in_parameters_) { - if (parameter != nullptr) { - delete parameter; - } - } return RET_OK; } diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc index b9bae225dd5ad4b35025dda294bea3efd964da55..a193c302c3909b2dce5b9d78a1757b67616b837e 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc @@ -24,6 +24,9 @@ namespace mindspore::lite::opencl { OpenCLAllocator::OpenCLAllocator() {} + +OpenCLAllocator::OpenCLAllocator(OpenCLRuntime *ocl_runtime) : ocl_runtime_(ocl_runtime) {} + OpenCLAllocator::~OpenCLAllocator() { Clear(); } void OpenCLAllocator::SetContext(const AllocatorContext &ctx) { @@ -46,14 +49,16 @@ void OpenCLAllocator::UnLock() { void *OpenCLAllocator::Malloc(size_t size) { return Malloc(size, std::vector{}); } void *OpenCLAllocator::Malloc(size_t size, const std::vector &img_size) { - auto ocl_runtime = opencl::OpenCLRuntime::GetInstance(); - auto svm_capabilities = ocl_runtime->GetSVMCapabilities(); + if (ocl_runtime_ == nullptr) { + ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); + } + auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); size_t img_pitch = 0; size_t dtype_size = 1; if (!img_size.empty()) { dtype_size = img_size[2] == CL_FLOAT ? sizeof(cl_float4) : sizeof(cl_half4); - uint32_t image_alignment = ocl_runtime->GetImagePitchAlignment(); + uint32_t image_alignment = ocl_runtime_->GetImagePitchAlignment(); img_pitch = (img_size[0] + image_alignment - 1) / image_alignment * image_alignment; size = img_pitch * img_size[1] * dtype_size; } @@ -87,28 +92,28 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector &img_size) cl_svm_mem_flags flags = (svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0; flags |= (svm_capabilities & CL_DEVICE_SVM_ATOMICS) ? CL_MEM_SVM_ATOMICS : 0; flags = flags | CL_MEM_READ_WRITE; - host_ptr = clSVMAlloc((*ocl_runtime->Context())(), flags, size, 0); + host_ptr = clSVMAlloc((*ocl_runtime_->Context())(), flags, size, 0); } else { cl_int ret = CL_SUCCESS; cl::Buffer *buffer = new (std::nothrow) - cl::Buffer(*ocl_runtime->Context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size, NULL, &ret); + cl::Buffer(*ocl_runtime_->Context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size, NULL, &ret); if (buffer == nullptr || ret != CL_SUCCESS) { UnLock(); MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")"; return nullptr; } device_ptr = static_cast(buffer); - host_ptr = ocl_runtime->MapBuffer(*buffer, CL_MAP_READ | CL_MAP_WRITE, size); + host_ptr = ocl_runtime_->MapBuffer(*buffer, CL_MAP_READ | CL_MAP_WRITE, size); if (host_ptr == nullptr) { UnLock(); MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr; return nullptr; } cl::Memory *mem = buffer; - ocl_runtime->UnmapBuffer(*mem, host_ptr); + ocl_runtime_->UnmapBuffer(*mem, host_ptr); if (!img_size.empty()) { cl::ImageFormat image_format(CL_RGBA, img_size[2]); - cl::Image2D *image = new (std::nothrow) cl::Image2D(*ocl_runtime->Context(), image_format, *buffer, img_size[0], + cl::Image2D *image = new (std::nothrow) cl::Image2D(*ocl_runtime_->Context(), image_format, *buffer, img_size[0], img_size[1], img_pitch * dtype_size, &ret); if (image == nullptr || ret != CL_SUCCESS) { delete buffer; @@ -139,7 +144,9 @@ void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::v MS_LOG(ERROR) << "MallocData out of max_size, size: " << size; return nullptr; } - auto ocl_runtime = opencl::OpenCLRuntime::GetInstance(); + if (ocl_runtime_ == nullptr) { + ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); + } Lock(); auto iter = free_list_.lower_bound(size); while (iter != free_list_.end() && (iter->second->size_ >= size) && (iter->second->size_ < (size << shift_factor_))) { @@ -164,7 +171,7 @@ void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::v cl_int ret = CL_SUCCESS; // CL_HALF_FLOAT, CL_FLOAT cl::ImageFormat image_format(CL_RGBA, img_size[2]); - cl::Image2D *image = new (std::nothrow) cl::Image2D(*ocl_runtime->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + cl::Image2D *image = new (std::nothrow) cl::Image2D(*ocl_runtime_->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, image_format, img_size[0], img_size[1], 0, data, &ret); if (image == nullptr || ret != CL_SUCCESS) { MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")"; @@ -174,14 +181,14 @@ void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::v } image_ptr = static_cast(image); std::vector region{img_size[0], img_size[1], 1}; - host_ptr = ocl_runtime->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region); + host_ptr = ocl_runtime_->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region); if (host_ptr == nullptr) { MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr; UnLock(); return nullptr; } cl::Memory *mem = image; - ocl_runtime->UnmapBuffer(*mem, host_ptr); + ocl_runtime_->UnmapBuffer(*mem, host_ptr); std::unique_ptr mem_buf = std::make_unique(); mem_buf->size_ = size; mem_buf->device_ptr_ = device_ptr; @@ -251,28 +258,46 @@ void *OpenCLAllocator::GetBuffer(void *buffer) { void OpenCLAllocator::Clear() { Lock(); - auto ocl_runtime = opencl::OpenCLRuntime::GetInstance(); - auto svm_capabilities = ocl_runtime->GetSVMCapabilities(); + if (ocl_runtime_ == nullptr) { + ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); + } + auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); for (auto it = allocated_list_.begin(); it != allocated_list_.end(); it++) { if (svm_capabilities) { - clSVMFree((*ocl_runtime->Context())(), it->second->host_ptr_); + clSVMFree((*ocl_runtime_->Context())(), it->second->host_ptr_); MS_LOG(DEBUG) << "OpenCL free svm buffer : " << it->second->host_ptr_; } else { - cl::Buffer *buff = static_cast(it->second->device_ptr_); - MS_LOG(DEBUG) << "OpenCL free device buffer : " << buff; - delete buff; + cl::Buffer *buffer = static_cast(it->second->device_ptr_); + MS_LOG(DEBUG) << "OpenCL free device buffer : " << buffer; + if (buffer != nullptr) { + delete buffer; + it->second->device_ptr_ = nullptr; + } + cl::Image *image = static_cast(it->second->image_ptr_); + if (image != nullptr) { + delete image; + it->second->image_ptr_ = nullptr; + } } } allocated_list_.clear(); for (auto it = free_list_.begin(); it != free_list_.end(); it++) { if (svm_capabilities) { - clSVMFree((*ocl_runtime->Context())(), it->second->host_ptr_); + clSVMFree((*ocl_runtime_->Context())(), it->second->host_ptr_); MS_LOG(DEBUG) << "OpenCL free svm buffer : " << it->second->host_ptr_; } else { - cl::Buffer *buff = static_cast(it->second->device_ptr_); - MS_LOG(DEBUG) << "OpenCL free device buffer : " << buff; - delete buff; + cl::Buffer *buffer = static_cast(it->second->device_ptr_); + MS_LOG(DEBUG) << "OpenCL free device buffer : " << buffer; + if (buffer != nullptr) { + delete buffer; + it->second->device_ptr_ = nullptr; + } + cl::Image *image = static_cast(it->second->image_ptr_); + if (image != nullptr) { + delete image; + it->second->image_ptr_ = nullptr; + } } } free_list_.clear(); @@ -280,8 +305,10 @@ void OpenCLAllocator::Clear() { } void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) { - auto ocl_runtime = opencl::OpenCLRuntime::GetInstance(); - auto svm_capabilities = ocl_runtime->GetSVMCapabilities(); + if (ocl_runtime_ == nullptr) { + ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); + } + auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); if (svm_capabilities) { if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { auto it = allocated_list_.find(host_ptr); @@ -289,7 +316,7 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr; return nullptr; } - ocl_runtime->MapBuffer(host_ptr, flags, it->second->size_, static_cast(command_queue), sync); + ocl_runtime_->MapBuffer(host_ptr, flags, it->second->size_, static_cast(command_queue), sync); } return host_ptr; } @@ -310,12 +337,12 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, void *new_host_ptr{nullptr}; if (mem_buf->img_size.empty()) { cl::Buffer *buffer = static_cast(mem_buf->device_ptr_); - new_host_ptr = ocl_runtime->MapBuffer(*buffer, flags, mem_buf->size_, nullptr, sync); + new_host_ptr = ocl_runtime_->MapBuffer(*buffer, flags, mem_buf->size_, nullptr, sync); } else { cl::ImageFormat image_format(CL_RGBA, mem_buf->img_size[2]); std::vector region{mem_buf->img_size[0], mem_buf->img_size[1], 1}; cl::Image2D *image = static_cast(mem_buf->image_ptr_); - new_host_ptr = ocl_runtime->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region); + new_host_ptr = ocl_runtime_->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region); } if (new_host_ptr == nullptr) { UnLock(); @@ -334,11 +361,13 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, } int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) { - auto ocl_runtime = opencl::OpenCLRuntime::GetInstance(); - auto svm_capabilities = ocl_runtime->GetSVMCapabilities(); + if (ocl_runtime_ == nullptr) { + ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); + } + auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); if (svm_capabilities) { if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { - return ocl_runtime->UnmapBuffer(host_ptr); + return ocl_runtime_->UnmapBuffer(host_ptr); } return RET_OK; } @@ -351,7 +380,7 @@ int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) { it->second->map_flags = false; cl::Memory *mem = static_cast(it->second->img_size.empty() ? it->second->device_ptr_ : it->second->image_ptr_); - return ocl_runtime->UnmapBuffer(*mem, it->second->host_ptr_, static_cast(command_queue)); + return ocl_runtime_->UnmapBuffer(*mem, it->second->host_ptr_, static_cast(command_queue)); } else { MS_LOG(WARNING) << "Host ptr " << host_ptr << " do not mapped"; return RET_OK; diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.h b/mindspore/lite/src/runtime/opencl/opencl_allocator.h index b582e83bc6e7666b1d8339fca99f3003369385d2..6649c29e5dfc74e289eb17d681eb5ae6e15adab4 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.h +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.h @@ -40,11 +40,13 @@ struct OpenclMemory { OpenCLMemoryType mem_type{MS_HOST_BUFFER | MS_CL_BUFFER}; }; +class OpenCLRuntime; enum class MemType : char { SVM, BUF, IMG }; class OpenCLAllocator : public Allocator { public: OpenCLAllocator(); + explicit OpenCLAllocator(OpenCLRuntime *ocl_runtime); ~OpenCLAllocator() override; void SetContext(const AllocatorContext &ctx) override; void *Malloc(size_t size) override; @@ -86,6 +88,7 @@ class OpenCLAllocator : public Allocator { // 6 is empirical value int shift_factor_ = 6; bool lock_flag_ = false; + OpenCLRuntime *ocl_runtime_{nullptr}; }; } // namespace mindspore::lite::opencl diff --git a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/opencl/opencl_runtime.cc index bd6ece20fd56eaeab1d8c3ab08c09a6a7c51359c..2bc10d979e4661fc37b8bd4968f9dba25e82ddc1 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_runtime.cc @@ -209,7 +209,7 @@ int OpenCLRuntime::Init() { return RET_ERROR; } - allocator_ = new (std::nothrow) OpenCLAllocator(); + allocator_ = new (std::nothrow) OpenCLAllocator(this); if (allocator_ == nullptr) { MS_LOG(ERROR) << "Command OpenCL allocator failed!"; return RET_ERROR;