diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/device/ascend/ascend_device_address.cc index b8b7f452e373028130fa41fc1d059565ee3f2ff6..93f039af0e703e3aafbaa1a5b5d8673e542fc7d0 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/device/ascend/ascend_device_address.cc @@ -262,8 +262,8 @@ AscendDeviceAddress::~AscendDeviceAddress() { if (ptr_ == nullptr) { return; } - if (mem_dynamic_alloc_) { - AscendMemoryAllocator::GetInstance().FreeTensorMem(ptr_); + if (from_mem_pool_) { + AscendMemoryPool::GetInstance().FreeTensorMem(ptr_); ptr_ = nullptr; } } diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.h b/mindspore/ccsrc/device/ascend/ascend_device_address.h index 60cc64cca7c6615f820c6922c44b47aa4f43d1cd..93746082c11bc2edad15414ef6ca32a230004492 100644 --- a/mindspore/ccsrc/device/ascend/ascend_device_address.h +++ b/mindspore/ccsrc/device/ascend/ascend_device_address.h @@ -21,7 +21,7 @@ #include #include #include "device/device_address.h" -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "ir/dtype.h" namespace mindspore { diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc index 0c2a97a5a6ff924a81419b7fd4ded6e041394116..0c6861e21fae6f4a62be93c22a9cd2ca953871c3 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc @@ -29,7 +29,7 @@ #include "hccl/hcom.h" #include "runtime/context.h" #include "device/ascend/ascend_stream_assign.h" -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "framework/ge_runtime/model_runner.h" #include "device/ascend/tasksink/task_generator.h" #include "session/anf_runtime_algorithm.h" diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc index f033d81d826ad16ccab83b249697b327b9c7ae02..42830f54fae777055a02a9651def2fe98b19a58e 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc @@ -15,29 +15,31 @@ */ #include "device/ascend/ascend_memory_manager.h" -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "utils/context/ms_context.h" #include "runtime/mem.h" namespace mindspore { namespace device { namespace ascend { -static const uint64_t ASCEND_MEM_SIZE = 20; -static const uint64_t ASCEND_MEM_SIZE_BYTE = (ASCEND_MEM_SIZE << 30); +const uint64_t kAscendDeviceMemGB = 20; +const uint64_t kAscendMemPoolGB = 5; +const uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << 30); +const uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << 30); void AscendMemoryManager::MallocDeviceMemory() { - device_mem_size_ = ASCEND_MEM_SIZE_BYTE; - static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO); + device_mem_size_ = kAscendDeviceMemSize; + static_mem_offset_ = device_mem_size_; auto ret = rtMalloc(reinterpret_cast(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); if (ret != RT_ERROR_NONE) { MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; } - device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); + device_mem_pool_size_ = kAscendMemPoolSize; ret = rtMalloc(reinterpret_cast(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); if (ret != RT_ERROR_NONE) { MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; } - AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); - AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); + AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); + AscendMemoryPool::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); } void AscendMemoryManager::FreeDeviceMemory() { @@ -57,8 +59,8 @@ void AscendMemoryManager::FreeDeviceMemory() { } } -void *AscendMemoryManager::AllocTensorMemDynamic(size_t size) { - return AscendMemoryAllocator::GetInstance().AllocTensorMem(size); +void *AscendMemoryManager::MallocMemFromMemPool(size_t size) { + return AscendMemoryPool::GetInstance().AllocTensorMem(size); } } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h index 8639fb5c7278bb8101313af88187c864c66915c4..dea88ac10a93a3b116b8d439e558fb6c403895a1 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h +++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h @@ -27,7 +27,11 @@ class AscendMemoryManager : public MemoryManager { void MallocDeviceMemory() override; void FreeDeviceMemory() override; - void *AllocTensorMemDynamic(size_t size) override; + void *MallocMemFromMemPool(size_t size) override; + + private: + uint8_t *device_mem_pool_base_{nullptr}; + uint64_t device_mem_pool_size_{0}; }; } // namespace ascend } // namespace device diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc b/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc similarity index 62% rename from mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc rename to mindspore/ccsrc/device/ascend/ascend_memory_pool.cc index 08a30a28b72350bba13c937bfc551847a1d5d553..2c38e4290d0e8cdecdc09ddd7ca1e0a3e5c32b8a 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc +++ b/mindspore/ccsrc/device/ascend/ascend_memory_pool.cc @@ -14,24 +14,15 @@ * limitations under the License. */ -#include "device/ascend/ascend_memory_allocator.h" +#include "device/ascend/ascend_memory_pool.h" #include "device/ascend/ascend_kernel_runtime.h" #include "utils/log_adapter.h" namespace mindspore { namespace device { namespace ascend { -const uint64_t MEM_SIZE = 20; -const uint64_t MEM_SIZE_BYTE = (MEM_SIZE << 30); - -AscendMemoryAllocator::AscendMemoryAllocator() { - hasMalloc_ = false; - free_mem_size_ = FloatToSize(MEM_SIZE_BYTE * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); - total_mem_size_ = free_mem_size_; -} - -size_t AscendMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { - if (hasMalloc_) { +size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { + if (has_malloc_) { MS_LOG(EXCEPTION) << "Has alloc memory pool memory !"; } if (size == 0 || size > free_mem_size_) { @@ -41,35 +32,35 @@ size_t AscendMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { if (*addr == nullptr) { MS_LOG(EXCEPTION) << "Device memory pool base is nullptr, failed to alloc memory pool memory!"; } - hasMalloc_ = true; + has_malloc_ = true; free_mem_size_ -= size; return size; } -bool AscendMemoryAllocator::FreeDeviceMem(const DeviceMemPtr& addr) { +bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr& addr) { MS_EXCEPTION_IF_NULL(addr); - hasMalloc_ = false; + has_malloc_ = false; free_mem_size_ = total_mem_size_; return true; } -size_t AscendMemoryAllocator::AlignMemorySize(size_t size) const { +size_t AscendMemoryPool::AlignMemorySize(size_t size) const { if (size == 0) { return DYNAMIC_MEM_ALIGN_SIZE; } return ((size + DYNAMIC_MEM_ALIGN_SIZE + 31) / DYNAMIC_MEM_ALIGN_SIZE) * DYNAMIC_MEM_ALIGN_SIZE; } -size_t AscendMemoryAllocator::mem_alloc_unit_size() const { return free_mem_size_ - 512; } +size_t AscendMemoryPool::mem_alloc_unit_size() const { return free_mem_size_ - 512; } -void AscendMemoryAllocator::set_device_mem_pool_base(uint8_t* device_mem_pool_base) { +void AscendMemoryPool::set_device_mem_pool_base(uint8_t* device_mem_pool_base) { MS_EXCEPTION_IF_NULL(device_mem_pool_base); device_mem_pool_base_ = device_mem_pool_base; } -size_t AscendMemoryAllocator::free_mem_size() { return free_mem_size_; } +size_t AscendMemoryPool::free_mem_size() { return free_mem_size_; } -size_t AscendMemoryAllocator::total_mem_size() { return total_mem_size_; } +size_t AscendMemoryPool::total_mem_size() { return total_mem_size_; } } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.h b/mindspore/ccsrc/device/ascend/ascend_memory_pool.h similarity index 67% rename from mindspore/ccsrc/device/ascend/ascend_memory_allocator.h rename to mindspore/ccsrc/device/ascend/ascend_memory_pool.h index 8b0f89a9b8e40d70c74ce1a792ec6c82d4adf14c..c2a29725f410832abd0e3fc2f6f6e80bb37895d7 100644 --- a/mindspore/ccsrc/device/ascend/ascend_memory_allocator.h +++ b/mindspore/ccsrc/device/ascend/ascend_memory_pool.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ -#define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ +#ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ +#define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ #include #include "pre_activate/mem_reuse/mem_dynamic_allocator.h" @@ -23,22 +23,23 @@ namespace mindspore { namespace device { namespace ascend { -// The fraction of total ascend memory used to compute the graph. -static const float GRAPH_INIT_ASCEND_MEM_RATIO = 0.8; - -class AscendMemoryAllocator : public DynamicMemPoolBestFit { +class AscendMemoryPool : public DynamicMemPoolBestFit { public: - ~AscendMemoryAllocator() override = default; + ~AscendMemoryPool() override = default; size_t AllocDeviceMem(size_t size, DeviceMemPtr* addr) override; bool FreeDeviceMem(const DeviceMemPtr& addr) override; void set_device_mem_pool_base(uint8_t* device_mem_pool_base); - void set_device_mem_pool_size(uint64_t device_mem_pool_size) { device_mem_pool_size_ = device_mem_pool_size; } + void set_device_mem_pool_size(uint64_t device_mem_pool_size) { + device_mem_pool_size_ = device_mem_pool_size; + free_mem_size_ = device_mem_pool_size_; + total_mem_size_ = free_mem_size_; + } size_t free_mem_size() override; size_t total_mem_size() override; - static AscendMemoryAllocator& GetInstance() { - static AscendMemoryAllocator instance; + static AscendMemoryPool& GetInstance() { + static AscendMemoryPool instance; return instance; } @@ -49,10 +50,10 @@ class AscendMemoryAllocator : public DynamicMemPoolBestFit { size_t mem_alloc_unit_size() const override; private: - AscendMemoryAllocator(); - AscendMemoryAllocator(const AscendMemoryAllocator&) = delete; - AscendMemoryAllocator& operator=(const AscendMemoryAllocator&) = delete; - bool hasMalloc_; + AscendMemoryPool() = default; + AscendMemoryPool(const AscendMemoryPool&) = delete; + AscendMemoryPool& operator=(const AscendMemoryPool&) = delete; + bool has_malloc_{false}; uint8_t* device_mem_pool_base_{nullptr}; uint64_t device_mem_pool_size_{0}; size_t free_mem_size_; @@ -62,4 +63,4 @@ class AscendMemoryAllocator : public DynamicMemPoolBestFit { } // namespace device } // namespace mindspore -#endif // MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ +#endif // MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/device/device_address.h index cb022427e34fe5e1cc6a90e16e20ef5a0778933b..2d43963934cb0895cca739c504e97c09b6ee1f44 100644 --- a/mindspore/ccsrc/device/device_address.h +++ b/mindspore/ccsrc/device/device_address.h @@ -70,7 +70,7 @@ class DeviceAddress { size_t ref_count_{0}; string format_{"DefaultFormat"}; TypeId type_id_{kNumberTypeFloat16}; - bool mem_dynamic_alloc_{false}; + bool from_mem_pool_{false}; friend class KernelRuntime; friend class MemoryManager; friend class mindspore::device::ascend::tasksink::TaskGenerator; diff --git a/mindspore/ccsrc/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/device/gpu/gpu_device_address.cc index 36391d27db63e539e0a98cc37748c0f419f40307..c27a1aa65b65f92341406edbb326dd931326030d 100644 --- a/mindspore/ccsrc/device/gpu/gpu_device_address.cc +++ b/mindspore/ccsrc/device/gpu/gpu_device_address.cc @@ -46,7 +46,7 @@ GPUDeviceAddress::~GPUDeviceAddress() { } auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); - if (mem_dynamic_alloc_) { + if (from_mem_pool_) { GPUMemoryAllocator::GetInstance().FreeTensorMem(ptr_); ptr_ = nullptr; } diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc index 597e188e9dcac8d5175804b46fd7b4b41bd817a1..2ec1a5df2918010b59aeb4fafc4b14e80c3afde7 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc @@ -227,7 +227,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod MS_EXCEPTION_IF_NULL(device_address); auto device_ptr = device_address->ptr_; if (device_ptr == nullptr) { - device_ptr = mem_manager_->AllocTensorMemDynamic(output_sizes[i]); + device_ptr = mem_manager_->MallocMemFromMemPool(output_sizes[i]); MS_EXCEPTION_IF_NULL(device_ptr); device_address->ptr_ = device_ptr; } @@ -244,7 +244,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod kernel_workspaces->emplace_back(nullptr); continue; } - auto device_ptr = mem_manager_->AllocTensorMemDynamic(workspace_sizes[i]); + auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]); MS_EXCEPTION_IF_NULL(device_ptr); kernel::AddressPtr workspace = std::make_shared(); MS_EXCEPTION_IF_NULL(workspace); @@ -292,7 +292,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN addr_size.emplace_back(device_address.get(), output_size); } - auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total); + auto device_mem_ptr = mem_manager_->MallocMemFromMemPool(total); MS_EXCEPTION_IF_NULL(device_mem_ptr); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); @@ -328,7 +328,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf addr_size.emplace_back(device_address.get(), output_sizes[i]); } - auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total); + auto device_mem_ptr = mem_manager_->MallocMemFromMemPool(total); MS_EXCEPTION_IF_NULL(device_mem_ptr); for (const auto &iter : addr_size) { MS_EXCEPTION_IF_NULL(iter.first); @@ -361,7 +361,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - mem_manager_->FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeMemFromMemPool(device_address->ptr_); device_address->ptr_ = nullptr; } } @@ -372,7 +372,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, auto workspace = kernel_workspaces[i]; if (workspace != nullptr) { MS_EXCEPTION_IF_NULL(workspace->addr); - mem_manager_->FreeTensorMemDynamic(workspace->addr); + mem_manager_->FreeMemFromMemPool(workspace->addr); workspace->addr = nullptr; } } @@ -389,7 +389,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - mem_manager_->FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeMemFromMemPool(device_address->ptr_); device_address->ptr_ = nullptr; } *is_communication_op = true; @@ -411,7 +411,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr auto device_address = AnfAlgo::GetMutableOutputAddr(kernel_input.first, 0); MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address->ptr_); - mem_manager_->FreeTensorMemDynamic(device_address->ptr_); + mem_manager_->FreeMemFromMemPool(device_address->ptr_); device_address->ptr_ = nullptr; } *is_communication_op = true; diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc index 3944b504e411acd42dd669a624eeb8b64fc07670..7d042264b6cd88c30114970551117560fe1f517b 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc @@ -21,11 +21,11 @@ namespace mindspore { namespace device { namespace gpu { -void *GPUMemoryManager::AllocTensorMemDynamic(size_t size) { +void *GPUMemoryManager::MallocMemFromMemPool(size_t size) { return GPUMemoryAllocator::GetInstance().AllocTensorMem(size); } -void GPUMemoryManager::FreeTensorMemDynamic(void *device_ptr) { +void GPUMemoryManager::FreeMemFromMemPool(void *device_ptr) { GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr); } @@ -34,7 +34,7 @@ void GPUMemoryManager::MallocDeviceMemory() { MS_EXCEPTION_IF_NULL(context_ptr); // If use the dynamic memory pool, then alloc the first memory block to init. if (context_ptr->enable_dynamic_mem_pool()) { - auto device_addr = AllocTensorMemDynamic(1); + auto device_addr = MallocMemFromMemPool(1); if (!device_addr) { MS_LOG(ERROR) << "Dynamic memory pool init error."; } @@ -62,7 +62,7 @@ uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool) { auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); if (context_ptr->enable_dynamic_mem_pool()) { - auto device_ptr = AllocTensorMemDynamic(size); + auto device_ptr = MallocMemFromMemPool(size); MS_EXCEPTION_IF_NULL(device_ptr); return AddressOffset(device_ptr, 0); } diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h index a18226bdf3f3ff37d2e0979131c2b0887df6e2c2..cc5dac2a5ef13a61581d7c314466c5fb64371fa1 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h @@ -28,11 +28,11 @@ class GPUMemoryManager : public MemoryManager { void MallocDeviceMemory() override; void FreeDeviceMemory() override; - void *AllocTensorMemDynamic(size_t size) override; - void FreeTensorMemDynamic(void *device_ptr) override; + void *MallocMemFromMemPool(size_t size) override; + void FreeMemFromMemPool(void *device_ptr) override; protected: - uint8_t *MallocStaticMem(size_t size, bool communication_mem); + uint8_t *MallocStaticMem(size_t size, bool communication_mem) override; }; } // namespace gpu } // namespace device diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc index 16025ed8a4e6681b993121d67af91c446ab9e338..eebc650347527854bca7ca0210e3297223c31c69 100644 --- a/mindspore/ccsrc/device/kernel_runtime.cc +++ b/mindspore/ccsrc/device/kernel_runtime.cc @@ -169,7 +169,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector auto device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocOpMemory(device_address, tensor_size); + mem_manager_->MallocMemFromMemPool(device_address, tensor_size); AnfAlgo::SetOutputAddr(device_address, index, item.get()); } } @@ -198,7 +198,7 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocOpMemory(device_address, output_sizes[i]); + mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); } } @@ -213,7 +213,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { for (size_t i = 0; i < workspace_lists.size(); ++i) { auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocOpMemory(device_address, workspace_lists[i]); + mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); } } @@ -457,7 +457,7 @@ void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) { bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); auto mem_flag = kDynamicMem; if (is_enable_mem_reuse) { - mem_manager_->InitReuseDynamicMemory(graph); + mem_manager_->MallocReusedDynamicMem(graph); mem_flag = kReuseDynamicMem; } auto &kernels = graph->execution_order(); diff --git a/mindspore/ccsrc/device/kernel_runtime.h b/mindspore/ccsrc/device/kernel_runtime.h index 1224bf14ebdc6e6dd80864f911f2b9179b6f38d6..61b43fd5c0a5fa1387941911baae16696054620e 100644 --- a/mindspore/ccsrc/device/kernel_runtime.h +++ b/mindspore/ccsrc/device/kernel_runtime.h @@ -33,7 +33,6 @@ #include "utils/context/ms_context.h" #include "device/memory_manager.h" -// using mindspore::session::KernelGraph; using mindspore::tensor::Tensor; using TensorPtr = std::shared_ptr; using mindspore::kernel::AddressPtr; diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/device/memory_manager.cc index 3c1ddee6bc5a654cb6b9d616e916eab53327bd9d..6977628eb18375559d34101661ab604465cded0e 100644 --- a/mindspore/ccsrc/device/memory_manager.cc +++ b/mindspore/ccsrc/device/memory_manager.cc @@ -21,12 +21,6 @@ using mindspore::memreuse::BestFitMemReuse; using mindspore::memreuse::MemReuseUtilPtr; namespace mindspore { namespace device { -MemoryManager::~MemoryManager() { - device_mem_base_ = nullptr; - device_mem_pool_base_ = nullptr; - mem_reuse_util_ptr_ = nullptr; -} - size_t MemoryManager::GetCommonAlignSize(size_t input_size) const { return (input_size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize; } @@ -35,7 +29,7 @@ size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) const { return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize; } -void MemoryManager::InitReuseDynamicMemory(session::KernelGraph *graph) { +void MemoryManager::MallocReusedDynamicMem(session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared(); MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); @@ -147,23 +141,23 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { } } -void MemoryManager::MallocOpMemory(const DeviceAddressPtr address, size_t size) { - auto device_ptr = AllocTensorMemDynamic(size); +void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { + auto device_ptr = MallocMemFromMemPool(size); MS_EXCEPTION_IF_NULL(device_ptr); address->ptr_ = device_ptr; - address->mem_dynamic_alloc_ = true; + address->from_mem_pool_ = true; } -void *MemoryManager::AllocTensorMemDynamic(size_t size) { +void *MemoryManager::MallocMemFromMemPool(size_t size) { if (size == 0) { - MS_LOG(ERROR) << "AllocTensorMemDynamic size is 0."; + MS_LOG(ERROR) << "MallocMemFromMemPool size is 0."; } return nullptr; } -void MemoryManager::FreeTensorMemDynamic(void *device_ptr) { +void MemoryManager::FreeMemFromMemPool(void *device_ptr) { if (device_ptr == nullptr) { - MS_LOG(ERROR) << "FreeTensorMemDynamic device_ptr is null."; + MS_LOG(ERROR) << "FreeMemFromMemPool device_ptr is null."; } } } // namespace device diff --git a/mindspore/ccsrc/device/memory_manager.h b/mindspore/ccsrc/device/memory_manager.h index 2e47237defa747b80c5e971289ca92d4f6ae88a4..82c22f454856dbc4825b622b876f681e7521b4db 100644 --- a/mindspore/ccsrc/device/memory_manager.h +++ b/mindspore/ccsrc/device/memory_manager.h @@ -31,7 +31,7 @@ using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr; class MemoryManager { public: MemoryManager() = default; - virtual ~MemoryManager(); + virtual ~MemoryManager() = default; virtual void MallocDeviceMemory() = 0; virtual void FreeDeviceMemory() = 0; @@ -40,16 +40,15 @@ class MemoryManager { dynamic_mem_offset_ = 0; } - void InitReuseDynamicMemory(session::KernelGraph *graph); + void MallocReusedDynamicMem(session::KernelGraph *graph); uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size); uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size); virtual uint8_t *MallocMem(int flag, size_t size); - // Alloc memory use the dynamic memory pool. - virtual void *AllocTensorMemDynamic(size_t size); - // Free memory use the dynamic memory pool. - virtual void FreeTensorMemDynamic(void *device_ptr); - virtual void MallocOpMemory(const DeviceAddressPtr address, size_t size); + virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); + virtual void *MallocMemFromMemPool(size_t size); + virtual void FreeMemFromMemPool(void *device_ptr); + size_t GetCommonAlignSize(size_t input_size) const; size_t GetCommunicationAlignSize(size_t input_size) const; @@ -57,9 +56,7 @@ class MemoryManager { virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem); virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem); uint8_t *device_mem_base_{nullptr}; - uint8_t *device_mem_pool_base_{nullptr}; uint64_t device_mem_size_{0}; - uint64_t device_mem_pool_size_{0}; uint64_t dynamic_mem_offset_{0}; uint64_t static_mem_offset_{0}; size_t total_static_size_ = 0; diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index 3c1351a85732106a8c7a2a48616b7079fc640166..f5bc07ff69c6ecc8ca3d0596a80741950f345501 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -95,7 +95,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "../../../mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc" "../../../mindspore/ccsrc/device/ascend/ascend_memory_manager.cc" "../../../mindspore/ccsrc/device/ascend/ascend_device_address.cc" - "../../../mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc" + "../../../mindspore/ccsrc/device/ascend/ascend_memory_pool.cc" "../../../mindspore/ccsrc/predict/generator/utils/ir_model_util.cc" "../../../mindspore/ccsrc/predict/predict.cc" "../../../mindspore/ccsrc/predict/converter/*.cc"