diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt
index 9f559a51eb1c93947d4221725377f682181f059a..1d104148c323e747b71cc824a63f04137bba49ce 100644
--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@@ -132,6 +132,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
         "kernel/kash/*.cc"
         "device/kernel_info.cc"
         "device/kernel_runtime.cc"
+        "device/memory_manager.cc"
         "device/kernel_runtime_manager.cc"
         "device/convert_tensor_utils.cc"
         "pre_activate/common/*.cc"
diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
index dc7eb5449b47758c985d11d79bfc491c0a7ad30a..0c2a97a5a6ff924a81419b7fd4ded6e041394116 100644
--- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
+++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
@@ -37,6 +37,7 @@
 #include "kernel/tbe/tbe_utils.h"
 #include "kernel/tbe/tbe_python_funcs.h"
 #include "pre_activate/mem_reuse/mem_reuse_checker.h"
+#include "device/ascend/ascend_memory_manager.h"
 
 using mindspore::device::ascend::ProfilingManager;
 using mindspore::device::ascend::ProfilingUtils;
@@ -47,8 +48,6 @@ using std::vector;
 namespace mindspore {
 namespace device {
 namespace ascend {
-static const uint64_t ASCEND_MEM_SIZE = 20;
-static const uint64_t ASCEND_MEM_SIZE_BYTE = (ASCEND_MEM_SIZE << 30);
 static const size_t PRAMATER_OUTPUT_INDEX = 0;
 
 AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); }
@@ -86,7 +85,8 @@ void AscendKernelRuntime::ReleaseDeviceRes() {
     MS_EXCEPTION(DeviceProcessError) << "rtSetDevice, ret[" << static_cast<int>(ret) << "]";
   }
 
-  FreeDeviceMemory();
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  mem_manager_->FreeDeviceMemory();
   (void)DestroyHccl();
   (void)ResetDevice();
   (void)ProfilingManager::GetInstance().StopProfiling();
@@ -109,11 +109,9 @@ bool AscendKernelRuntime::Init() {
   if (!ret) {
     return ret;
   }
-
-  ret = MallocDeviceMemory();
-  if (!ret) {
-    return ret;
-  }
+  mem_manager_ = std::make_shared<AscendMemoryManager>();
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  mem_manager_->MallocDeviceMemory();
 
   ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
   if (!ret) {
@@ -239,13 +237,6 @@ DeviceAddressPtr AscendKernelRuntime::CreateDeviceAddress(void *device_ptr, size
   return std::make_shared<AscendDeviceAddress>(device_ptr, device_size, format, type_id);
 }
 
-void AscendKernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int) {
-  auto device_ptr = AscendMemoryAllocator::GetInstance().AllocTensorMem(size);
-  MS_EXCEPTION_IF_NULL(device_ptr);
-  address->ptr_ = device_ptr;
-  address->mem_dynamic_alloc_ = true;
-}
-
 bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
@@ -474,42 +465,6 @@ bool AscendKernelRuntime::DestroyHccl() {
   context_ptr->set_enable_hccl(false);
   return true;
 }
-
-bool AscendKernelRuntime::MallocDeviceMemory() {
-  device_mem_size_ = ASCEND_MEM_SIZE_BYTE;
-  static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO);
-  auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM);
-  if (ret != RT_ERROR_NONE) {
-    MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]";
-  }
-  device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO));
-  ret = rtMalloc(reinterpret_cast<void **>(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM);
-  if (ret != RT_ERROR_NONE) {
-    MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]";
-  }
-  AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_);
-  AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_);
-  return true;
-}
-
-void AscendKernelRuntime::FreeDeviceMemory() {
-  if (device_mem_base_ != nullptr) {
-    auto ret = rtFree(device_mem_base_);
-    if (ret != RT_ERROR_NONE) {
-      MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]";
-    }
-    device_mem_base_ = nullptr;
-  }
-  if (device_mem_pool_base_ != nullptr) {
-    auto ret = rtFree(device_mem_pool_base_);
-    if (ret != RT_ERROR_NONE) {
-      MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]";
-    }
-    device_mem_pool_base_ = nullptr;
-  }
-}
-
-void AscendKernelRuntime::FreeHostMemory() { dynamic_mem_offset_ = 0; }
 }  // namespace ascend
 }  // namespace device
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
index dbd1460d24fd01c252e969a28eeda53c41a6c6a1..0eedad3d2b5e27364cc5f2a3ab6f9e8d5de7471d 100644
--- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
+++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
@@ -39,13 +39,11 @@ class AscendKernelRuntime : public KernelRuntime {
   bool GenTask(const session::KernelGraph *graph) override;
   bool RunTask(const session::KernelGraph *graph) override;
   bool LoadTask(const session::KernelGraph *graph) override;
-  void FreeHostMemory() override;
 
  protected:
   DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,
                                        TypeId type_id) override;
   bool SyncStream() override;
-  void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) override;
 
  private:
   bool InitDevice();
@@ -53,8 +51,7 @@ class AscendKernelRuntime : public KernelRuntime {
   bool HcclInit();
   bool NeedDestroyHccl();
   bool DestroyHccl();
-  bool MallocDeviceMemory();
-  void FreeDeviceMemory();
+
   void ClearGraphModelMap();
   void ReleaseDeviceRes() override;
   uint32_t GetGraphModelId(const session::KernelGraph *kernel_graph);
diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f033d81d826ad16ccab83b249697b327b9c7ae02
--- /dev/null
+++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc
@@ -0,0 +1,65 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/ascend/ascend_memory_manager.h"
+#include "device/ascend/ascend_memory_allocator.h"
+#include "utils/context/ms_context.h"
+#include "runtime/mem.h"
+namespace mindspore {
+namespace device {
+namespace ascend {
+static const uint64_t ASCEND_MEM_SIZE = 20;  // device memory budget in GiB (shifted left by 30 bits below)
+static const uint64_t ASCEND_MEM_SIZE_BYTE = (ASCEND_MEM_SIZE << 30);  // the same budget expressed in bytes
+
+void AscendMemoryManager::MallocDeviceMemory() {  // rtMallocs two RT_MEMORY_HBM regions; raises on failure
+  device_mem_size_ = ASCEND_MEM_SIZE_BYTE;  // total budget that both regions are sized from
+  static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO);  // size of the first region
+  auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM);
+  if (ret != RT_ERROR_NONE) {
+    MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]";
+  }
+  device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO));  // remaining share
+  ret = rtMalloc(reinterpret_cast<void **>(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM);
+  if (ret != RT_ERROR_NONE) {
+    MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]";
+  }
+  AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_);  // pool base -> allocator
+  AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_);  // pool size -> allocator
+}
+
+void AscendMemoryManager::FreeDeviceMemory() {  // rtFrees both regions; a failed rtFree is logged, not raised
+  if (device_mem_base_ != nullptr) {
+    auto ret = rtFree(device_mem_base_);
+    if (ret != RT_ERROR_NONE) {
+      MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]";
+    }
+    device_mem_base_ = nullptr;  // cleared even when rtFree failed, so a repeat call skips this region
+  }
+  if (device_mem_pool_base_ != nullptr) {
+    auto ret = rtFree(device_mem_pool_base_);
+    if (ret != RT_ERROR_NONE) {
+      MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]";
+    }
+    device_mem_pool_base_ = nullptr;  // same treatment for the pool region
+  }
+}
+
+void *AscendMemoryManager::AllocTensorMemDynamic(size_t size) {  // thin forwarder to AscendMemoryAllocator
+  return AscendMemoryAllocator::GetInstance().AllocTensorMem(size);  // result is returned unchecked
+}
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.h b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..8639fb5c7278bb8101313af88187c864c66915c4
--- /dev/null
+++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.h
@@ -0,0 +1,35 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_
+#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_
+#include "device/memory_manager.h"
+namespace mindspore {
+namespace device {
+namespace ascend {
+class AscendMemoryManager : public MemoryManager {  // MemoryManager implementation for the Ascend backend
+ public:
+  AscendMemoryManager() = default;
+  virtual ~AscendMemoryManager() = default;
+
+  void MallocDeviceMemory() override;  // reserve device memory (called from AscendKernelRuntime::Init)
+  void FreeDeviceMemory() override;  // release it (called from AscendKernelRuntime::ReleaseDeviceRes)
+  void *AllocTensorMemDynamic(size_t size) override;  // allocate `size` bytes via AscendMemoryAllocator
+};
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_
diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/device/device_address.h
index 1610d4337245cd5ce11204924bfa4cf5b81f98ef..cb022427e34fe5e1cc6a90e16e20ef5a0778933b 100644
--- a/mindspore/ccsrc/device/device_address.h
+++ b/mindspore/ccsrc/device/device_address.h
@@ -33,12 +33,14 @@ class CPUKernelRuntime;
 }  // namespace cpu
 namespace ascend {
 class AscendKernelRuntime;
+class AscendMemoryManager;
 namespace tasksink {
 class TaskGenerator;
 }  // namespace tasksink
 }  // namespace ascend
 namespace gpu {
 class GPUKernelRuntime;
+class GPUMemoryManager;
 }  // namespace gpu
 }  // namespace device
 }  // namespace mindspore
@@ -70,12 +72,15 @@ class DeviceAddress {
   TypeId type_id_{kNumberTypeFloat16};
   bool mem_dynamic_alloc_{false};
   friend class KernelRuntime;
+  friend class MemoryManager;
   friend class mindspore::device::ascend::tasksink::TaskGenerator;
   friend class mindspore::device::cpu::CPUSimpleMemPlan;
   friend class mindspore::device::cpu::CPUResourceManager;
   friend class mindspore::device::cpu::CPUKernelRuntime;
   friend class mindspore::device::gpu::GPUKernelRuntime;
+  friend class mindspore::device::gpu::GPUMemoryManager;
   friend class mindspore::device::ascend::AscendKernelRuntime;
+  friend class mindspore::device::ascend::AscendMemoryManager;
 };
 
 using DeviceAddressPtr = std::shared_ptr<DeviceAddress>;
diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
index 9eeb1062f747cb37b99db9b91391b46e1e443d9c..597e188e9dcac8d5175804b46fd7b4b41bd817a1 100644
--- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
@@ -26,6 +26,7 @@
 #include "device/kernel_runtime_manager.h"
 #include "device/gpu/gpu_common.h"
 #include "common/utils.h"
+#include "device/gpu/gpu_memory_manager.h"
 
 namespace mindspore {
 namespace device {
@@ -36,26 +37,14 @@ bool GPUKernelRuntime::Init() {
   if (device_init_ == true) {
     return true;
   }
-
   auto ret = InitDevice();
   if (!ret) {
     MS_LOG(ERROR) << "InitDevice error.";
     return ret;
   }
-
-  auto context_ptr = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(context_ptr);
-  // If use the dynamic memory pool, then alloc the first memory block to init.
-  if (context_ptr->enable_dynamic_mem_pool()) {
-    auto device_addr = AllocTensorMemDynamic(1);
-    if (!device_addr) {
-      MS_LOG(ERROR) << "Dynamic memory pool init error.";
-      return false;
-    }
-  } else {
-    MallocDeviceMemory();
-  }
-
+  mem_manager_ = std::make_shared<GPUMemoryManager>();
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  mem_manager_->MallocDeviceMemory();
   const void *collective_handle_ = CollectiveInitializer::instance().collective_handle();
   bool collective_inited = CollectiveInitializer::instance().collective_inited();
   if (collective_inited && collective_handle_ != nullptr) {
@@ -101,16 +90,6 @@ bool GPUKernelRuntime::InitDevice() {
   return true;
 }
 
-void GPUKernelRuntime::MallocDeviceMemory() {
-  // Need to reserve 20% space for dynamic memory
-  const float init_gpu_mem_ratio = 0.8;
-  size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio);
-  auto alloc_size =
-    GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast<void **>(&device_mem_base_));
-  device_mem_size_ = alloc_size;
-  static_mem_offset_ = device_mem_size_;
-}
-
 void GPUKernelRuntime::ReleaseDeviceRes() {
   // For dataset mode.
   if (GpuBufferMgr::GetInstance().IsInit()) {
@@ -122,39 +101,22 @@ void GPUKernelRuntime::ReleaseDeviceRes() {
     CHECK_OP_RET_WITH_EXCEPT(GpuBufferMgr::GetInstance().Destroy(), "Could not destroy gpu data queue.");
   }
   GPUDeviceManager::GetInstance().ReleaseDevice();
-  if (device_mem_base_ != nullptr) {
-    if (!GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) {
-      MS_LOG(EXCEPTION) << "Could not free gpu device memory.";
-    }
-  }
-  GPUMemoryAllocator::GetInstance().ReleaseDeviceRes();
-}
-
-void GPUKernelRuntime::FreeHostMemory() { dynamic_mem_offset_ = 0; }
-
-void *GPUKernelRuntime::AllocTensorMemDynamic(size_t size) {
-  return GPUMemoryAllocator::GetInstance().AllocTensorMem(size);
-}
-
-void GPUKernelRuntime::FreeTensorMemDynamic(void *device_ptr) {
-  GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  mem_manager_->FreeDeviceMemory();
 }
 
 void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  mem_manager_->ResetDynamicMemory();
   AssignStaticMemory(graph);
-  bool is_enable_mem_reuse = context_ptr->enable_mem_reuse();
   bool is_enable_dynamic_mem = context_ptr->enable_dynamic_mem_pool();
   if (is_enable_dynamic_mem) {
     // Use the dynamic memory pool.
     InitKernelRefCount(graph);
     InitKernelOutputAddress(graph);
-  } else if (is_enable_mem_reuse) {
-    // Use the memory reuse.
-    ReuseAssignDynamicMemory(graph);
   } else {
-    // Normal way.
     AssignDynamicMemory(graph);
   }
 }
@@ -179,32 +141,6 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph) {
   return ret;
 }
 
-uint8_t *GPUKernelRuntime::MallocStaticMem(size_t size, bool) {
-  auto context_ptr = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(context_ptr);
-  if (context_ptr->enable_dynamic_mem_pool()) {
-    auto device_ptr = AllocTensorMemDynamic(size);
-    MS_EXCEPTION_IF_NULL(device_ptr);
-    return AddressOffset(device_ptr, 0);
-  }
-
-  auto align_size = GetCommonAlignSize(size);
-  if (static_mem_offset_ < align_size) {
-    MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
-                      << "] static[" << total_static_size_ << "])"
-                      << " malloc [" << align_size << "] failed!";
-  }
-  auto offset = static_mem_offset_ - align_size;
-  if (dynamic_mem_offset_ > offset) {
-    MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
-                      << "] static[" << total_static_size_ << "])"
-                      << " malloc [" << align_size << "] failed!";
-  }
-  total_static_size_ += align_size;
-  static_mem_offset_ = offset;
-  return device_mem_base_ + offset;
-}
-
 void GPUKernelRuntime::InitKernelRefCount(const session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
   MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared<memreuse::MemReuseUtil>();
@@ -273,6 +209,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
   MS_EXCEPTION_IF_NULL(kernel_inputs);
   MS_EXCEPTION_IF_NULL(kernel_workspaces);
   MS_EXCEPTION_IF_NULL(kernel_outputs);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) {
     auto device_address = AnfAlgo::GetPrevNodeOutputAddr(kernel, i);
     MS_EXCEPTION_IF_NULL(device_address);
@@ -290,7 +227,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
     MS_EXCEPTION_IF_NULL(device_address);
     auto device_ptr = device_address->ptr_;
     if (device_ptr == nullptr) {
-      device_ptr = AllocTensorMemDynamic(output_sizes[i]);
+      device_ptr = mem_manager_->AllocTensorMemDynamic(output_sizes[i]);
       MS_EXCEPTION_IF_NULL(device_ptr);
       device_address->ptr_ = device_ptr;
     }
@@ -307,7 +244,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
       kernel_workspaces->emplace_back(nullptr);
       continue;
     }
-    auto device_ptr = AllocTensorMemDynamic(workspace_sizes[i]);
+    auto device_ptr = mem_manager_->AllocTensorMemDynamic(workspace_sizes[i]);
     MS_EXCEPTION_IF_NULL(device_ptr);
     kernel::AddressPtr workspace = std::make_shared<kernel::Address>();
     MS_EXCEPTION_IF_NULL(workspace);
@@ -333,6 +270,7 @@ void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph
 
 void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel) {
   MS_EXCEPTION_IF_NULL(kernel);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   // The reference count of communication kernel input is not 0.
   if (communication_op_input_ref_count_ != 0) {
     MS_LOG(ERROR) << "The reference count of communication kernel input is not 0.";
@@ -354,7 +292,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN
     addr_size.emplace_back(device_address.get(), output_size);
   }
 
-  auto device_mem_ptr = AllocTensorMemDynamic(total);
+  auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total);
   MS_EXCEPTION_IF_NULL(device_mem_ptr);
   for (const auto &iter : addr_size) {
     MS_EXCEPTION_IF_NULL(iter.first);
@@ -366,6 +304,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN
 
 void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel) {
   MS_EXCEPTION_IF_NULL(kernel);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   // The reference count of communication kernel output is not 0.
   if (communication_op_output_ref_count_ != 0) {
     MS_LOG(ERROR) << "The reference count of communication kernel output is not 0.";
@@ -389,7 +328,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf
     addr_size.emplace_back(device_address.get(), output_sizes[i]);
   }
 
-  auto device_mem_ptr = AllocTensorMemDynamic(total);
+  auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total);
   MS_EXCEPTION_IF_NULL(device_mem_ptr);
   for (const auto &iter : addr_size) {
     MS_EXCEPTION_IF_NULL(iter.first);
@@ -402,6 +341,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf
 void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
                                             const AddressPtrList &kernel_workspaces) {
   MS_EXCEPTION_IF_NULL(kernel);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   auto cnode = kernel->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
   // Free the input of kernel by reference count.
@@ -421,7 +361,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
         auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i);
         MS_EXCEPTION_IF_NULL(device_address);
         MS_EXCEPTION_IF_NULL(device_address->ptr_);
-        FreeTensorMemDynamic(device_address->ptr_);
+        mem_manager_->FreeTensorMemDynamic(device_address->ptr_);
         device_address->ptr_ = nullptr;
       }
     }
@@ -432,7 +372,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
     auto workspace = kernel_workspaces[i];
     if (workspace != nullptr) {
       MS_EXCEPTION_IF_NULL(workspace->addr);
-      FreeTensorMemDynamic(workspace->addr);
+      mem_manager_->FreeTensorMemDynamic(workspace->addr);
       workspace->addr = nullptr;
     }
   }
@@ -441,6 +381,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
 void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr &kernel, size_t input_idx,
                                                      bool *is_communication_op) {
   MS_EXCEPTION_IF_NULL(kernel);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   // The inputs memory of communication kernel is one piece memory, need release together.
   if (AnfAlgo::GetCNodeName(kernel) == kAllReduceOpName) {
     communication_op_input_ref_count_--;
@@ -448,7 +389,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr
       auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0);
       MS_EXCEPTION_IF_NULL(device_address);
       MS_EXCEPTION_IF_NULL(device_address->ptr_);
-      FreeTensorMemDynamic(device_address->ptr_);
+      mem_manager_->FreeTensorMemDynamic(device_address->ptr_);
       device_address->ptr_ = nullptr;
     }
     *is_communication_op = true;
@@ -470,19 +411,12 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr
       auto device_address = AnfAlgo::GetMutableOutputAddr(kernel_input.first, 0);
       MS_EXCEPTION_IF_NULL(device_address);
       MS_EXCEPTION_IF_NULL(device_address->ptr_);
-      FreeTensorMemDynamic(device_address->ptr_);
+      mem_manager_->FreeTensorMemDynamic(device_address->ptr_);
       device_address->ptr_ = nullptr;
     }
     *is_communication_op = true;
   }
 }
-
-void GPUKernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int) {
-  auto device_ptr = AllocTensorMemDynamic(size);
-  MS_EXCEPTION_IF_NULL(device_ptr);
-  address->ptr_ = device_ptr;
-  address->mem_dynamic_alloc_ = true;
-}
 }  // namespace gpu
 }  // namespace device
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h
index f3fdb5fa98f82e3d31b17e5b43683630d13eeeb0..6f761342d36e2b611d6edc0be1a04dde25ef5c57 100644
--- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h
+++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h
@@ -33,7 +33,6 @@ class GPUKernelRuntime : public KernelRuntime {
   ~GPUKernelRuntime() override = default;
   bool Init() override;
   void ReleaseDeviceRes() override;
-  void FreeHostMemory() override;
   void AssignMemory(session::KernelGraph *graph) override;
   bool Run(session::KernelGraph *graph) override;
 
@@ -41,18 +40,11 @@ class GPUKernelRuntime : public KernelRuntime {
   DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,
                                        TypeId type_id) override;
   bool SyncStream() override;
-  // Alloc memory use the dynamic memory pool.
-  void *AllocTensorMemDynamic(size_t size) override;
-  // Free memory use the dynamic memory pool.
-  void FreeTensorMemDynamic(void *device_ptr) override;
-  void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) override;
-  uint8_t *MallocStaticMem(size_t size, bool communication_mem) override;
 
  private:
   GPUKernelRuntime(const GPUKernelRuntime &);
   GPUKernelRuntime &operator=(const GPUKernelRuntime &);
   bool InitDevice();
-  void MallocDeviceMemory();
   bool device_init_{false};
 
   // The related functions and members for using dynamic memory pool.
@@ -69,6 +61,7 @@ class GPUKernelRuntime : public KernelRuntime {
   void FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr &kernel, size_t input_idx, bool *is_communication_op);
   size_t communication_op_input_ref_count_{0};
   size_t communication_op_output_ref_count_{0};
+  MemReuseUtilPtr mem_reuse_util_ptr_{nullptr};
 };
 MS_REG_KERNEL_RUNTIME(kGPUDevice, GPUKernelRuntime);
 }  // namespace gpu
diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3944b504e411acd42dd669a624eeb8b64fc07670
--- /dev/null
+++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc
@@ -0,0 +1,88 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/gpu/gpu_memory_manager.h"
+#include "device/gpu/gpu_memory_allocator.h"
+#include "utils/context/ms_context.h"
+#include "utils/convert_utils.h"
+namespace mindspore {
+namespace device {
+namespace gpu {
+void *GPUMemoryManager::AllocTensorMemDynamic(size_t size) {  // thin forwarder to GPUMemoryAllocator
+  return GPUMemoryAllocator::GetInstance().AllocTensorMem(size);  // result is returned unchecked
+}
+
+void GPUMemoryManager::FreeTensorMemDynamic(void *device_ptr) {  // thin forwarder to GPUMemoryAllocator
+  GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr);  // device_ptr is passed through unchecked
+}
+
+// Reserves device memory for this manager; raises if the dynamic memory pool cannot be initialised.
+void GPUMemoryManager::MallocDeviceMemory() {
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  if (context_ptr->enable_dynamic_mem_pool()) {
+    // Dynamic memory pool: allocate a first 1-byte block so the pool is initialised up front.
+    if (AllocTensorMemDynamic(1) == nullptr) {
+      // This function returns void, so a failure has to be raised rather than only logged.
+      MS_LOG(EXCEPTION) << "Dynamic memory pool init error.";
+    }
+  } else {
+    // Take 80% of the currently free device memory, leaving 20% for dynamic memory.
+    const float init_gpu_mem_ratio = 0.8;
+    size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio);
+    device_mem_size_ =
+      GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast<void **>(&device_mem_base_));
+    static_mem_offset_ = device_mem_size_;
+  }
+}
+
+// Frees the reserved block (if any), clears the pointer so a repeat call cannot free it twice.
+void GPUMemoryManager::FreeDeviceMemory() {
+  if (device_mem_base_ != nullptr && !GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) {
+    MS_LOG(EXCEPTION) << "Could not free gpu device memory.";
+  }
+  device_mem_base_ = nullptr;
+  GPUMemoryAllocator::GetInstance().ReleaseDeviceRes();
+}
+
+// Allocates `size` bytes of static memory; the second (communication) flag is unused by this implementation.
+// With the dynamic memory pool enabled the request is served by AllocTensorMemDynamic; otherwise the aligned
+// size is taken from the top of the remaining static region of the block reserved at device_mem_base_.
+uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool) {
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
+  if (ms_context->enable_dynamic_mem_pool()) {
+    auto pool_ptr = AllocTensorMemDynamic(size);
+    MS_EXCEPTION_IF_NULL(pool_ptr);
+    return AddressOffset(pool_ptr, 0);
+  }
+
+  auto align_size = GetCommonAlignSize(size);
+  // Fail when the request exceeds the remaining static region (tested first, before the subtraction is
+  // evaluated) or when the new static offset would fall below the current dynamic offset.
+  const bool exceeds_static = static_mem_offset_ < align_size;
+  if (exceeds_static || dynamic_mem_offset_ > static_mem_offset_ - align_size) {
+    MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
+                      << "] static[" << total_static_size_ << "])"
+                      << " malloc [" << align_size << "] failed!";
+  }
+  total_static_size_ += align_size;
+  static_mem_offset_ -= align_size;
+  return device_mem_base_ + static_mem_offset_;
+}
+}  // namespace gpu
+}  // namespace device
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.h b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..a18226bdf3f3ff37d2e0979131c2b0887df6e2c2
--- /dev/null
+++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.h
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_
+#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_
+#include "device/memory_manager.h"
+namespace mindspore {
+namespace device {
+namespace gpu {
+class GPUMemoryManager : public MemoryManager {  // MemoryManager implementation for the GPU backend
+ public:
+  GPUMemoryManager() = default;
+  virtual ~GPUMemoryManager() = default;
+
+  void MallocDeviceMemory() override;  // pool warm-up or up-front reservation, chosen by an MsContext flag
+  void FreeDeviceMemory() override;  // frees the reserved block and calls the allocator's ReleaseDeviceRes()
+
+  void *AllocTensorMemDynamic(size_t size) override;  // forwards to GPUMemoryAllocator::AllocTensorMem
+  void FreeTensorMemDynamic(void *device_ptr) override;  // forwards to GPUMemoryAllocator::FreeTensorMem
+
+ protected:
+  uint8_t *MallocStaticMem(size_t size, bool communication_mem);  // NOTE(review): lacks `override` - confirm
+};
+}  // namespace gpu
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_
diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc
index 0a9be35fb50073e4455b05ee623fc8b093936cbe..16025ed8a4e6681b993121d67af91c446ab9e338 100644
--- a/mindspore/ccsrc/device/kernel_runtime.cc
+++ b/mindspore/ccsrc/device/kernel_runtime.cc
@@ -31,18 +31,13 @@
 #include "ir/value.h"
 using mindspore::kernel::Address;
 using mindspore::kernel::AddressPtr;
-using mindspore::memreuse::BestFitMemReuse;
-using mindspore::memreuse::MemReuseUtilPtr;
 
 namespace mindspore {
 namespace device {
 KernelRuntime::~KernelRuntime() {
-  device_mem_base_ = nullptr;
-  device_mem_pool_base_ = nullptr;
 #ifdef ENABLE_DUMP_E2E
   dump_conf_ptr_ = nullptr;
 #endif
-  mem_reuse_util_ptr_ = nullptr;
 }
 
 bool KernelRuntime::Run(session::KernelGraph *graph) {
@@ -88,11 +83,6 @@ bool KernelRuntime::LoadTask(const session::KernelGraph *graph) {
   return false;
 }
 
-void KernelRuntime::FreeHostMemory() {
-  dynamic_mem_offset_ = 0;
-  static_mem_offset_ = 0;
-}
-
 // for D to impl
 bool KernelRuntime::RunTask(const session::KernelGraph *graph) {
   if (graph != nullptr) {
@@ -126,13 +116,11 @@ size_t KernelRuntime::CountNodeDeviceMemorySize(const mindspore::AnfNodePtr &nod
 void KernelRuntime::AssignMemory(session::KernelGraph *graph) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  mem_manager_->ResetDynamicMemory();  // zero the dynamic offset/total before assigning this graph
   AssignStaticMemory(graph);
-  bool is_enable_mem_reuse = context_ptr->enable_mem_reuse();
-  if (is_enable_mem_reuse) {
-    ReuseAssignDynamicMemory(graph);
-  } else {
-    AssignDynamicMemory(graph);
-  }
+  AssignDynamicMemory(graph);  // chooses reuse vs. plain dynamic allocation internally
+
   UpdateRefNodeOutputMem(graph);
 }
 
@@ -159,6 +147,7 @@ void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) {
 void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> &input_tensors,
                                            const session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   for (size_t input_index = 0; input_index < graph->inputs().size(); ++input_index) {
     auto item = graph->inputs()[input_index];
     MS_EXCEPTION_IF_NULL(item);
@@ -180,7 +169,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr>
       auto device_address =
         CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id);
       MS_EXCEPTION_IF_NULL(device_address);
-      MallocOpMemory(device_address, tensor_size, kStaticMem);
+      mem_manager_->MallocOpMemory(device_address, tensor_size);
       AnfAlgo::SetOutputAddr(device_address, index, item.get());
     }
   }
@@ -188,6 +177,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr>
 
 void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) {
   MS_EXCEPTION_IF_NULL(kernel);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
   MS_EXCEPTION_IF_NULL(kernel_mod);
   auto output_sizes = kernel_mod->GetOutputSizeList();
@@ -208,13 +198,14 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) {
     auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
     auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type);
     MS_EXCEPTION_IF_NULL(device_address);
-    MallocOpMemory(device_address, output_sizes[i], kDynamicMem);
+    mem_manager_->MallocOpMemory(device_address, output_sizes[i]);
     AnfAlgo::SetOutputAddr(device_address, i, kernel.get());
   }
 }
 
 void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) {
   MS_EXCEPTION_IF_NULL(kernel);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   if (kernel->isa<CNode>()) {
     auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
     MS_EXCEPTION_IF_NULL(kernel_mod);
@@ -222,7 +213,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) {
     for (size_t i = 0; i < workspace_lists.size(); ++i) {
       auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown);
       MS_EXCEPTION_IF_NULL(device_address);
-      MallocOpMemory(device_address, workspace_lists[i], kDynamicMem);
+      mem_manager_->MallocOpMemory(device_address, workspace_lists[i]);
       AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get());
     }
   }
@@ -230,6 +221,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) {
 
 void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   for (auto &item : graph->inputs()) {
     MS_EXCEPTION_IF_NULL(item);
     if (!item->isa<Parameter>()) {
@@ -247,7 +239,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
         output_type_id = AnfAlgo::GetOutputInferDataType(item, index);
       }
       auto tensor_size = CountNodeDeviceMemorySize(item, index);
-      auto ptr = MallocStaticMem(tensor_size, false);
+      auto ptr = mem_manager_->MallocMem(kStaticMem, tensor_size);
       auto address = CreateDeviceAddress(ptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id);
       AnfAlgo::SetOutputAddr(address, index, item.get());
     }
@@ -301,6 +293,7 @@ void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph *graph) {
 
 void KernelRuntime::AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   auto kernel_mod = AnfAlgo::GetKernelMod(node);
   MS_EXCEPTION_IF_NULL(kernel_mod);
   auto output_sizes = kernel_mod->GetOutputSizeList();
@@ -314,12 +307,12 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr
   std::vector<size_t> align_size_list;
   for (uint64_t mem_size : output_sizes) {
     if (context_ptr->enable_hccl()) {
-      mem_size = GetCommonAlignSize(mem_size);
+      mem_size = mem_manager_->GetCommonAlignSize(mem_size);
     }
     total_size += mem_size;
     align_size_list.emplace_back(mem_size);
   }
-  uint8_t *output_ptr = CalDeviceMem(node, total_size, flag, 0);
+  uint8_t *output_ptr = mem_manager_->MallocOutputMem(node, 0, flag, total_size);
   for (size_t j = 0; j < align_size_list.size(); ++j) {
     std::string output_format = AnfAlgo::GetOutputFormat(node, j);
     auto output_type = AnfAlgo::GetOutputDeviceDataType(node, j);
@@ -333,6 +326,7 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   MS_EXCEPTION_IF_NULL(node);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   size_t total_size = 0;
   std::vector<std::pair<mindspore::device::DeviceAddress *, size_t>> addr_size;
   for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(node); ++i) {
@@ -340,12 +334,12 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) {
     MS_EXCEPTION_IF_NULL(address);
     auto mem_size = address->size();
     if (context_ptr->enable_hccl()) {
-      mem_size = GetCommonAlignSize(mem_size);
+      mem_size = mem_manager_->GetCommonAlignSize(mem_size);
     }
     total_size += mem_size;
     addr_size.emplace_back(address.get(), mem_size);
   }
-  uint8_t *input_ptr = CalDeviceMem(node, total_size, kDynamicMem, 0);
+  uint8_t *input_ptr = mem_manager_->MallocOutputMem(node, 0, kDynamicMem, total_size);
   for (const auto &iter : addr_size) {
     MS_EXCEPTION_IF_NULL(iter.first);
     iter.first->set_ptr(input_ptr);
@@ -355,7 +349,8 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) {
 
 void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int index) {
   MS_EXCEPTION_IF_NULL(node);
-  if (IsCommunicationOp(node)) {
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  if (AnfAlgo::IsCommunicationOp(node)) {
     UpdateCommunicationOpInputMem(node);
     AssignCommunicationNodeOutputMem(flag, node);
     return;
@@ -375,7 +370,7 @@ void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int in
       MS_LOG(INFO) << "Already malloc index:" << i;
       continue;
     }
-    auto ptr = CalDeviceMem(node, output_sizes[i], flag, i);
+    auto ptr = mem_manager_->MallocOutputMem(node, i, flag, output_sizes[i]);
     if (ptr == nullptr) {
       // reused ptr, no need alloc, continue;
       continue;
@@ -390,6 +385,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
                                           size_t output_idx) {
   MS_EXCEPTION_IF_NULL(value_node);
   MS_EXCEPTION_IF_NULL(node_value);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   auto tensor = node_value->cast<TensorPtr>();
   if (tensor == nullptr) {
     MS_LOG(WARNING) << "Tensor is null";
@@ -397,7 +393,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
   }
   size_t tensor_size = tensor->data().nbytes();
   auto node_size = CountNodeDeviceMemorySize(value_node, output_idx);
-  auto ptr = MallocStaticMem(node_size, false);
+  auto ptr = mem_manager_->MallocMem(kStaticMem, node_size);
   TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(value_node, output_idx);
   if (output_type_id == kTypeUnknown) {
     output_type_id = AnfAlgo::GetOutputInferDataType(value_node, output_idx);
@@ -414,6 +410,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
 
 void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   for (auto &value_node : graph->graph_value_nodes()) {
     MS_EXCEPTION_IF_NULL(value_node);
     if (AnfAlgo::OutputAddrExist(value_node, 0)) {
@@ -440,7 +437,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
     } else if (node_value->isa<StringImm>()) {
       auto value = GetValue<std::string>(node_value);
       size_t tensor_size = value.size();
-      auto ptr = MallocStaticMem(tensor_size, false);
+      auto ptr = mem_manager_->MallocMem(kStaticMem, tensor_size);
       auto address = CreateDeviceAddress(ptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeUInt8);
       MS_EXCEPTION_IF_NULL(address);
       AnfAlgo::SetOutputAddr(address, 0, value_node.get());
@@ -452,103 +449,37 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
   }
 }
 
-void KernelRuntime::AssignDynamicMemory(const session::KernelGraph *graph) {
+void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) {  // assigns output/workspace memory per kernel
   MS_EXCEPTION_IF_NULL(graph);
-  // reset dynamic mem offset
-  dynamic_mem_offset_ = 0;
-  auto &kernels = graph->execution_order();
-  for (auto &kernel : kernels) {
-    AssignNodeOutputMem(kDynamicMem, kernel, kGetAllOuts);
-    AssignWorkSpaceMem(kernel);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  bool is_enable_mem_reuse = context_ptr->enable_mem_reuse();
+  auto mem_flag = kDynamicMem;  // default: plain dynamic allocation
+  if (is_enable_mem_reuse) {
+    mem_manager_->InitReuseDynamicMemory(graph);  // build the reuse plan for this graph first
+    mem_flag = kReuseDynamicMem;
   }
-}
-
-void KernelRuntime::ReuseAssignDynamicMemory(session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-  dynamic_mem_offset_ = 0;
-  MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared<memreuse::MemReuseUtil>();
-  MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr);
-  // set all infos
-  mem_reuse_util_ptr->SetAllInfo(graph);
-  auto bestfit_mem_reuse = std::make_shared<BestFitMemReuse>();
-  MS_EXCEPTION_IF_NULL(bestfit_mem_reuse);
-  bestfit_mem_reuse->Reuse(mem_reuse_util_ptr.get());
-  size_t total_allocated_size = bestfit_mem_reuse->GetAllocatedSize();
-  MS_LOG(INFO) << "TotalReuseDynamicSize [" << total_allocated_size << "]";
-  mem_reuse_util_ptr_ = mem_reuse_util_ptr;
-  auto base_ptr = MallocDynamicMem(total_allocated_size, false);
-  mem_reuse_util_ptr_->set_mem_base(base_ptr);
   auto &kernels = graph->execution_order();
   for (auto &kernel : kernels) {
-    AssignNodeOutputMem(kReuseDynamicMem, kernel, kGetAllOuts);
-    AssignReuseWorkSpaceMem(kernel);
+    AssignNodeOutputMem(mem_flag, kernel, kGetAllOuts);  // same flag drives outputs and workspaces
+    AssignWorkSpaceMem(mem_flag, kernel);
   }
 }
 
-void KernelRuntime::AssignReuseWorkSpaceMem(const AnfNodePtr &node) {
+void KernelRuntime::AssignWorkSpaceMem(int flag, const AnfNodePtr &node) {  // one address per workspace of node
   MS_EXCEPTION_IF_NULL(node);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   auto kernel_mod = AnfAlgo::GetKernelMod(node);
   MS_EXCEPTION_IF_NULL(kernel_mod);
   size_t index = 0;
   for (auto &size : kernel_mod->GetWorkspaceSizeList()) {
-    auto wk_ptr = mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index);
-    AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(wk_ptr, size, "", kTypeUnknown), index, node.get());
+    auto ptr = mem_manager_->MallocWorkSpaceMem(node, index, flag, size);  // declared as (node, index, flag, size)
+    AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(ptr, size, "", kTypeUnknown), index, node.get());
     index++;
   }
 }
 
-void KernelRuntime::AssignWorkSpaceMem(const AnfNodePtr &node) {
-  MS_EXCEPTION_IF_NULL(node);
-  if (node->isa<CNode>()) {
-    auto kernel_mod = AnfAlgo::GetKernelMod(node);
-    MS_EXCEPTION_IF_NULL(kernel_mod);
-    size_t index = 0;
-    for (auto &size : kernel_mod->GetWorkspaceSizeList()) {
-      auto ptr = MallocDynamicMem(size, false);
-      AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(ptr, size, "", kTypeUnknown), index, node.get());
-      index++;
-    }
-  }
-}
-
-bool KernelRuntime::IsCommunicationOp(const AnfNodePtr &node) {
-  MS_EXCEPTION_IF_NULL(node);
-  auto kernel_name = AnfAlgo::GetCNodeName(node);
-  auto kernel_type = AnfAlgo::GetKernelType(node);
-  if (kernel_name == kAllReduceOpName || kernel_type == HCCL_KERNEL) {
-    return true;
-  }
-  return false;
-}
-
-uint8_t *KernelRuntime::CalDeviceMem(const AnfNodePtr &node, size_t size, int flag, size_t index) {
-  MS_EXCEPTION_IF_NULL(node);
-  auto context_ptr = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(context_ptr);
-  uint8_t *ptr = nullptr;
-  if (IsCommunicationOp(node)) {
-    bool communication_mem = false;
-    if (context_ptr->enable_hccl()) {
-      communication_mem = true;
-    }
-    if (flag == kStaticMem) {
-      ptr = MallocStaticMem(size, communication_mem);
-    } else {
-      ptr = MallocDynamicMem(size, communication_mem);
-    }
-    return ptr;
-  }
-
-  if (flag == kStaticMem) {
-    ptr = MallocStaticMem(size, false);
-  } else if (flag == kDynamicMem) {
-    ptr = MallocDynamicMem(size, false);
-  } else if (flag == kReuseDynamicMem) {
-    ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index);
-  }
-  return ptr;
-}
-
 void KernelRuntime::GenLaunchArgs(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel,
                                   AddressPtrList *kernel_inputs, AddressPtrList *const kernel_workspaces,
                                   AddressPtrList *kernel_outputs) {
@@ -659,65 +590,6 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph) {
   return true;
 }
 
-size_t KernelRuntime::GetCommonAlignSize(size_t input_size) const {
-  return (input_size + mem_align_size_ + 31) / mem_align_size_ * mem_align_size_;
-}
-
-size_t KernelRuntime::GetCommunicationAlignSize(size_t input_size) const {
-  return (input_size + mem_align_size_ - 1) / mem_align_size_ * mem_align_size_ + 2 * mem_align_size_;
-}
-
-uint8_t *KernelRuntime::MallocStaticMem(size_t size, bool communication_mem) {
-  size_t align_size = 0;
-  if (communication_mem) {
-    align_size = GetCommunicationAlignSize(size);
-  } else {
-    align_size = GetCommonAlignSize(size);
-  }
-  if (static_mem_offset_ < align_size) {
-    MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
-                      << "] static[" << total_static_size_ << "])"
-                      << " malloc [" << align_size << "] failed!";
-  }
-  total_static_size_ += align_size;
-  auto offset = static_mem_offset_ - align_size;
-  if (dynamic_mem_offset_ > offset) {
-    MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
-                      << "] static[" << total_static_size_ << "])"
-                      << " malloc [" << align_size << "] failed!";
-  }
-  static_mem_offset_ = offset;
-  if (communication_mem) {
-    return device_mem_base_ + offset + mem_align_size_;
-  } else {
-    return device_mem_base_ + offset;
-  }
-}
-
-uint8_t *KernelRuntime::MallocDynamicMem(size_t size, bool communication_mem) {
-  size_t align_size = 0;
-  if (communication_mem) {
-    align_size = GetCommunicationAlignSize(size);
-  } else {
-    align_size = GetCommonAlignSize(size);
-  }
-  uint64_t offset = dynamic_mem_offset_;
-  auto new_offset = dynamic_mem_offset_ + align_size;
-  if (new_offset > static_mem_offset_) {
-    MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
-                      << "] static[" << total_static_size_ << "])"
-                      << " malloc [" << align_size << "] failed!";
-  }
-  total_dynamic_size_ += align_size;
-  dynamic_mem_offset_ = new_offset;
-
-  if (communication_mem) {
-    return device_mem_base_ + offset + mem_align_size_;
-  } else {
-    return device_mem_base_ + offset;
-  }
-}
-
 bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
   if (!LaunchKernelMod(*graph)) {
@@ -731,29 +603,6 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph) {
   return true;
 }
 
-void KernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) {
-  if (flag == kStaticMem) {
-    address->ptr_ = MallocStaticMem(size, false);
-  } else if (flag == kDynamicMem) {
-    address->ptr_ = MallocDynamicMem(size, false);
-  } else {
-    MS_LOG(EXCEPTION) << "Unknown memory type!";
-  }
-}
-
-void *KernelRuntime::AllocTensorMemDynamic(size_t size) {
-  if (size == 0) {
-    MS_LOG(ERROR) << "AllocTensorMemDynamic size is 0.";
-  }
-  return nullptr;
-}
-
-void KernelRuntime::FreeTensorMemDynamic(void *device_ptr) {
-  if (device_ptr == nullptr) {
-    MS_LOG(ERROR) << "FreeTensorMemDynamic device_ptr is null.";
-  }
-}
-
 #ifdef ENABLE_DUMP_E2E
 bool KernelRuntime::SetDumpConf() {
   dump_conf_ptr_ = std::make_shared<Dump>();
diff --git a/mindspore/ccsrc/device/kernel_runtime.h b/mindspore/ccsrc/device/kernel_runtime.h
index ac9a56ed4d8ff91521cbb70ba87c4990005942f4..1224bf14ebdc6e6dd80864f911f2b9179b6f38d6 100644
--- a/mindspore/ccsrc/device/kernel_runtime.h
+++ b/mindspore/ccsrc/device/kernel_runtime.h
@@ -20,8 +20,7 @@
 #include <memory>
 #include <string>
 #include <map>
-#include "pre_activate/mem_reuse/mem_reuse.h"
-#include "pre_activate/mem_reuse/mem_reuse_allocator.h"
+
 #include "device/device_address.h"
 #include "ir/meta_tensor.h"
 #include "predict/generator/utils/ir_model_util.h"
@@ -32,21 +31,16 @@
 #include "session/anf_runtime_algorithm.h"
 #include "kernel/kernel.h"
 #include "utils/context/ms_context.h"
+#include "device/memory_manager.h"
 
 // using mindspore::session::KernelGraph;
 using mindspore::tensor::Tensor;
 using TensorPtr = std::shared_ptr<Tensor>;
-using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr;
 using mindspore::kernel::AddressPtr;
 using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
 
 namespace mindspore {
 namespace device {
-const int kStaticMem = 0;
-const int kDynamicMem = 1;
-const int kReuseDynamicMem = 2;
-const int kGetAllOuts = -1;
-
 class KernelRuntime {
  public:
   KernelRuntime() = default;
@@ -65,7 +59,6 @@ class KernelRuntime {
   DumpConfPtr GetDumpConf();
 #endif
   virtual bool LoadTask(const session::KernelGraph *graph);
-  virtual void FreeHostMemory();
   // for GPU and D to impl
   virtual void ReleaseDeviceRes() {}
   void set_device_id(uint32_t device_id) { device_id_ = device_id; }
@@ -75,29 +68,15 @@ class KernelRuntime {
                                                TypeId type_id) = 0;
   virtual bool SyncStream() = 0;
   void AssignStaticMemory(session::KernelGraph *graph);
-  void AssignDynamicMemory(const session::KernelGraph *graph);
-  void ReuseAssignDynamicMemory(session::KernelGraph *graph);
+  void AssignDynamicMemory(session::KernelGraph *graph);  // handles both reuse and non-reuse modes
   void AssignNodeOutputMem(int flag, const AnfNodePtr &node, int index);
-  void AssignWorkSpaceMem(const AnfNodePtr &node);
-  void AssignReuseWorkSpaceMem(const AnfNodePtr &node);
+  void AssignWorkSpaceMem(int flag, const AnfNodePtr &node);  // flag: kDynamicMem or kReuseDynamicMem
   void AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr &node);
   void UpdateRefNodeOutputMem(const session::KernelGraph *graph);
   void UpdateCommunicationOpInputMem(const AnfNodePtr &node);
-  bool IsCommunicationOp(const AnfNodePtr &node);
-  size_t GetCommonAlignSize(size_t input_size) const;
-  size_t GetCommunicationAlignSize(size_t input_size) const;
-
-  uint8_t *CalDeviceMem(const AnfNodePtr &node, size_t size, int flag, size_t index);
-  virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem);
-  uint8_t *MallocDynamicMem(size_t size, bool communication_mem);
 #ifdef ENABLE_DUMP_E2E
   bool SetDumpConf();
 #endif
-  // Alloc memory use the dynamic memory pool.
-  virtual void *AllocTensorMemDynamic(size_t size);
-  // Free memory use the dynamic memory pool.
-  virtual void FreeTensorMemDynamic(void *device_ptr);
-  virtual void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag);
 
  private:
   void AssignStaticMemoryOutput(const session::KernelGraph *graph);
@@ -114,20 +95,11 @@ class KernelRuntime {
 
  protected:
   uint32_t device_id_{0};
-  uint8_t *device_mem_base_{nullptr};
-  uint8_t *device_mem_pool_base_{nullptr};
-  uint64_t device_mem_size_{0};
-  uint64_t device_mem_pool_size_{0};
-  uint64_t dynamic_mem_offset_{0};
-  uint64_t static_mem_offset_{0};
-  const uint64_t mem_align_size_ = 512;
 #ifdef ENABLE_DUMP_E2E
   DumpConfPtr dump_conf_ptr_;
 #endif
   void *stream_ = nullptr;
-  size_t total_static_size_ = 0;
-  size_t total_dynamic_size_ = 0;
-  MemReuseUtilPtr mem_reuse_util_ptr_{nullptr};
+  std::shared_ptr<MemoryManager> mem_manager_{nullptr};
 };
 using KernelRuntimePtr = std::shared_ptr<KernelRuntime>;
 }  // namespace device
diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/device/memory_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3c1ddee6bc5a654cb6b9d616e916eab53327bd9d
--- /dev/null
+++ b/mindspore/ccsrc/device/memory_manager.cc
@@ -0,0 +1,170 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/memory_manager.h"
+#include "session/anf_runtime_algorithm.h"
+#include "utils/context/ms_context.h"
+using mindspore::memreuse::BestFitMemReuse;
+using mindspore::memreuse::MemReuseUtilPtr;
+namespace mindspore {
+namespace device {
+MemoryManager::~MemoryManager() {  // only clears members; no device memory is released here
+  device_mem_base_ = nullptr;
+  device_mem_pool_base_ = nullptr;
+  mem_reuse_util_ptr_ = nullptr;  // drops this manager's reference to the reuse plan
+}
+
+size_t MemoryManager::GetCommonAlignSize(size_t input_size) const {  // padded size for a common allocation
+  return (input_size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize;  // aligned result >= input_size + 32
+}
+
+size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) const {  // padded size for communication memory
+  return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize;  // round up + 2 units
+}
+
+void MemoryManager::InitReuseDynamicMemory(session::KernelGraph *graph) {  // plans reuse and reserves its memory
+  MS_EXCEPTION_IF_NULL(graph);
+  MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared<memreuse::MemReuseUtil>();
+  MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr);
+  // set all infos
+  mem_reuse_util_ptr->SetAllInfo(graph);
+  auto bestfit_mem_reuse = std::make_shared<BestFitMemReuse>();
+  MS_EXCEPTION_IF_NULL(bestfit_mem_reuse);
+  bestfit_mem_reuse->Reuse(mem_reuse_util_ptr.get());
+  size_t total_allocated_size = bestfit_mem_reuse->GetAllocatedSize();
+  MS_LOG(INFO) << "TotalReuseDynamicSize [" << total_allocated_size << "]";
+  mem_reuse_util_ptr_ = mem_reuse_util_ptr;  // kept for later GetNodeOutputPtr / GetNodeWorkSpacePtr lookups
+  auto base_ptr = MallocDynamicMem(total_allocated_size, false);  // one dynamic block sized for the whole plan
+  mem_reuse_util_ptr_->set_mem_base(base_ptr);  // presumably node pointers are derived from this base - confirm
+}
+
+uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size) {
+  MS_EXCEPTION_IF_NULL(node);  // returns device memory for output `index` of `node`, selected by `flag`
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  uint8_t *ptr = nullptr;
+  if (AnfAlgo::IsCommunicationOp(node)) {  // communication ops never take the reuse-plan path
+    bool communication_mem = false;
+    if (context_ptr->enable_hccl()) {
+      communication_mem = true;  // communication alignment/padding only when HCCL is enabled
+    }
+    if (flag == kStaticMem) {
+      ptr = MallocStaticMem(size, communication_mem);
+    } else {  // every non-static flag, including kReuseDynamicMem, allocates dynamically here
+      ptr = MallocDynamicMem(size, communication_mem);
+    }
+    return ptr;
+  }
+
+  if (flag == kStaticMem) {
+    ptr = MallocStaticMem(size, false);
+  } else if (flag == kDynamicMem) {
+    ptr = MallocDynamicMem(size, false);
+  } else if (flag == kReuseDynamicMem) {
+    ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index);  // needs a prior InitReuseDynamicMemory call
+  }
+  return ptr;  // still nullptr when flag matches none of the three constants
+}
+
+uint8_t *MemoryManager::MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size) {
+  if (flag == kReuseDynamicMem) {  // NOTE: parameter order is (node, index, flag, size)
+    return mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index);  // needs a prior InitReuseDynamicMemory call
+  }
+  return MallocDynamicMem(size, false);  // any other flag: fresh dynamic allocation of `size`
+}
+
+uint8_t *MemoryManager::MallocMem(int flag, size_t size) {  // node-independent static/dynamic allocation
+  uint8_t *ptr = nullptr;
+  if (flag == kStaticMem) {
+    ptr = MallocStaticMem(size, false);
+  } else if (flag == kDynamicMem) {
+    ptr = MallocDynamicMem(size, false);
+  }
+  return ptr;  // nullptr for any flag other than kStaticMem / kDynamicMem
+}
+
+uint8_t *MemoryManager::MallocStaticMem(size_t size, bool communication_mem) {  // allocates toward lower offsets
+  size_t align_size = 0;
+  if (communication_mem) {
+    align_size = GetCommunicationAlignSize(size);
+  } else {
+    align_size = GetCommonAlignSize(size);
+  }
+  if (static_mem_offset_ < align_size) {  // guards the unsigned subtraction below against wrap-around
+    MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
+                      << "] static[" << total_static_size_ << "])"
+                      << " malloc [" << align_size << "] failed!";
+  }
+  auto offset = static_mem_offset_ - align_size;
+  if (dynamic_mem_offset_ > offset) {  // the new block would overlap the dynamic region
+    MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
+                      << "] static[" << total_static_size_ << "])"
+                      << " malloc [" << align_size << "] failed!";
+  }
+  total_static_size_ += align_size;  // account only after both capacity checks have passed
+  static_mem_offset_ = offset;
+  if (communication_mem) {
+    return device_mem_base_ + offset + kMemAlignSize;  // skip one alignment unit of leading padding
+  } else {
+    return device_mem_base_ + offset;
+  }
+}
+
+uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) {  // allocates toward higher offsets
+  size_t align_size = 0;
+  if (communication_mem) {
+    align_size = GetCommunicationAlignSize(size);
+  } else {
+    align_size = GetCommonAlignSize(size);
+  }
+  uint64_t offset = dynamic_mem_offset_;  // start of the block being handed out
+  auto new_offset = dynamic_mem_offset_ + align_size;
+  if (new_offset > static_mem_offset_) {  // the new block would run into the static region
+    MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
+                      << "] static[" << total_static_size_ << "])"
+                      << " malloc [" << align_size << "] failed!";
+  }
+  total_dynamic_size_ += align_size;
+  dynamic_mem_offset_ = new_offset;
+
+  if (communication_mem) {
+    return device_mem_base_ + offset + kMemAlignSize;  // skip one alignment unit of leading padding
+  } else {
+    return device_mem_base_ + offset;
+  }
+}
+
+void MemoryManager::MallocOpMemory(const DeviceAddressPtr address, size_t size) {  // dynamic-pool memory for one op
+  auto device_ptr = AllocTensorMemDynamic(size);  // virtual; the base version always returns nullptr
+  MS_EXCEPTION_IF_NULL(device_ptr);
+  address->ptr_ = device_ptr;  // NOTE(review): address itself is not null-checked in this function
+  address->mem_dynamic_alloc_ = true;
+}
+
+void *MemoryManager::AllocTensorMemDynamic(size_t size) {  // base stub: allocates nothing
+  if (size == 0) {
+    MS_LOG(ERROR) << "AllocTensorMemDynamic size is 0.";
+  }
+  return nullptr;  // returned for every size; subclasses override to provide real memory
+}
+
+void MemoryManager::FreeTensorMemDynamic(void *device_ptr) {  // base stub: frees nothing
+  if (device_ptr == nullptr) {
+    MS_LOG(ERROR) << "FreeTensorMemDynamic device_ptr is null.";  // the only effect of the base version
+  }
+}
+}  // namespace device
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/device/memory_manager.h b/mindspore/ccsrc/device/memory_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..2e47237defa747b80c5e971289ca92d4f6ae88a4
--- /dev/null
+++ b/mindspore/ccsrc/device/memory_manager.h
@@ -0,0 +1,71 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_
+#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_
+#include <memory>
+#include "pre_activate/mem_reuse/mem_reuse.h"
+#include "pre_activate/mem_reuse/mem_reuse_allocator.h"
+namespace mindspore {
+namespace device {
+const int kStaticMem = 0;        // flag: allocate through MallocStaticMem
+const int kDynamicMem = 1;       // flag: allocate through MallocDynamicMem
+const int kReuseDynamicMem = 2;  // flag: take the pointer from the memory-reuse plan
+const int kGetAllOuts = -1;      // index value used by KernelRuntime; presumably "all outputs" - confirm
+const uint64_t kMemAlignSize = 512;  // alignment unit used by the Get*AlignSize helpers
+using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr;
+
+class MemoryManager {  // base class; subclasses implement MallocDeviceMemory / FreeDeviceMemory
+ public:
+  MemoryManager() = default;
+  virtual ~MemoryManager();
+
+  virtual void MallocDeviceMemory() = 0;
+  virtual void FreeDeviceMemory() = 0;
+  void ResetDynamicMemory() {  // forgets all dynamic allocations; static bookkeeping is untouched
+    total_dynamic_size_ = 0;
+    dynamic_mem_offset_ = 0;
+  }
+
+  void InitReuseDynamicMemory(session::KernelGraph *graph);  // must run before kReuseDynamicMem requests
+  uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size);
+  uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size);
+  virtual uint8_t *MallocMem(int flag, size_t size);  // nullptr unless flag is kStaticMem or kDynamicMem
+
+  // Allocate memory from the dynamic memory pool (the base implementation returns nullptr).
+  virtual void *AllocTensorMemDynamic(size_t size);
+  // Free memory obtained from the dynamic memory pool (the base implementation frees nothing).
+  virtual void FreeTensorMemDynamic(void *device_ptr);
+  virtual void MallocOpMemory(const DeviceAddressPtr address, size_t size);
+  size_t GetCommonAlignSize(size_t input_size) const;
+  size_t GetCommunicationAlignSize(size_t input_size) const;
+
+ protected:
+  virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem);
+  virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem);
+  uint8_t *device_mem_base_{nullptr};  // base address that static/dynamic offsets are added to
+  uint8_t *device_mem_pool_base_{nullptr};
+  uint64_t device_mem_size_{0};  // reported as "total" in the out-of-memory messages
+  uint64_t device_mem_pool_size_{0};
+  uint64_t dynamic_mem_offset_{0};  // end of the dynamic region; grows with each dynamic allocation
+  uint64_t static_mem_offset_{0};   // start of the static region; shrinks with each static allocation
+  size_t total_static_size_ = 0;    // sum of aligned static allocation sizes
+  size_t total_dynamic_size_ = 0;   // sum of aligned dynamic allocation sizes since the last reset
+  MemReuseUtilPtr mem_reuse_util_ptr_{nullptr};  // set by InitReuseDynamicMemory
+};
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_
diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/session/anf_runtime_algorithm.cc
index cc23dbbdd2f336eadc6cae2659bd75b36ec0b811..78922448afa09f0fa792cbf438b508f040406cdd 100644
--- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc
+++ b/mindspore/ccsrc/session/anf_runtime_algorithm.cc
@@ -857,5 +857,15 @@ void AnfRuntimeAlgorithm::SetNodeInput(const CNodePtr &node, const AnfNodePtr &i
   MS_EXCEPTION_IF_NULL(input_node);
   node->set_input(index + 1, input_node);
 }
+
+// Reports whether `node` is a communication op: true when its CNode name equals
+// kAllReduceOpName or its kernel type is HCCL_KERNEL. A null `node` raises.
+bool AnfRuntimeAlgorithm::IsCommunicationOp(const AnfNodePtr &node) {
+  MS_EXCEPTION_IF_NULL(node);
+  // Fetch both attributes up front, then test them.
+  const auto op_name = AnfAlgo::GetCNodeName(node);
+  const auto op_kernel_type = AnfAlgo::GetKernelType(node);
+  return op_name == kAllReduceOpName || op_kernel_type == HCCL_KERNEL;
+}
 }  // namespace session
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.h b/mindspore/ccsrc/session/anf_runtime_algorithm.h
index 2de68f0098939f8c02b2fc6c3ab096ac52363094..55650ac31e312a040878b46a7b6d71758e88bb6a 100644
--- a/mindspore/ccsrc/session/anf_runtime_algorithm.h
+++ b/mindspore/ccsrc/session/anf_runtime_algorithm.h
@@ -166,6 +166,7 @@ class AnfRuntimeAlgorithm {
   static bool IsFeatureMapInput(const AnfNodePtr &node, size_t input_index);
   // get real input index for some tbe ops which input order is different between me and tbe impl
   static size_t GetRealInputIndex(const AnfNodePtr &anf_node, const size_t cur_index);
+  static bool IsCommunicationOp(const AnfNodePtr &node);  // true if node is AllReduce or an HCCL kernel
 };
 }  // namespace session
 using AnfAlgo = session::AnfRuntimeAlgorithm;
diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/session/gpu_session.cc
index 29330fb19384c58611eb6ace7767f6d90109fdb7..bbcf2228cceb1d72a96db0cccef34f703d2cb616 100644
--- a/mindspore/ccsrc/session/gpu_session.cc
+++ b/mindspore/ccsrc/session/gpu_session.cc
@@ -102,10 +102,6 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList
   graph->set_execution_order(execution_order);
   // Alloc memory, including static memory and dynamic memory
   AllocateMemory(graph.get());
-  // Reset memory resource
-  auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
-  MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->FreeHostMemory();
   return graph_id;
 }
 
diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt
index 8d3f8a8138d94a8beb8c71ceb3958bde041b4e33..3c1351a85732106a8c7a2a48616b7079fc640166 100644
--- a/tests/ut/cpp/CMakeLists.txt
+++ b/tests/ut/cpp/CMakeLists.txt
@@ -85,6 +85,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
         "../../../mindspore/ccsrc/kernel/oplib/*.cc"
         "../../../mindspore/ccsrc/kernel/tbe/*.cc"
         "../../../mindspore/ccsrc/device/kernel_runtime.cc"
+        "../../../mindspore/ccsrc/device/memory_manager.cc"
         "../../../mindspore/ccsrc/device/kernel_runtime_manager.cc"
         "../../../mindspore/ccsrc/device/kernel_info.cc"
         "../../../mindspore/ccsrc/device/ascend/profiling/*.cc"
@@ -92,6 +93,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
         "../../../mindspore/ccsrc/device/convert_tensor_utils.cc"
         "../../../mindspore/ccsrc/device/ascend/kernel_build_ascend.cc"
         "../../../mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc"
+        "../../../mindspore/ccsrc/device/ascend/ascend_memory_manager.cc"
         "../../../mindspore/ccsrc/device/ascend/ascend_device_address.cc"
         "../../../mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc"
         "../../../mindspore/ccsrc/predict/generator/utils/ir_model_util.cc"