// -----------------------------------------------------------------------------
// Reviewer commentary (leading text before the first "diff --git" header).
//
// NOTE(review): this text is a git patch whose lines have been collapsed; the
// original line breaks now appear as " + " separators. It is not compilable
// C++ in this form. Include targets and template argument lists also appear
// to have been stripped (e.g. "#include" with no header name, "std::pair;",
// "std::make_shared()", "std::vector execution_order_;") - TODO restore them
// from the upstream change before applying or building.
//
// First file in the patch: mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h
// (new file, hunk "@@ -0,0 +1,123 @@"). It includes
// "pre_activate/mem_reuse/mem_swap_util.h" and declares, inside namespace
// mindspore::device::memswap, the class MemSwapManager:
//   * Construction: the constructor sets tensor_size_threshold_ = 0,
//     tensor_size_threshold_idx_ = 0, tensor_size_num_ = 1 and
//     distance_threshold_ = 1, and assigns mem_copy_manager_ from
//     std::make_shared. The destructor is defaulted. The copy constructor and
//     copy assignment operator are declared private and deleted.
//   * Inline accessors: trigger_swap() returns trigger_swap_ and
//     mem_swap_init() returns mem_swap_initialized_; both flags, together with
//     swap_info_already_set_, default to false.
//   * Public declarations (definitions are not part of this header):
//     Init(const KernelGraph *), AddMemSwapTask, SyncMemCopyStream,
//     UpdateSwapQueue, RetreatSwapInfo, SearchKernelExecutionInfo,
//     Add/QueryKernelExecutionPerform, Add/QueryKernelSwapPerform,
//     QueryKerneTriggerSwap, QueryKerneNeedSwap, QueryKerneMemSwapInfo,
//     InsertSwapInBlackList, FindInSwapInBlackList, kernel_host_addr,
//     AllocHostPinnedMemory, FreeHostPinnedMemory and ClearSwapQueue.
//   * Private declarations: AddSwapInfo, ResetSwapInfo, AddKernelTriggerSwap,
//     AddKernelNeedSwap and AddKernelMemSwapInfo.
//   * After the class, MemSwapManagerPtr is declared as a std::shared_ptr alias.
//
// NOTE(review): the "QueryKerne..." method names are presumably a misspelling
// of "QueryKernel..."; they are part of the declared interface, so any rename
// has to be coordinated with the callers - confirm before changing.
// NOTE(review): the PerformPair alias is written before "namespace mindspore {",
// so it appears to live at global scope in a header - confirm this is intended.
//
// The tail of this span starts the second file of the patch (the diff header
// and the opening of the licence comment of mem_swap_util.h).
// -----------------------------------------------------------------------------
diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h b/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..5e62e5086429c32d44e5f95c3272465ce09bef87 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h @@ -0,0 +1,123 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_MANAGER_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_MANAGER_H_ + +#include +#include +#include +#include +#include +#include +#include "pre_activate/mem_reuse/mem_swap_util.h" + +using PerformPair = std::pair; +namespace mindspore { +namespace device { +namespace memswap { +class MemSwapManager { + public: + MemSwapManager() + : tensor_size_threshold_(0), tensor_size_threshold_idx_(0), tensor_size_num_(1), distance_threshold_(1) { + mem_copy_manager_ = std::make_shared(); + } + + ~MemSwapManager() = default; + + void Init(const mindspore::session::KernelGraph *kernel_graph); + + void AddMemSwapTask(SwapKind swap_kind, const DeviceAddressPtr &device_address, const HostAddress &host_address); + + bool SyncMemCopyStream(SwapKind swap_kind); + + DeviceAddressPtr UpdateSwapQueue(SwapKind swap_kind); + + // retreat to find a workable swap scheme + bool RetreatSwapInfo(); + + bool trigger_swap() const { return trigger_swap_; } + + bool mem_swap_init() 
const { return mem_swap_initialized_; } + + KernelExecutionInfo &SearchKernelExecutionInfo(const AnfNodePtr &kernel) const; + + void AddKernelExecutionPerform(const AnfNodePtr &kernel, float perform); + + float QueryKernelExecutionPerform(const AnfNodePtr &kernel) const; + + void AddKernelSwapPerform(const AnfNodePtr &kernel, size_t output_idx, const PerformPair &perform); + + const PerformPair &QueryKernelSwapPerform(const AnfNodePtr &kernel, size_t output_idx) const; + + bool QueryKerneTriggerSwap(const AnfNodePtr &kernel) const; + + bool QueryKerneNeedSwap(const AnfNodePtr &kernel) const; + + const std::vector &QueryKerneMemSwapInfo(const AnfNodePtr &kernel) const; + + void InsertSwapInBlackList(const void *device_ptr); + + bool FindInSwapInBlackList(const void *device_ptr) const; + + const HostAddress &kernel_host_addr(const AnfNodePtr &kernel, size_t output_idx) const; + + size_t AllocHostPinnedMemory(size_t size, HostMemPtr *addr) const; + + void FreeHostPinnedMemory(); + + void ClearSwapQueue(); + + private: + MemSwapManager(const MemSwapManager &) = delete; + + MemSwapManager &operator=(const MemSwapManager &) = delete; + + void AddSwapInfo(); + + void ResetSwapInfo(); + + void AddKernelTriggerSwap(const AnfNodePtr &kernel, bool trigger_swap); + + void AddKernelNeedSwap(const AnfNodePtr &kernel, bool need_swap); + + void AddKernelMemSwapInfo(const AnfNodePtr &kernel, const MemSwapInfo &mem_swap_info); + + std::vector execution_order_; + std::vector ordered_tensors_; + std::unordered_map kernel_execution_info_; + std::unordered_map> kernel_swap_perform_; + // trigger swap kernel key : MemSwapInfo of kernel need to be swapped + std::unordered_map> mem_swap_info_; + std::vector host_addrs_list_; + std::unordered_set swap_in_blacklist_; + + size_t tensor_size_threshold_; + size_t tensor_size_threshold_idx_; + size_t tensor_size_num_; + size_t distance_threshold_; + + MemCopyManagerPtr mem_copy_manager_{nullptr}; + bool mem_swap_initialized_{false}; + bool 
swap_info_already_set_{false}; + bool trigger_swap_{false}; +}; +using MemSwapManagerPtr = std::shared_ptr; +} // namespace memswap +} // namespace device +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_MANAGER_H_ diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_util.h b/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_util.h new file mode 100644 index 0000000000000000000000000000000000000000..35a36d6ab5c6d5d2047ef6b4eea10e2f783e2c55 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_util.h @@ -0,0 +1,104 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_UTIL_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_UTIL_H_ + +#include +#include +#include +#include +#include +#include "session/kernel_graph.h" +#include "device/gpu/cuda_driver.h" +#include "kernel/kernel.h" + +using mindspore::device::gpu::DeviceEvent; +using mindspore::device::gpu::DeviceMemPtr; +using mindspore::device::gpu::DeviceStream; +using mindspore::device::gpu::HostMemPtr; +using HostAddress = mindspore::kernel::Address; +namespace mindspore { +namespace device { +namespace memswap { +enum class SwapKind { kDeviceToHost = 0, kHostToDevice = 1 }; + +struct TensorInfo { + size_t tensor_size_{0}; + AnfNodePtr kernel_{nullptr}; + size_t output_idx_{0}; +}; + +struct KernelExecutionInfo { + size_t topo_order_{0}; + float execution_perform_{0.0}; + bool trigger_swap_{false}; + bool need_swap_{false}; + // output index to topo orders of node users + std::map> node_users_map_; + // kernel output idx to host addr + std::map host_addrs_; + + KernelExecutionInfo() : KernelExecutionInfo(0, 0.0, false, false) {} + explicit KernelExecutionInfo(size_t topo_order) + : topo_order_(topo_order), execution_perform_(0.0), trigger_swap_(false), need_swap_(false) {} + KernelExecutionInfo(size_t topo_order, float execution_perform, bool trigger_swap, bool need_swap) + : topo_order_(topo_order), + execution_perform_(execution_perform), + trigger_swap_(trigger_swap), + need_swap_(need_swap) {} +}; + +// trigger swap +struct MemSwapInfo { + SwapKind swap_kind_; + // kernel need to be swapped + AnfNodePtr kernel_{nullptr}; + size_t output_idx_{0}; +}; + +class MemCopyManager { + public: + MemCopyManager() = default; + + ~MemCopyManager() = default; + + void Init(); + + void AddMemSwapOutTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr); + + void AddMemSwapInTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr); + + bool SyncMemCopyStream(SwapKind 
swap_kind); + + DeviceAddressPtr UpdateSwapOutQueue(); + + DeviceAddressPtr UpdateSwapInQueue(); + + void ClearSwapQueue(); + + private: + DeviceStream swap_out_stream_{nullptr}; + DeviceStream swap_in_stream_{nullptr}; + std::queue> swap_out_queue_; + std::queue> swap_in_queue_; +}; +using MemCopyManagerPtr = std::shared_ptr; +} // namespace memswap +} // namespace device +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_UTIL_H_
// -----------------------------------------------------------------------------
// Reviewer commentary (trailing text after the last hunk of the patch).
//
// NOTE(review): the span above is the body of a collapsed git patch for
// mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_util.h; the original line
// breaks appear as " + " separators, and include targets / template argument
// lists appear to have been stripped (e.g. "std::map> node_users_map_;",
// "std::queue> swap_out_queue_;") - TODO restore from the upstream change.
//
// What the header declares:
//   * Includes "session/kernel_graph.h", "device/gpu/cuda_driver.h" and
//     "kernel/kernel.h", then introduces DeviceEvent, DeviceMemPtr,
//     DeviceStream and HostMemPtr from mindspore::device::gpu and aliases
//     HostAddress to mindspore::kernel::Address. These using-declarations are
//     written before "namespace mindspore {".
//     NOTE(review): that appears to place them at global scope in a header -
//     confirm this is intended.
//   * enum class SwapKind with kDeviceToHost = 0 and kHostToDevice = 1.
//   * struct TensorInfo: tensor_size_ (0), kernel_ (nullptr), output_idx_ (0).
//   * struct KernelExecutionInfo: topo_order_, execution_perform_,
//     trigger_swap_, need_swap_ plus two std::map members. The default
//     constructor delegates to the four-argument constructor with
//     (0, 0.0, false, false); the explicit one-argument constructor sets only
//     topo_order_ and uses 0.0 / false / false for the remaining fields.
//   * struct MemSwapInfo: swap_kind_, kernel_ (nullptr), output_idx_ (0).
//     NOTE(review): swap_kind_ is the only member without a default member
//     initializer, so a default-initialized MemSwapInfo leaves it
//     indeterminate - consider giving it an explicit default.
//   * class MemCopyManager: defaulted constructor and destructor; declares
//     Init, AddMemSwapOutTask, AddMemSwapInTask, SyncMemCopyStream,
//     UpdateSwapOutQueue, UpdateSwapInQueue and ClearSwapQueue (definitions
//     are not part of this header). It holds two DeviceStream members
//     (swap_out_stream_, swap_in_stream_), both initialized to nullptr, and
//     two std::queue members (swap_out_queue_, swap_in_queue_).
//   * MemCopyManagerPtr is declared as a std::shared_ptr alias.
// -----------------------------------------------------------------------------