Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
7241f3a4
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7241f3a4
编写于
4年前
作者:
M
mindspore-ci-bot
提交者:
Gitee
4年前
浏览文件
操作
浏览文件
下载
差异文件
!1401 change gpu kernel runtime to support memory swap
Merge pull request !1401 from zyli2020/add_mem_swap_module_header
上级
8e36a445
23a57476
master
r0.3
r0.5
r0.6
r0.7
v0.7.0-beta
v0.6.0-beta
v0.5.0-beta
v0.3.1-alpha
v0.3.0-alpha
无相关合并请求
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
262 addition
and
48 deletion
+262
-48
mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
+215
-20
mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h
mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h
+14
-1
mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc
mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc
+28
-24
mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h
mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h
+5
-3
未找到文件。
mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
浏览文件 @
7241f3a4
...
...
@@ -28,10 +28,13 @@
#include "common/utils.h"
#include "device/gpu/gpu_memory_manager.h"
#include "kernel/common_utils.h"
#include "device/gpu/gpu_memory_copy_manager.h"
namespace
mindspore
{
namespace
device
{
namespace
gpu
{
using
mindspore
::
device
::
memswap
::
MemSwapManager
;
using
mindspore
::
device
::
memswap
::
SwapKind
;
bool
GPUKernelRuntime
::
SyncStream
()
{
return
GPUDeviceManager
::
GetInstance
().
SyncStream
(
stream_
);
}
bool
GPUKernelRuntime
::
Init
()
{
...
...
@@ -101,6 +104,12 @@ void GPUKernelRuntime::ReleaseDeviceRes() {
}
CHECK_OP_RET_WITH_EXCEPT
(
GpuBufferMgr
::
GetInstance
().
Destroy
(),
"Could not destroy gpu data queue."
);
}
// destroy remaining memory swap events and free host memory
if
(
mem_swap_manager_
->
trigger_swap
())
{
mem_swap_manager_
->
ClearSwapQueue
();
mem_swap_manager_
->
ReleaseHostPinnedMem
();
}
GPUDeviceManager
::
GetInstance
().
ReleaseDevice
();
if
(
mem_manager_
!=
nullptr
)
{
mem_manager_
->
FreeDeviceMemory
();
...
...
@@ -126,15 +135,29 @@ void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
}
bool
GPUKernelRuntime
::
Run
(
session
::
KernelGraph
*
graph
)
{
bool
ret
;
bool
ret
=
true
;
auto
context_ptr
=
MsContext
::
GetInstance
();
MS_EXCEPTION_IF_NULL
(
context_ptr
);
bool
is_enable_dynamic_mem
=
context_ptr
->
enable_dynamic_mem_pool
();
bool
is_enable_pynative_infer
=
context_ptr
->
enable_pynative_infer
();
auto
iter
=
mem_swap_map_
.
find
(
graph
);
if
(
iter
==
mem_swap_map_
.
end
())
{
GPUMemCopyManagerPtr
gpu_mem_copy_manager
=
std
::
make_shared
<
GPUMemCopyManager
>
();
iter
=
mem_swap_map_
.
emplace
(
graph
,
std
::
make_shared
<
MemSwapManager
>
(
gpu_mem_copy_manager
)).
first
;
}
mem_swap_manager_
=
iter
->
second
;
struct
timeval
start_time
,
end_time
;
(
void
)
gettimeofday
(
&
start_time
,
nullptr
);
if
(
is_enable_dynamic_mem
&&
!
is_enable_pynative_infer
)
{
ret
=
LaunchKernelDynamic
(
graph
);
while
(
!
LaunchKernelDynamic
(
graph
))
{
ClearKernelOutputAddress
(
graph
);
if
(
!
mem_swap_manager_
->
mem_swap_init
())
{
mem_swap_manager_
->
Init
(
graph
);
}
if
(
!
mem_swap_manager_
->
RetreatSwapInfo
())
{
return
false
;
}
}
}
else
{
ret
=
LaunchKernel
(
graph
);
}
...
...
@@ -181,6 +204,27 @@ void GPUKernelRuntime::InitKernelOutputAddress(const session::KernelGraph *graph
}
}
void
GPUKernelRuntime
::
ClearKernelOutputAddress
(
const
session
::
KernelGraph
*
graph
)
{
MS_EXCEPTION_IF_NULL
(
graph
);
auto
&
kernels
=
graph
->
execution_order
();
for
(
const
auto
&
kernel
:
kernels
)
{
auto
kernel_mod
=
AnfAlgo
::
GetKernelMod
(
kernel
);
MS_EXCEPTION_IF_NULL
(
kernel_mod
);
auto
output_sizes
=
kernel_mod
->
GetOutputSizeList
();
for
(
size_t
i
=
0
;
i
<
output_sizes
.
size
();
++
i
)
{
if
(
!
AnfAlgo
::
OutputAddrExist
(
kernel
,
i
))
{
continue
;
}
auto
device_address
=
AnfAlgo
::
GetMutableOutputAddr
(
kernel
,
i
);
if
(
device_address
->
ptr_
)
{
mem_manager_
->
FreeMemFromMemPool
(
device_address
);
}
device_address
->
set_status
(
DeviceAddressStatus
::
kInDevice
);
}
}
}
bool
GPUKernelRuntime
::
LaunchKernelDynamic
(
const
session
::
KernelGraph
*
graph
)
{
MS_EXCEPTION_IF_NULL
(
graph
);
auto
graph_id
=
graph
->
graph_id
();
...
...
@@ -198,32 +242,157 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph) {
AddressPtrList
kernel_inputs
;
AddressPtrList
kernel_workspaces
;
AddressPtrList
kernel_outputs
;
AllocKernelDynamicRes
(
*
kernel_mod
,
kernel
,
&
kernel_inputs
,
&
kernel_workspaces
,
&
kernel_outputs
);
if
(
!
kernel_mod
->
Launch
(
kernel_inputs
,
kernel_workspaces
,
kernel_outputs
,
stream_
))
{
MS_LOG
(
ERROR
)
<<
"Launch kernel failed."
;
auto
ret
=
AllocKernelDynamicRes
(
*
kernel_mod
,
kernel
,
&
kernel_inputs
,
&
kernel_workspaces
,
&
kernel_outputs
);
if
(
!
ret
)
{
return
false
;
}
if
(
!
kernel_mod
->
Launch
(
kernel_inputs
,
kernel_workspaces
,
kernel_outputs
,
stream_
))
{
MS_LOG
(
EXCEPTION
)
<<
"Launch kernel failed."
;
}
FreeKernelDynamicRes
(
kernel
,
kernel_workspaces
,
graph_id
);
if
(
mem_swap_manager_
->
trigger_swap
()
&&
mem_swap_manager_
->
QueryKernelTriggerSwap
(
kernel
))
{
CHECK_OP_RET_WITH_EXCEPT
(
SyncStream
(),
"SyncStream failed."
);
if
(
!
AddMemSwapTask
(
kernel
))
{
return
false
;
}
}
if
(
mem_swap_manager_
->
trigger_swap
())
{
mem_swap_manager_
->
SyncMemCopyStream
(
SwapKind
::
kDeviceToHost
);
}
}
if
(
!
SyncStream
())
{
MS_LOG
(
ERROR
)
<<
"SyncStream failed."
;
return
false
;
CHECK_OP_RET_WITH_EXCEPT
(
SyncStream
(),
"SyncStream failed."
);
if
(
mem_swap_manager_
->
trigger_swap
())
{
mem_swap_manager_
->
ClearSwapQueue
();
}
return
true
;
}
bool
GPUKernelRuntime
::
AddMemSwapTask
(
const
AnfNodePtr
&
kernel
)
{
auto
&
mem_swap_info_list
=
mem_swap_manager_
->
QueryKernelMemSwapInfo
(
kernel
);
for
(
auto
&
mem_swap_info
:
mem_swap_info_list
)
{
auto
&
kernel_exec_info
=
mem_swap_manager_
->
SearchKernelExecutionInfo
(
mem_swap_info
.
kernel_
);
const
HostAddress
&
host_address
=
kernel_exec_info
.
host_addrs_
[
mem_swap_info
.
output_idx_
];
auto
device_address
=
AnfAlgo
::
GetMutableOutputAddr
(
mem_swap_info
.
kernel_
,
mem_swap_info
.
output_idx_
);
if
(
mem_swap_info
.
swap_kind_
==
SwapKind
::
kDeviceToHost
)
{
mem_swap_manager_
->
AddMemSwapTask
(
SwapKind
::
kDeviceToHost
,
device_address
,
host_address
);
}
else
if
(
mem_swap_info
.
swap_kind_
==
SwapKind
::
kHostToDevice
)
{
auto
status
=
device_address
->
status
();
if
(
status
==
DeviceAddressStatus
::
kInDeviceToHost
)
{
mem_swap_manager_
->
InsertSwapInBlackList
(
device_address
->
ptr_
);
device_address
->
set_status
(
DeviceAddressStatus
::
kInDevice
);
}
else
if
(
status
==
DeviceAddressStatus
::
kInHost
)
{
if
(
!
device_address
->
ptr_
&&
!
AttemptMallocMem
(
device_address
,
device_address
->
size_
))
{
return
false
;
}
if
(
!
mem_swap_manager_
->
FindInSwapInBlackList
(
device_address
->
ptr_
))
{
mem_swap_manager_
->
AddMemSwapTask
(
SwapKind
::
kHostToDevice
,
device_address
,
host_address
);
}
}
}
}
return
true
;
}
bool
GPUKernelRuntime
::
AttemptMallocMem
(
const
DeviceAddressPtr
&
device_address
,
size_t
size
)
{
auto
ret
=
mem_manager_
->
MallocMemFromMemPool
(
device_address
,
size
);
if
(
!
ret
)
{
if
(
!
mem_swap_manager_
->
trigger_swap
())
{
return
false
;
}
mem_swap_manager_
->
SyncMemCopyStream
(
SwapKind
::
kDeviceToHost
);
while
(
auto
device_address_swap_out
=
mem_swap_manager_
->
UpdateSwapQueue
(
SwapKind
::
kDeviceToHost
))
{
if
(
!
mem_swap_manager_
->
FindInSwapInBlackList
(
device_address_swap_out
->
ptr_
)
&&
device_address_swap_out
->
ptr_
)
{
device_address_swap_out
->
set_status
(
DeviceAddressStatus
::
kInHost
);
mem_manager_
->
FreeMemFromMemPool
(
device_address_swap_out
);
}
}
ret
=
mem_manager_
->
MallocMemFromMemPool
(
device_address
,
size
);
if
(
!
ret
)
{
return
false
;
}
}
return
true
;
}
void
GPUKernelRuntime
::
AllocKernelDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
void
*
GPUKernelRuntime
::
AttemptMallocMem
(
size_t
size
)
{
auto
device_ptr
=
mem_manager_
->
MallocMemFromMemPool
(
size
);
if
(
!
device_ptr
)
{
if
(
!
mem_swap_manager_
->
trigger_swap
())
{
return
nullptr
;
}
mem_swap_manager_
->
SyncMemCopyStream
(
SwapKind
::
kDeviceToHost
);
while
(
auto
device_address_swap_out
=
mem_swap_manager_
->
UpdateSwapQueue
(
SwapKind
::
kDeviceToHost
))
{
if
(
!
mem_swap_manager_
->
FindInSwapInBlackList
(
device_address_swap_out
->
ptr_
)
&&
device_address_swap_out
->
ptr_
)
{
device_address_swap_out
->
set_status
(
DeviceAddressStatus
::
kInHost
);
mem_manager_
->
FreeMemFromMemPool
(
device_address_swap_out
);
}
}
device_ptr
=
mem_manager_
->
MallocMemFromMemPool
(
size
);
if
(
!
device_ptr
)
{
return
nullptr
;
}
}
return
device_ptr
;
}
bool
GPUKernelRuntime
::
AllocKernelDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_inputs
,
AddressPtrList
*
kernel_workspaces
,
AddressPtrList
*
kernel_outputs
)
{
if
(
!
AllocKernelInputDynamicRes
(
kernel
,
kernel_inputs
))
{
return
false
;
}
if
(
!
AllocKernelOutputDynamicRes
(
kernel_mod
,
kernel
,
kernel_outputs
))
{
return
false
;
}
if
(
!
AllocKernelWorkspaceDynamicRes
(
kernel_mod
,
kernel
,
kernel_workspaces
))
{
return
false
;
}
return
true
;
}
bool
GPUKernelRuntime
::
AllocKernelInputDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_inputs
)
{
MS_EXCEPTION_IF_NULL
(
kernel
);
MS_EXCEPTION_IF_NULL
(
kernel_inputs
);
MS_EXCEPTION_IF_NULL
(
kernel_workspaces
);
MS_EXCEPTION_IF_NULL
(
kernel_outputs
);
MS_EXCEPTION_IF_NULL
(
mem_manager_
);
for
(
size_t
i
=
0
;
i
<
AnfAlgo
::
GetInputTensorNum
(
kernel
);
++
i
)
{
auto
device_address
=
AnfAlgo
::
GetPrevNodeOutputAddr
(
kernel
,
i
);
auto
device_address
=
AnfAlgo
::
GetPrevNode
Mutable
OutputAddr
(
kernel
,
i
);
MS_EXCEPTION_IF_NULL
(
device_address
);
if
(
mem_swap_manager_
->
trigger_swap
())
{
while
(
auto
device_address_swap_in
=
mem_swap_manager_
->
UpdateSwapQueue
(
SwapKind
::
kHostToDevice
))
{
device_address_swap_in
->
set_status
(
DeviceAddressStatus
::
kInDevice
);
}
auto
status
=
device_address
->
status
();
switch
(
status
)
{
case
DeviceAddressStatus
::
kInDevice
:
break
;
case
DeviceAddressStatus
::
kInHost
:
break
;
case
DeviceAddressStatus
::
kInDeviceToHost
:
{
mem_swap_manager_
->
InsertSwapInBlackList
(
device_address
->
ptr_
);
device_address
->
set_status
(
DeviceAddressStatus
::
kInDevice
);
break
;
}
case
DeviceAddressStatus
::
kInHostToDevice
:
{
while
(
device_address
->
status
()
!=
DeviceAddressStatus
::
kInDevice
)
{
while
(
auto
device_address_swap_in
=
mem_swap_manager_
->
UpdateSwapQueue
(
SwapKind
::
kHostToDevice
))
{
device_address_swap_in
->
set_status
(
DeviceAddressStatus
::
kInDevice
);
}
}
break
;
}
default:
MS_LOG
(
ERROR
)
<<
"Invaild device address status"
;
return
false
;
}
}
MS_EXCEPTION_IF_NULL
(
device_address
->
ptr_
);
kernel
::
AddressPtr
input
=
std
::
make_shared
<
kernel
::
Address
>
();
MS_EXCEPTION_IF_NULL
(
input
);
...
...
@@ -231,15 +400,29 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
input
->
size
=
device_address
->
size_
;
kernel_inputs
->
emplace_back
(
input
);
}
return
true
;
}
bool
GPUKernelRuntime
::
AllocKernelOutputDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_outputs
)
{
MS_EXCEPTION_IF_NULL
(
kernel
);
MS_EXCEPTION_IF_NULL
(
kernel_outputs
);
MS_EXCEPTION_IF_NULL
(
mem_manager_
);
if
(
mem_swap_manager_
->
trigger_swap
())
{
while
(
auto
device_address_swap_out
=
mem_swap_manager_
->
UpdateSwapQueue
(
SwapKind
::
kDeviceToHost
))
{
if
(
!
mem_swap_manager_
->
FindInSwapInBlackList
(
device_address_swap_out
->
ptr_
)
&&
device_address_swap_out
->
ptr_
)
{
device_address_swap_out
->
set_status
(
DeviceAddressStatus
::
kInHost
);
mem_manager_
->
FreeMemFromMemPool
(
device_address_swap_out
);
}
}
}
auto
output_sizes
=
kernel_mod
.
GetOutputSizeList
();
for
(
size_t
i
=
0
;
i
<
output_sizes
.
size
();
++
i
)
{
auto
device_address
=
AnfAlgo
::
GetMutableOutputAddr
(
kernel
,
i
);
MS_EXCEPTION_IF_NULL
(
device_address
);
if
(
device_address
->
ptr_
==
nullptr
)
{
auto
ret
=
mem_manager_
->
MallocMemFromMemPool
(
device_address
,
output_sizes
[
i
]);
if
(
!
ret
)
{
MS_LOG
(
EXCEPTION
)
<<
"Malloc device memory failed."
;
}
if
(
device_address
->
ptr_
==
nullptr
&&
!
AttemptMallocMem
(
device_address
,
output_sizes
[
i
]))
{
return
false
;
}
kernel
::
AddressPtr
output
=
std
::
make_shared
<
kernel
::
Address
>
();
MS_EXCEPTION_IF_NULL
(
output
);
...
...
@@ -247,15 +430,24 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
output
->
size
=
output_sizes
[
i
];
kernel_outputs
->
emplace_back
(
output
);
}
return
true
;
}
bool
GPUKernelRuntime
::
AllocKernelWorkspaceDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_workspaces
)
{
MS_EXCEPTION_IF_NULL
(
kernel
);
MS_EXCEPTION_IF_NULL
(
kernel_workspaces
);
MS_EXCEPTION_IF_NULL
(
mem_manager_
);
auto
workspace_sizes
=
kernel_mod
.
GetWorkspaceSizeList
();
for
(
size_t
i
=
0
;
i
<
workspace_sizes
.
size
();
++
i
)
{
if
(
workspace_sizes
[
i
]
==
0
)
{
kernel_workspaces
->
emplace_back
(
nullptr
);
continue
;
}
auto
device_ptr
=
mem_manager_
->
MallocMemFromMemPool
(
workspace_sizes
[
i
]);
auto
device_ptr
=
AttemptMallocMem
(
workspace_sizes
[
i
]);
if
(
!
device_ptr
)
{
MS_LOG
(
EXCEPTION
)
<<
"Malloc device memory failed."
;
return
false
;
}
kernel
::
AddressPtr
workspace
=
std
::
make_shared
<
kernel
::
Address
>
();
MS_EXCEPTION_IF_NULL
(
workspace
);
...
...
@@ -263,6 +455,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
workspace
->
size
=
workspace_sizes
[
i
];
kernel_workspaces
->
emplace_back
(
workspace
);
}
return
true
;
}
void
GPUKernelRuntime
::
AllocCommunicationOpDynamicRes
(
const
session
::
KernelGraph
*
graph
)
{
...
...
@@ -371,6 +564,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
if
(
kernel_ref_count_ptr
->
ref_count_dynamic_use_
==
0
)
{
auto
device_address
=
AnfAlgo
::
GetPrevNodeMutableOutputAddr
(
kernel
,
i
);
mem_manager_
->
FreeMemFromMemPool
(
device_address
);
device_address
->
set_status
(
DeviceAddressStatus
::
kInDevice
);
}
}
// Free the output of kernel, if output has no reference.
...
...
@@ -382,6 +576,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
if
(
kernel_ref_count_ptr
->
ref_count_dynamic_use_
==
0
)
{
auto
device_address
=
AnfAlgo
::
GetMutableOutputAddr
(
kernel
,
i
);
mem_manager_
->
FreeMemFromMemPool
(
device_address
);
device_address
->
set_status
(
DeviceAddressStatus
::
kInDevice
);
}
}
// Free the workspace of kernel.
...
...
This diff is collapsed.
Click to expand it.
mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h
浏览文件 @
7241f3a4
...
...
@@ -24,10 +24,12 @@
#include <unordered_map>
#include "device/kernel_runtime.h"
#include "device/kernel_runtime_manager.h"
#include "pre_activate/mem_reuse/mem_swap_manager.h"
namespace
mindspore
{
namespace
device
{
namespace
gpu
{
using
mindspore
::
device
::
memswap
::
MemSwapManagerPtr
;
class
GPUKernelRuntime
:
public
KernelRuntime
{
public:
GPUKernelRuntime
()
=
default
;
...
...
@@ -51,10 +53,19 @@ class GPUKernelRuntime : public KernelRuntime {
// The related functions and members for using dynamic memory pool.
void
InitKernelRefCount
(
const
session
::
KernelGraph
*
graph
);
void
InitKernelOutputAddress
(
const
session
::
KernelGraph
*
graph
);
void
ClearKernelOutputAddress
(
const
session
::
KernelGraph
*
graph
);
bool
LaunchKernelDynamic
(
const
session
::
KernelGraph
*
graph
);
void
AllocKernelDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
bool
AddMemSwapTask
(
const
AnfNodePtr
&
kernel
);
bool
AttemptMallocMem
(
const
DeviceAddressPtr
&
device_address
,
size_t
size
);
void
*
AttemptMallocMem
(
size_t
size
);
bool
AllocKernelDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_inputs
,
AddressPtrList
*
kernel_workspaces
,
AddressPtrList
*
kernel_outputs
);
bool
AllocKernelInputDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_inputs
);
bool
AllocKernelOutputDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_outputs
);
bool
AllocKernelWorkspaceDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_workspaces
);
void
AllocCommunicationOpDynamicRes
(
const
session
::
KernelGraph
*
graph
);
void
AllocCommunicationOpInputDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
);
void
AllocCommunicationOpOutputDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
);
...
...
@@ -64,6 +75,8 @@ class GPUKernelRuntime : public KernelRuntime {
void
FreeKernelDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
,
const
AddressPtrList
&
kernel_workspaces
,
uint32_t
graph_id
);
std
::
unordered_map
<
uint32_t
,
MemReuseUtilPtr
>
mem_reuse_util_map_
;
std
::
unordered_map
<
void
*
,
MemSwapManagerPtr
>
mem_swap_map_
;
MemSwapManagerPtr
mem_swap_manager_
{
nullptr
};
};
MS_REG_KERNEL_RUNTIME
(
kGPUDevice
,
GPUKernelRuntime
);
}
// namespace gpu
...
...
This diff is collapsed.
Click to expand it.
mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.cc
浏览文件 @
7241f3a4
...
...
@@ -25,10 +25,7 @@ namespace memswap {
void
MemSwapManager
::
Init
(
const
mindspore
::
session
::
KernelGraph
*
kernel_graph
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph
);
execution_order_
=
kernel_graph
->
execution_order
();
FuncGraphManagerPtr
manager
=
kernel_graph
->
manager
();
NodeUsersMap
user_map
=
manager
->
node_users
();
size_t
kernel_index
=
0
;
for
(
const
auto
&
kernel
:
execution_order_
)
{
// parse topo order of kernel
kernel_execution_info_
.
emplace
(
kernel
.
get
(),
kernel_index
++
);
...
...
@@ -44,6 +41,31 @@ void MemSwapManager::Init(const mindspore::session::KernelGraph *kernel_graph) {
}
// parse topo order of user kernel
SaveUserKernelTopoOrder
(
kernel_graph
);
sort
(
ordered_tensors_
.
begin
(),
ordered_tensors_
.
end
(),
[](
const
TensorInfo
&
a
,
const
TensorInfo
&
b
)
{
return
a
.
tensor_size_
>
b
.
tensor_size_
;
});
auto
cur_tensor_size
=
ordered_tensors_
.
front
().
tensor_size_
;
for
(
auto
&
tensor_info
:
ordered_tensors_
)
{
if
(
cur_tensor_size
!=
tensor_info
.
tensor_size_
)
{
cur_tensor_size
=
tensor_info
.
tensor_size_
;
tensor_size_num_
++
;
}
}
tensor_size_threshold_
=
ordered_tensors_
.
front
().
tensor_size_
;
tensor_size_threshold_idx_
=
0
;
distance_threshold_
=
kernel_index
/
kDistanceInitFactor
;
mem_swap_initialized_
=
true
;
MS_EXCEPTION_IF_NULL
(
mem_copy_manager_
);
mem_copy_manager_
->
Init
();
}
void
MemSwapManager
::
SaveUserKernelTopoOrder
(
const
mindspore
::
session
::
KernelGraph
*
kernel_graph
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph
);
FuncGraphManagerPtr
manager
=
kernel_graph
->
manager
();
NodeUsersMap
user_map
=
manager
->
node_users
();
for
(
const
auto
&
kernel
:
execution_order_
)
{
auto
iter
=
user_map
.
find
(
kernel
);
if
(
iter
==
user_map
.
end
())
{
...
...
@@ -66,24 +88,6 @@ void MemSwapManager::Init(const mindspore::session::KernelGraph *kernel_graph) {
sort
(
node_user_pair
.
second
.
begin
(),
node_user_pair
.
second
.
end
());
}
}
sort
(
ordered_tensors_
.
begin
(),
ordered_tensors_
.
end
(),
[](
const
TensorInfo
&
a
,
const
TensorInfo
&
b
)
{
return
a
.
tensor_size_
>
b
.
tensor_size_
;
});
auto
cur_tensor_size
=
ordered_tensors_
.
front
().
tensor_size_
;
for
(
auto
&
tensor_info
:
ordered_tensors_
)
{
if
(
cur_tensor_size
!=
tensor_info
.
tensor_size_
)
{
cur_tensor_size
=
tensor_info
.
tensor_size_
;
tensor_size_num_
++
;
}
}
tensor_size_threshold_
=
ordered_tensors_
.
front
().
tensor_size_
;
tensor_size_threshold_idx_
=
0
;
distance_threshold_
=
kernel_index
/
kDistanceInitFactor
;
mem_swap_initialized_
=
true
;
MS_EXCEPTION_IF_NULL
(
mem_copy_manager_
);
mem_copy_manager_
->
Init
();
}
void
MemSwapManager
::
AddSwapInfo
()
{
...
...
@@ -228,12 +232,12 @@ float MemSwapManager::QueryKernelExecutionPerform(const AnfNodePtr &kernel) cons
return
kernel_exec_info
.
execution_perform_
;
}
bool
MemSwapManager
::
QueryKerneTriggerSwap
(
const
AnfNodePtr
&
kernel
)
const
{
bool
MemSwapManager
::
QueryKerne
l
TriggerSwap
(
const
AnfNodePtr
&
kernel
)
const
{
const
auto
&
kernel_exec_info
=
SearchKernelExecutionInfo
(
kernel
);
return
kernel_exec_info
.
trigger_swap_
;
}
bool
MemSwapManager
::
QueryKerneNeedSwap
(
const
AnfNodePtr
&
kernel
)
const
{
bool
MemSwapManager
::
QueryKerne
l
NeedSwap
(
const
AnfNodePtr
&
kernel
)
const
{
const
auto
&
kernel_exec_info
=
SearchKernelExecutionInfo
(
kernel
);
return
kernel_exec_info
.
need_swap_
;
}
...
...
@@ -254,7 +258,7 @@ const PerformPair &MemSwapManager::QueryKernelSwapPerform(const AnfNodePtr &kern
return
iter_output
->
second
;
}
const
std
::
vector
<
MemSwapInfo
>
&
MemSwapManager
::
QueryKerneMemSwapInfo
(
const
AnfNodePtr
&
kernel
)
const
{
const
std
::
vector
<
MemSwapInfo
>
&
MemSwapManager
::
QueryKerne
l
MemSwapInfo
(
const
AnfNodePtr
&
kernel
)
const
{
MS_EXCEPTION_IF_NULL
(
kernel
);
auto
iter
=
mem_swap_info_
.
find
(
kernel
.
get
());
if
(
iter
==
mem_swap_info_
.
end
())
{
...
...
This diff is collapsed.
Click to expand it.
mindspore/ccsrc/pre_activate/mem_reuse/mem_swap_manager.h
浏览文件 @
7241f3a4
...
...
@@ -63,11 +63,11 @@ class MemSwapManager {
const
PerformPair
&
QueryKernelSwapPerform
(
const
AnfNodePtr
&
kernel
,
size_t
output_idx
)
const
;
bool
QueryKerneTriggerSwap
(
const
AnfNodePtr
&
kernel
)
const
;
bool
QueryKerne
l
TriggerSwap
(
const
AnfNodePtr
&
kernel
)
const
;
bool
QueryKerneNeedSwap
(
const
AnfNodePtr
&
kernel
)
const
;
bool
QueryKerne
l
NeedSwap
(
const
AnfNodePtr
&
kernel
)
const
;
const
std
::
vector
<
MemSwapInfo
>
&
QueryKerneMemSwapInfo
(
const
AnfNodePtr
&
kernel
)
const
;
const
std
::
vector
<
MemSwapInfo
>
&
QueryKerne
l
MemSwapInfo
(
const
AnfNodePtr
&
kernel
)
const
;
void
InsertSwapInBlackList
(
const
void
*
device_ptr
);
...
...
@@ -90,6 +90,8 @@ class MemSwapManager {
void
ResetSwapInfo
();
void
SaveUserKernelTopoOrder
(
const
mindspore
::
session
::
KernelGraph
*
kernel_graph
);
void
AddKernelTriggerSwap
(
const
AnfNodePtr
&
kernel
,
bool
trigger_swap
);
void
AddKernelNeedSwap
(
const
AnfNodePtr
&
kernel
,
bool
need_swap
);
...
...
This diff is collapsed.
Click to expand it.
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录
新手
引导
客服
返回
顶部