magicwindyyd / mindspore (forked from MindSpore / mindspore)

Commit 5e2f440e
Authored on May 27, 2020 by mindspore-ci-bot
Committed by Gitee on May 27, 2020
!1271 refactor of memreuse allocator to adapt the control stream
Merge pull request !1271 from yangjie159/refactor_memreuse_allocator
Parents: e7936ded, cbf5390b
Showing 13 changed files with 319 additions and 536 deletions (+319, -536):
mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.cc        +1    -1
mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.h         +5    -1
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc              +28   -0
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h               +3    -0
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc    +150  -245
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.h     +64   -34
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc      +59   -15
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.h       +7    -1
mindspore/ccsrc/pre_activate/mem_reuse/stream_reuse.cc           +0    -102
mindspore/ccsrc/pre_activate/mem_reuse/stream_reuse.h            +0    -63
tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_allocator_test.cc  +2    -5
tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc            +0    -6
tests/ut/cpp/pre_activate/mem_reuse/stream_reuse_test.cc         +0    -63
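Taken together, the hunks below replace the allocator's safety test for cross-kernel buffer reuse. The deleted StreamReuse machinery asked whether two stream ids might run in parallel (IsReusableStream over parallel_streams_map_); the new code asks whether the buffer's previous user is a guaranteed predecessor of the current kernel (IsUsable over kernel_front_map_). A minimal side-by-side sketch of the two predicates with simplified stand-in types; the predicate bodies are assumptions, only the quoted names come from this diff:

#include <cstdint>
#include <map>
#include <memory>
#include <set>
#include <unordered_map>
#include <unordered_set>

// Simplified stand-in for memreuse::KernelDef; only identity matters here.
struct Kernel {};
using KernelPtr = std::shared_ptr<Kernel>;

// Old condition (deleted in this commit), sketched: reuse of a buffer across
// stream ids was refused only when the two streams were recorded as parallel.
bool IsReusableStreamOld(
    const std::unordered_map<uint32_t, std::unordered_set<uint32_t>> &parallel_streams_map,
    uint32_t curr_stream_id, uint32_t target_stream_id) {
  auto iter = parallel_streams_map.find(curr_stream_id);
  if (iter == parallel_streams_map.end()) {
    return true;  // no recorded parallel streams: treat the pair as sequential
  }
  return iter->second.count(target_stream_id) == 0;
}

// New condition (introduced in this commit), sketched: a membuf last used by
// kernel_prev may be reused by kernel_curr only if kernel_prev is a recorded
// predecessor of kernel_curr; otherwise the two kernels may still run
// concurrently (e.g. on different control streams) and would race on the buffer.
bool IsUsableNew(const std::map<KernelPtr, std::set<KernelPtr>> &kernel_front_map,
                 const KernelPtr &kernel_curr, const KernelPtr &kernel_prev) {
  auto iter = kernel_front_map.find(kernel_curr);
  if (iter == kernel_front_map.end()) {
    return false;  // unknown ordering: be conservative, do not reuse
  }
  return iter->second.count(kernel_prev) > 0;
}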
mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.cc
@@ -47,7 +47,7 @@ std::vector<int> KernelDef::GetOutputRefIndexs() const {
   return output_ref_indexs;
 }
 
-std::vector<int> KernelDef::GetWkRefIndexs() const {
+std::vector<int> KernelDef::GetWorkspaceRefIndexs() const {
   std::vector<int> wk_ref_indexs;
   if (wk_space_.empty()) {
     return wk_ref_indexs;
mindspore/ccsrc/pre_activate/mem_reuse/kernel_refcount.h
@@ -20,6 +20,7 @@
 #include <map>
 #include <string>
 #include <memory>
+#include <set>
 
 namespace mindspore {
 namespace memreuse {
@@ -73,13 +74,15 @@ class KernelDef {
   KernelRefCountPtrList output_refs() const { return output_refs_; }
   std::vector<int> GetInputRefIndexs() const;
   std::vector<int> GetOutputRefIndexs() const;
-  std::vector<int> GetWkRefIndexs() const;
+  std::vector<int> GetWorkspaceRefIndexs() const;
   void set_stream_id(uint32_t stream_id) { stream_id_ = stream_id; }
   uint32_t stream_id() const { return stream_id_; }
   void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; }
   std::string kernel_name() const { return kernel_name_; }
   void set_scope_full_name(const std::string &scop_name) { scop_full_name_ = scop_name; }
   std::string scope_full_name() const { return scop_full_name_; }
+  void InsertInputKernel(const std::shared_ptr<KernelDef> &input_kernel) { input_kernels_.insert(input_kernel); }
+  const std::set<std::shared_ptr<KernelDef>> &input_kernels() { return input_kernels_; }
 
  private:
   std::string scop_full_name_;
@@ -87,6 +90,7 @@ class KernelDef {
   uint32_t stream_id_{0};
   KernelRefCountPtrList input_refs_;
   KernelRefCountPtrList output_refs_;
+  std::set<std::shared_ptr<KernelDef>> input_kernels_;
 };
 using KernelDefPtr = std::shared_ptr<KernelDef>;
 }  // namespace memreuse
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.cc
@@ -245,6 +245,34 @@ void MemReuseUtil::SetKernelDefMap() {
     kernel_def_ptr->set_input_refs(kernel_def_ptr->inputs_[key]);
     kernel_def_ptr->set_output_refs(kernel_def_ptr->outputs_[key]);
     kernel_def_ptr_list_.push_back(kernel_def_ptr);
     kernel_map_[key] = kernel_def_ptr;
   }
+  SetKernelDefInputs();
 }
 
+void MemReuseUtil::SetKernelDefInputs() {
+  for (const auto &kernel : graph_->execution_order()) {
+    auto key = kernel.get();
+    // find kernel_def according to cnode addr
+    auto iter = kernel_map_.find(key);
+    if (iter == kernel_map_.end()) {
+      MS_LOG(EXCEPTION) << "kernel [" << kernel->fullname_with_scope() << "] is not init.";
+    }
+    auto kernel_def = iter->second;
+    for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) {
+      auto ref_ptr = GetKernelInputRef(kernel, i);
+      if (ref_ptr != nullptr) {
+        // set the inputs of this kernel_def
+        auto input_node = AnfAlgo::GetInputNode(kernel, i);
+        auto input = AnfAlgo::VisitKernel(input_node, 0);
+        auto input_key = (input.first).get();
+        auto input_iter = kernel_map_.find(input_key);
+        if (input_iter == kernel_map_.end()) {
+          MS_LOG(EXCEPTION) << "kernel [" << (input.first)->fullname_with_scope() << "] is not init.";
+        }
+        kernel_def->InsertInputKernel(input_iter->second);
+      }
+    }
+  }
+}
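SetKernelDefInputs above records only each kernel's direct producers. The allocator's InitKernelDependence (declared below in mem_reuse_allocator.h) still has to expand those direct edges into the transitive predecessor sets stored in kernel_front_map_. A minimal sketch of that expansion as a breadth-first walk over input_kernels(); the traversal itself is an assumption, only the names InitKernelDependence, input_kernels, and kernel_front_map_ appear in this diff:

#include <map>
#include <memory>
#include <queue>
#include <set>
#include <vector>

struct KernelDef;
using KernelDefPtr = std::shared_ptr<KernelDef>;

struct KernelDef {
  std::set<KernelDefPtr> input_kernels_;  // direct producers, filled by SetKernelDefInputs
  const std::set<KernelDefPtr> &input_kernels() { return input_kernels_; }
};

// Hypothetical free-function form of BestFitMemReuse::InitKernelDependence:
// for every kernel, collect all transitive predecessors by BFS over the
// direct input edges, yielding kernel -> set of kernels that run before it.
std::map<KernelDefPtr, std::set<KernelDefPtr>> InitKernelDependence(
    const std::vector<KernelDefPtr> &op_ptr_list) {
  std::map<KernelDefPtr, std::set<KernelDefPtr>> kernel_front_map;
  for (const auto &kernel : op_ptr_list) {
    std::set<KernelDefPtr> front;
    std::queue<KernelDefPtr> to_visit;
    to_visit.push(kernel);
    while (!to_visit.empty()) {
      auto curr = to_visit.front();
      to_visit.pop();
      for (const auto &input : curr->input_kernels()) {
        if (front.insert(input).second) {  // newly discovered predecessor
          to_visit.push(input);
        }
      }
    }
    kernel_front_map[kernel] = front;
  }
  return kernel_front_map;
}

Walking kernels in execution order and merging already-computed predecessor sets would avoid revisiting shared ancestors; the sketch keeps the plain BFS for clarity.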
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse.h
@@ -61,6 +61,7 @@ class MemReuseUtil {
   void SetInputMap(const CNodePtr &kernel, KernelDef *kernel_def_ptr);
   void SetOutputMap(const CNodePtr &kernel, KernelDef *kernel_def_ptr);
   void SetWkMap(const CNodePtr &kernel, KernelDef *kernel_def_ptr);
+  void SetKernelDefInputs();
   void SetReuseRefCount();
   // Set the reference count of graph output specially.
   void SetGraphOutputRefCount();
@@ -94,6 +95,8 @@ class MemReuseUtil {
   size_t total_workspace_size_ = 0;
   size_t total_reuseworkspace_size_ = 0;
   uint8_t *mem_base_{nullptr};
+  // kernel_map_: key is the AnfNodePtr addr, value is the KernelDef
+  std::map<KernelKey, KernelDefPtr> kernel_map_;
 };
 using MemReuseUtilPtr = std::shared_ptr<MemReuseUtil>;
 }  // namespace memreuse
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc
This diff is collapsed; its content is not captured here.
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.h
@@ -29,31 +29,30 @@
 #include <unordered_map>
 #include <unordered_set>
 #include <set>
+#include <queue>
 #include "pre_activate/mem_reuse/kernel_refcount.h"
 #include "pre_activate/mem_reuse/mem_reuse.h"
-#include "pre_activate/mem_reuse/stream_reuse.h"
 namespace mindspore {
 namespace memreuse {
-static constexpr int kWkIndexFactor = -1000;
-static constexpr int kDyFac = -1;
-static constexpr int kWkFac = 1;
+static constexpr int kWorkspaceIndexFactor = -1000;
+static constexpr int kDynamicMem = -1;
+static constexpr int kWorkspaceMem = 1;
+static constexpr size_t kTotalSize = 0;
 enum Status { kUnused, kReused };
 class Membuf {
  public:
   Membuf() = default;
-  Membuf(uint32_t stream_id, Status status, size_t size, size_t offset, int index)
-      : stream_id_(stream_id), status_(status), size_(size), offset_(offset), index_(index) {}
+  Membuf(Status status, size_t size, size_t offset, int index, const KernelDefPtr &used_kernel)
+      : status_(status), size_(size), offset_(offset), index_(index), used_kernel_(used_kernel) {}
   ~Membuf() = default;
   // Memory block status flags
-  std::set<uint32_t> called_stream_ids_;
-  uint32_t stream_id_{0};
   Status status_ = kUnused;
   size_t size_{0};
   size_t offset_{0};
   // Store the tensor index stored in this memory block at a certain moment
   int index_{0};
+  KernelDefPtr used_kernel_;
 };
 using MembufPtr = std::shared_ptr<Membuf>;
@@ -61,24 +60,45 @@ class BestFitMemReuse {
  public:
   BestFitMemReuse() = default;
   ~BestFitMemReuse() { membuf_ptr_list_.clear(); }
-  // Init all information need by memory reuse
+  /**
+   * Init all information need by memory reuse
+   * @param mem_reuse_util_ptr, initialize in the memreuse.cc
+   */
   void InitMemReuseInfo(const MemReuseUtil *mem_reuse_util_ptr);
-  bool CheckMembufIndx(const std::vector<MembufPtr> &membuf_ptr_list, size_t check_idx) const;
-  bool IsMembufListEmpty(const std::vector<MembufPtr> &membuf_ptr_list) const;
-  void AssignNodeWkOffset(const KernelDef *kernel_def_ptr);
-  void ReleasePreNodeWkSpace(const KernelDef *kernel_def_ptr);
-  // void assign node output tensor memory offset
-  void AssignNodeOutputOffset(const KernelDef *kernel_def_ptr);
-  void ReleaseParallStream();
-  // update node input tensor refcount, and membuf list status
-  void UpdateNodeInputAndMembuf(const KernelDef *kernel_def_ptr);
-  // check node output tensor which refcount is equal to zero
-  void ReleaseNodeUnusedOutput(const KernelDef *kernel_def_ptr);
-  // If there are memory blocks that can be reused
+  void CheckMembufIndx(size_t check_idx) const;
+  void AssignNodeWorkspaceOffset();
+  void ReleasePreNodeWorkspace(const KernelDef *kernel_def_ptr);
+  /**
+   * Assign output tensor memory offset of current kernel
+   */
+  void AssignNodeOutputOffset();
+  /**
+   * Update input tensor's status of current kernel, and the status of membuf used by current kernel
+   */
+  void UpdateNodeInputAndMembuf();
+  /**
+   * Check whether to release the kernel output tensor which refcount is equal to zero
+   */
+  void ReleaseNodeUnusedOutput();
+  /**
+   * Reuse the exist membuf if possible
+   * @param tensor_desc, the output tensor of current kernel
+   * @param membuf_index, the index of membuf to be reused
+   * @param flag
+   */
   void ReuseExistMembuf(KernelRefCount *tensor_desc, size_t membuf_index, int flag);
-  // Save memory blocks that can be reused to the map
+  /**
+   * Get the membuf that can be reused
+   * @param tensor_size, the size of the tensor ready to assign memory offset
+   * @return membuf map, key: the membuf size, value: the membuf index
+   */
   std::map<size_t, size_t> GetReusableMembufMap(size_t tensor_size);
-  // Update the status of the reused memory block
+  /**
+   * Update the status of the reused memory block
+   * @param tensor_desc, the tensor ready to assign memory
+   * @param membuf, the membuf to be reused
+   * @param flag, distinguish dynamic memory and workspace
+   */
   void UpdateMembufInfo(KernelRefCount *tensor_desc, Membuf *membuf, int flag);
   // If the size of the memory block is greater than the size of the tensor, split the extra memory
   void SplitMembuf(const KernelRefCount *tensor_desc, size_t membuf_index);
@@ -88,30 +108,39 @@ class BestFitMemReuse {
   void AddNewMembufPtr(KernelRefCount *tensor_desc, int flag);
   // Merge unused membuf
   void ReleaseMembuf(size_t tensor_index, int flag);
-  bool HasParallelId(const std::set<uint32_t> &called_ids, uint32_t curr_id);
-  void MergeCalledIds(const Membuf *membuf_target, Membuf *membuf);
   // Memory address alignment 512
   size_t AlignMemorySize(size_t size) const;
-  int GetFacIdx(size_t real_idx, int flag = kDyFac) const;
-  int GetRealIdx(int fac_idx, int flag = kDyFac) const;
-  size_t FindIndx(const std::vector<MembufPtr> &membuf_ptr_list, int fac_idx) const;
-  void CheckTensorIndex(int tensor_index) const;
+  int GetRealIndex(size_t index, int flag = kDynamicMem) const;
+  size_t GetTensorIndex(int index) const;
+  size_t GetWorkspaceIndex(int index) const;
   // Memory reuse main program entry
   void Reuse(const MemReuseUtil *mem_reuse_util_ptr);
   // Get the total memory that needs to be applied eventually
   size_t GetAllocatedSize();
-  // If the target stream can be reused by current stream
-  bool IsReusableStream(uint32_t curr_stream_id, uint32_t target_stream_id);
   // return false, when the node output cannot be released
-  bool IsRelease(const std::string &kernel_name);
+  bool IsRelease();
+  /**
+   * determine if the kernel_curr can reuse the output tensor add of kernel_prev
+   * @param kernel_curr, current kernel
+   * @param kernel_prev, the membuf used by this kernel
+   * @return bool
+   */
+  bool IsUsable(const KernelDefPtr &kernel_curr, const KernelDefPtr &kernel_prev);
+  /**
+   * init the dependence of all kernels in the graph
+   */
+  void InitKernelDependence();
   // set tensor_def and op_def
   void set_tensor_ptr_list(const std::vector<KernelRefCountPtr> &tensor_ptr_list) {
     tensor_ptr_list_ = tensor_ptr_list;
   }
   void set_workspace_ptr_list(const std::vector<KernelRefCountPtr> &workspace_ptr_list) {
     wk_tensor_list_ = workspace_ptr_list;
   }
   void set_op_ptr_list(const std::vector<KernelDefPtr> &op_ptr_list) { op_ptr_list_ = op_ptr_list; }
 
  private:
-  uint32_t current_stream_id_{0};
+  KernelDefPtr current_kernel_;
   // Save all tensor information
   std::vector<KernelRefCountPtr> tensor_ptr_list_;
   std::vector<KernelRefCountPtr> wk_tensor_list_;
@@ -119,7 +148,8 @@ class BestFitMemReuse {
   std::vector<KernelDefPtr> op_ptr_list_;
   // Memory block information sequence, temporary variables
   std::vector<MembufPtr> membuf_ptr_list_;
-  std::unordered_map<uint32_t, std::unordered_set<uint32_t>> parallel_streams_map_;
+  // kernel_front_map_, key: the kernel_def, value: kernels before this kernel_def
+  std::map<KernelDefPtr, std::set<KernelDefPtr>> kernel_front_map_;
 };
 }  // namespace memreuse
 }  // namespace mindspore
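The header above documents GetReusableMembufMap as returning candidate blocks keyed by size, and pairs it with IsUsable, current_kernel_, and the per-block used_kernel_. A minimal sketch of how such a best-fit lookup could combine them, written against the class declared above; the body is an assumption, only the signature and the documented key/value contract come from this diff:

// Sketch under assumptions: a std::map keyed by block size means begin() is
// the smallest block that still fits, i.e. the best fit. Gating on IsUsable
// keeps a buffer off the candidate list while its last user may still be
// running concurrently with the current kernel.
std::map<size_t, size_t> BestFitMemReuse::GetReusableMembufMap(size_t tensor_size) {
  std::map<size_t, size_t> size_map;
  for (size_t i = 0; i < membuf_ptr_list_.size(); ++i) {
    auto &membuf = membuf_ptr_list_[i];
    // candidate: free, large enough, and its previous user is a guaranteed
    // predecessor of the kernel currently being assigned (current_kernel_)
    if (membuf->status_ == kUnused && membuf->size_ >= tensor_size &&
        IsUsable(current_kernel_, membuf->used_kernel_)) {
      (void)size_map.insert(std::make_pair(membuf->size_, i));
    }
  }
  return size_map;
}

Because std::map iterates in ascending key order, a caller can take size_map.begin() as the tightest fit and fall back to AddNewMembufPtr when the map comes back empty.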
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.cc
@@ -19,8 +19,6 @@
 #include <vector>
 #include <utility>
 #include <string>
-#include <unordered_map>
-#include <unordered_set>
 namespace mindspore {
 namespace memreuse {
@@ -188,6 +186,27 @@ void MemReuseChecker::CheckMemReuseIR(const KernelRefCountPtrList &total_refs_li
   ofs.close();
 }
 
+void MemReuseChecker::ExportKernelDependence() {
+  std::string filename = "./memreuse_dependence.ir";
+  std::ofstream ofs(filename);
+  if (!ofs.is_open()) {
+    MS_LOG(ERROR) << "Open file [" << filename << "] failed!";
+    return;
+  }
+  size_t i = 0;
+  for (const auto &kernel_front : kernel_front_map_) {
+    auto kernel = kernel_front.first;
+    auto front = kernel_front.second;
+    ofs << "[" << i++ << "] " << kernel->scope_full_name() << "\n";
+    for (const auto &node : front) {
+      ofs << node->scope_full_name() << "\n";
+    }
+    ofs << "\n\n";
+  }
+  ofs.close();
+}
+
 bool MemReuseChecker::CheckGraphOutputAssigned(const session::KernelGraph *graph) {
   // set real graph output node to be special who's refcount equal kMaxRefCount
   for (const auto &output : graph->outputs()) {
@@ -393,7 +412,7 @@ void MemReuseChecker::CheckNormalIR(const session::KernelGraph *graph) {
 void MemReuseChecker::SetMembuInfos(const KernelDef *op_def, const std::vector<MembufPtr> &membuf_ptr_list) {
   std::vector<MembufPtr> curr_mem_infos;
   for (const auto &mem : membuf_ptr_list) {
-    auto mem_checker = std::make_shared<Membuf>(mem->stream_id_, mem->status_, mem->size_, mem->offset_, mem->index_);
+    auto mem_checker = std::make_shared<Membuf>(mem->status_, mem->size_, mem->offset_, mem->index_, mem->used_kernel_);
     curr_mem_infos.push_back(mem_checker);
   }
   membuf_all_infos_.push_back(curr_mem_infos);
@@ -407,7 +426,7 @@ void MemReuseChecker::SetAddNewMembuInfos(const KernelDef *op_def, const std::ve
   std::vector<MembufPtr> add_new_curr_mem;
   for (const auto &mem : membuf_ptr_list) {
-    auto mem_checker = std::make_shared<Membuf>(mem->stream_id_, mem->status_, mem->size_, mem->offset_, mem->index_);
+    auto mem_checker = std::make_shared<Membuf>(mem->status_, mem->size_, mem->offset_, mem->index_, mem->used_kernel_);
     add_new_curr_mem.push_back(mem_checker);
   }
   add_new_mem_infos_.push_back(add_new_curr_mem);
@@ -424,11 +443,11 @@ void MemReuseChecker::ExportMembufInfoIR() {
   if (!ofs.is_open()) {
     MS_LOG(ERROR) << "Open file [" << ir_file_name << "] failed!";
   }
-  ofs << "total_ori_static_size:" << total_ori_static_size_ << "\n";
-  ofs << "total_ori_weight_size:" << total_ori_input_size_ << "\n";
-  ofs << "total_ori_constant_size:" << total_ori_value_size_ << "\n";
-  ofs << "total_ori_dy_size:" << total_ori_dy_size_ << "\n";
-  ofs << "total_ori_wkspace_size:" << total_ori_wkspace_size_ << "\n";
+  ofs << "Total static size:\t" << total_ori_static_size_ << "\n";
+  ofs << "Graph inputs size:\t" << total_ori_input_size_ << "\n";
+  ofs << "Value nodes size:\t" << total_ori_value_size_ << "\n";
+  ofs << "Total dynamic size:\t" << total_ori_dy_size_ << "\n";
+  ofs << "Total workspace size:\t" << total_ori_wkspace_size_ << "\n";
   // get last membuf_list
   if (membuf_all_infos_.empty()) {
     return;
@@ -438,8 +457,10 @@ void MemReuseChecker::ExportMembufInfoIR() {
     auto checker_size = SizeToLong(membuf->size_);
     total_reuse_size += checker_size;
   }
-  ofs << "total_reuse_size:" << total_reuse_size << "\n";
+  ofs << "After reuse size:\t" << total_reuse_size << "\n\n";
   size_t i = 0;
+  std::vector<size_t> each_node_used_size;
+  std::vector<size_t> each_node_allocated_size;
   for (const auto &curr_membuf_list : membuf_all_infos_) {
     ofs << all_split_names_.at(i) << "\n";
     ++i;
@@ -449,17 +470,42 @@ void MemReuseChecker::ExportMembufInfoIR() {
         << "tensor_idex\t"
        << "mem_size\t"
        << "mem_head\t"
-       << "mem_tail\n";
+       << "mem_tail\t"
+       << "used_kernel\n";
+    size_t curr_used = 0;
+    size_t curr_allocated = 0;
     for (size_t j = 0; j < curr_membuf_list.size(); ++j) {
       auto membuf = curr_membuf_list.at(j);
+      auto used_kernel = membuf->used_kernel_->scope_full_name();
       ofs << "&" << j << "\t"
-          << "streamID[@" << membuf->stream_id_ << "]"
+          << "streamID[@" << membuf->used_kernel_->stream_id() << "]"
           << "\t"
           << "#" << static_cast<int>(membuf->status_) << "\t%" << membuf->index_ << "T"
-          << "\t" << membuf->size_ << "\t" << membuf->offset_ << "\t" << membuf->offset_ + membuf->size_ << "\n";
+          << "\t" << membuf->size_ << "\t" << membuf->offset_ << "\t" << membuf->offset_ + membuf->size_ << "\t"
+          << GetSplitName(used_kernel) << "\n";
+      if (membuf->status_ == kReused) {
+        curr_used += membuf->size_;
+      }
     }
+    if (!curr_membuf_list.empty()) {
+      curr_allocated = curr_membuf_list.back()->offset_ + curr_membuf_list.back()->size_;
+    }
+    each_node_used_size.push_back(curr_used);
+    each_node_allocated_size.push_back(curr_allocated);
+    ofs << "curr real used size:\t" << curr_used << "\n";
+    ofs << "curr allocated size:\t" << curr_allocated << "\n";
+    ofs << "\n\n";
   }
+  ofs << "each node used size:\n";
+  for (auto size : each_node_used_size) {
+    ofs << size << "\t";
+  }
+  ofs << "\n\n";
+  ofs << "each node allocated size:\n";
+  for (auto size : each_node_allocated_size) {
+    ofs << size << "\t";
+  }
+  ofs << "\n\n";
   ofs.close();
 }
@@ -479,7 +525,6 @@ void MemReuseChecker::ExportAddNewMmebufIR() {
        << "\n";
     i++;
     ofs << "mem_num\t"
-        << "stream_id\t"
         << "status\t"
         << "tensor_idex\t"
         << "mem_size\t"
@@ -490,7 +535,6 @@ void MemReuseChecker::ExportAddNewMmebufIR() {
     for (size_t j = 0; j < curr_membuf_list.size(); ++j) {
       auto membuf = curr_membuf_list.at(j);
       ofs << "&" << j << "\t"
-          << "streamID[@" << membuf->stream_id_ << "]"
           << "\t"
           << "#" << static_cast<int>(membuf->status_) << "\t%" << membuf->index_ << "T"
           << "\t" << membuf->size_ << "\t" << membuf->offset_ << "\t" << membuf->offset_ + membuf->size_ << "\t";
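For reference, the ExportKernelDependence reconstructed above emits one block per kernel into ./memreuse_dependence.ir: a numbered header line with the kernel's scope_full_name(), one line per predecessor, then a blank separator. A hypothetical two-kernel excerpt (the kernel names here are invented for illustration):

[0] Default/network/Conv2D-op1
Default/network/TransData-op0

[1] Default/network/ReLU-op2
Default/network/Conv2D-op1
Default/network/TransData-op0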
mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_checker.h
@@ -17,6 +17,7 @@
 #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_REUSE_CHECKER_H_
 #define MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_REUSE_CHECKER_H_
 #include <map>
+#include <set>
 #include <vector>
 #include <string>
 #include <memory>
@@ -59,10 +60,14 @@ class MemReuseChecker {
   void ExportMembufInfoIR();
   void SetAddNewMembuInfos(const KernelDef *op_def, const std::vector<MembufPtr> &membuf_ptr_list, size_t op_idx);
   void ExportAddNewMmebufIR();
+  void set_kernel_front_map(const std::map<KernelDefPtr, std::set<KernelDefPtr>> &kernel_front_map) {
+    kernel_front_map_ = kernel_front_map;
+  }
+  void ExportKernelDependence();
 
  private:
   MemReuseChecker() = default;
-  ~MemReuseChecker() { MS_LOG(INFO) << "Total reused workspace size: " << total_re_wkspe_size_checker_; }
+  ~MemReuseChecker() {}
   size_t total_re_wkspe_size_checker_{0};
   std::vector<std::vector<MembufPtr>> membuf_all_infos_;
   std::vector<const void *> nor_output_tensors_;
@@ -79,6 +84,7 @@ class MemReuseChecker {
   std::vector<std::string> all_split_names_;
   std::map<int, std::vector<string>> tensor_from_;
   std::map<int, std::vector<string>> tensor_to_;
+  std::map<KernelDefPtr, std::set<KernelDefPtr>> kernel_front_map_;
   int64_t total_ori_static_size_ = 0;
   int64_t total_ori_input_size_ = 0;
   int64_t total_ori_value_size_ = 0;
mindspore/ccsrc/pre_activate/mem_reuse/stream_reuse.cc
deleted file mode 100644 (was at e7936ded)

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "pre_activate/mem_reuse/stream_reuse.h"

namespace mindspore {
namespace memreuse {
void StreamReuse::SetStreamReuseResource() {
#ifdef ENABLE_D
  auto logic_physic_map = device::ascend::AscendStreamAssign::GetInstance().logic_to_physic_map();
  auto logic_independent_map = device::ascend::AscendStreamAssign::GetInstance().logic_to_independent_map();
  MS_LOG(INFO) << "stream mem reuse for Davici";
  if (!logic_independent_map.empty() && !logic_physic_map.empty()) {
    set_logic_physic_map(logic_physic_map);
    set_logic_independent_map(logic_independent_map);
    InitReusableStreamMap();
  } else {
    MS_LOG(INFO) << "Non task sink or No Parallel stream exists";
  }
#endif
  MS_LOG(INFO) << "no need to set stream mem reuse resource";
}

std::vector<std::pair<uint32_t, uint32_t>> StreamReuse::SortLogicPhysicMapToList() {
  std::vector<std::pair<uint32_t, uint32_t>> logic_physic_list;
  (void)std::transform(logic_physic_map_.begin(), logic_physic_map_.end(), std::back_inserter(logic_physic_list),
                       [](std::pair<uint32_t, uint32_t> log_phy) { return log_phy; });
  std::sort(logic_physic_list.begin(), logic_physic_list.end(),
            [](const std::pair<uint32_t, uint32_t> &logic_phyic_pair1,
               const std::pair<uint32_t, uint32_t> &logic_phyic_pair2) {
              return logic_phyic_pair1.second < logic_phyic_pair2.second;
            });
  return logic_physic_list;
}

std::unordered_map<int, std::set<uint32_t>> StreamReuse::GetLogicPhysicsStreamMap() {
  auto logic_physic_list = SortLogicPhysicMapToList();
  std::unordered_map<int, std::set<uint32_t>> logic_phyics_map;
  for (size_t i = 0; i < logic_physic_list.size() - IntToSize(1); ++i) {
    auto curr_logic_physic = logic_physic_list.at(i);
    auto next_logic_physic = logic_physic_list.at(i + 1);
    for (auto j = curr_logic_physic.second; j < next_logic_physic.second; ++j) {
      (void)logic_phyics_map[curr_logic_physic.first].insert(j);
    }
  }
  // sort the logic independ map by value
  std::map<uint32_t, uint32_t> temp_map;
  for (const auto &logic_independ : logic_independent_map_) {
    (void)temp_map.insert(std::make_pair(logic_independ.second, logic_independ.first));
  }
  auto first_independent_stream_id = (*temp_map.begin()).first;
  auto last_physic_logic_stream_id = (*logic_physic_list.rbegin()).second;
  for (auto i = last_physic_logic_stream_id; i < first_independent_stream_id; ++i) {
    (void)logic_phyics_map[(*logic_physic_list.rbegin()).first].insert(i);
  }
  return logic_phyics_map;
}

void StreamReuse::InitReusableStreamMap() {
  // logic_phyics_map, key, logic_stream_id; value, physic_strema_ids included in that logic stream
  auto logic_phyics_map = GetLogicPhysicsStreamMap();
  // parallel_streams_map: key, current_stream_id; value, streams parallel to current stream
  for (const auto &logic_to_phyics : logic_phyics_map) {
    auto logic_stream_id = logic_to_phyics.first;
    auto iter_inde = logic_independent_map_.find(logic_stream_id);
    if (iter_inde != logic_independent_map_.end()) {
      // exist independent steam parallel to these logic streams
      auto independent_stream_id = iter_inde->second;
      auto physics_stream_id = logic_to_phyics.second;
      for (const auto &physic : physics_stream_id) {
        (void)parallel_streams_map_[physic].insert(independent_stream_id);
      }
    }
  }
  for (const auto &logic_to_independent : logic_independent_map_) {
    auto logic_stream_id = logic_to_independent.first;
    auto independent_stream_id = logic_to_independent.second;
    auto iter_physics = logic_phyics_map.find(logic_stream_id);
    if (iter_physics != logic_phyics_map.end()) {
      // exist logic steam parallel to these independent streams, default
      auto physics_set = iter_physics->second;
      for (const auto &physic : physics_set) {
        (void)parallel_streams_map_[independent_stream_id].insert(physic);
      }
    }
  }
}
}  // namespace memreuse
}  // namespace mindspore
mindspore/ccsrc/pre_activate/mem_reuse/stream_reuse.h
deleted file mode 100644 (was at e7936ded)

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_STREAM_REUSE_H_
#define MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_STREAM_REUSE_H_
#include <cmath>
#include <map>
#include <set>
#include <list>
#include <memory>
#include <vector>
#include <numeric>
#include <algorithm>
#include <utility>
#include <fstream>
#include <unordered_set>
#include <unordered_map>
#include "session/anf_runtime_algorithm.h"
#include "pre_activate/mem_reuse/kernel_refcount.h"
#ifdef ENABLE_D
#include "device/ascend/ascend_stream_assign.h"
#endif

namespace mindspore {
namespace memreuse {
class StreamReuse {
 public:
  StreamReuse() = default;
  ~StreamReuse() = default;
  void SetStreamReuseResource();
  void InitReusableStreamMap();
  std::vector<std::pair<uint32_t, uint32_t>> SortLogicPhysicMapToList();
  std::unordered_map<int, std::set<uint32_t>> GetLogicPhysicsStreamMap();
  void set_logic_physic_map(const std::unordered_map<uint32_t, uint32_t> &logic_physic_map) {
    logic_physic_map_ = logic_physic_map;
  }
  void set_logic_independent_map(const std::unordered_map<uint32_t, uint32_t> &logic_independent_map) {
    logic_independent_map_ = logic_independent_map;
  }
  std::unordered_map<uint32_t, std::unordered_set<uint32_t>> parallel_streams_map() { return parallel_streams_map_; }

 private:
  std::unordered_map<uint32_t, std::unordered_set<uint32_t>> parallel_streams_map_;
  std::unordered_map<uint32_t, uint32_t> logic_physic_map_;
  std::unordered_map<uint32_t, uint32_t> logic_independent_map_;
};
}  // namespace memreuse
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_STREAM_REUSE_H_
tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_allocator_test.cc
@@ -117,16 +117,13 @@ TEST_F(TestMemReuseAllocator, mem_reuse_allocator) {
   MS_LOG(INFO) << "run mem reuse success";
   size_t total_allocated_size = best_fit_mem_reuse->GetAllocatedSize();
   ASSERT_NE(total_allocated_size, 0);
-
-  auto is_reusable_stream = best_fit_mem_reuse->IsReusableStream(1, 3);
-  ASSERT_EQ(is_reusable_stream, true);
 }
 
 TEST_F(TestMemReuseAllocator, mem_reuse_allocator_add_membuf) {
   auto best_fit_mem_reuse = std::make_shared<BestFitMemReuse>();
   auto tensor_desc = std::make_shared<KernelRefCount>();
   tensor_desc->SetKernelRefCountInfo(0, 1024, kDynamicRefCount);
-  best_fit_mem_reuse->AddNewMembufPtr(tensor_desc.get(), kDyFac);
+  best_fit_mem_reuse->AddNewMembufPtr(tensor_desc.get(), kDynamicMem);
   auto allocated_size = best_fit_mem_reuse->GetAllocatedSize();
   ASSERT_EQ(allocated_size, 1024);
 }
@@ -135,7 +132,7 @@ TEST_F(TestMemReuseAllocator, mem_reuse_allocator_split_membuf) {
   auto best_fit_mem_reuse = std::make_shared<BestFitMemReuse>();
   auto tensor_0 = std::make_shared<KernelRefCount>();
   tensor_0->SetKernelRefCountInfo(0, 2048, kDynamicRefCount);
-  best_fit_mem_reuse->AddNewMembufPtr(tensor_0.get(), kDyFac);
+  best_fit_mem_reuse->AddNewMembufPtr(tensor_0.get(), kDynamicMem);
   auto tensor_1 = std::make_shared<KernelRefCount>();
   tensor_1->SetKernelRefCountInfo(1, 800, kDynamicRefCount);
tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc
@@ -228,12 +228,6 @@ TEST_F(TestMemReuseWithPy, KernelRef) {
   ASSERT_EQ(kernel_def_ptr->dirty, false);
   MembufPtr membuf_ptr = std::make_shared<Membuf>();
   ASSERT_NE(membuf_ptr, nullptr);
-  MembufPtr membuf_ptr_x = std::make_shared<Membuf>(0, memreuse::kUnused, 512, 128, 2);
-  ASSERT_EQ(membuf_ptr_x->status_, memreuse::kUnused);
-  ASSERT_EQ(membuf_ptr_x->size_, 512);
-  ASSERT_EQ(membuf_ptr_x->offset_, 128);
-  ASSERT_EQ(membuf_ptr_x->index_, 2);
-  ASSERT_EQ(membuf_ptr_x->stream_id_, 0);
 }
 
 TEST_F(TestMemReuseWithPy, ReuseAssignDynamicMemory) {
tests/ut/cpp/pre_activate/mem_reuse/stream_reuse_test.cc
deleted file mode 100644 (was at e7936ded)

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <memory>
#include <vector>
#include <string>
#include "operator/ops.h"
#include "pre_activate/mem_reuse/stream_reuse.h"
#include "common/common_test.h"
#include "common/py_func_graph_fetcher.h"

using mindspore::memreuse::StreamReuse;

namespace mindspore {
class TestStreamMemReuse : public UT::Common {
 public:
  TestStreamMemReuse() : getPyFun_("gtest_input.mem_reuse.TestMemReuseAllocator", true) {}
  void SetUp() {}

 public:
  UT::PyFuncGraphFetcher getPyFun_;
};

TEST_F(TestStreamMemReuse, init_reusable_stream_map_test) {
  std::unordered_map<uint32_t, uint32_t> logic_physic_map{{1, 0}, {2, 8}, {3, 3}};
  std::unordered_map<uint32_t, uint32_t> logic_independent_map{{3, 10}, {2, 11}};
  auto stream_reuse = std::make_shared<StreamReuse>();
  stream_reuse->set_logic_physic_map(logic_physic_map);
  stream_reuse->set_logic_independent_map(logic_independent_map);

  auto logic_phyics_map = stream_reuse->GetLogicPhysicsStreamMap();
  for (const auto &logic_physics : logic_phyics_map) {
    MS_LOG(INFO) << "[logic_id: " << logic_physics.first << "]";
    for (const auto &physic : logic_physics.second) {
      MS_LOG(INFO) << "physic: " << physic;
    }
  }
  MS_LOG(INFO) << "===========UT logic_physic_map size: " << logic_physic_map.size() << "========";
  ASSERT_EQ(logic_physic_map.size(), 3);

  stream_reuse->InitReusableStreamMap();
  auto parallel_streams_map = stream_reuse->parallel_streams_map();
  for (const auto &parallel_streams : parallel_streams_map) {
    MS_LOG(INFO) << "[stream id: " << parallel_streams.first << "]";
    for (const auto &stream : parallel_streams.second) {
      MS_LOG(INFO) << "parallel stream id: " << stream;
    }
  }
  ASSERT_EQ(parallel_streams_map[7].size(), 1);
}
}  // namespace mindspore