Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
d4b52ac5
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d4b52ac5
编写于
7月 29, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
7月 29, 2020
浏览文件
操作
浏览文件
下载
差异文件
!3489 use kernelruntime::mem_manager to reduce rtMalloc and rtFree time in trans data format
Merge pull request !3489 from lvchangquan/master
上级
fcdad59c
fdbe4c19
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
37 addition
and
33 deletion
+37
-33
mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
...pore/ccsrc/runtime/device/ascend/ascend_device_address.cc
+20
-28
mindspore/ccsrc/runtime/device/kernel_runtime.cc
mindspore/ccsrc/runtime/device/kernel_runtime.cc
+13
-3
mindspore/ccsrc/runtime/device/kernel_runtime.h
mindspore/ccsrc/runtime/device/kernel_runtime.h
+4
-2
未找到文件。
mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
浏览文件 @
d4b52ac5
...
@@ -153,6 +153,16 @@ bool SyncDeviceToHostAndFloatToFloat64(void *dst, size_t dst_size, const void *s
...
@@ -153,6 +153,16 @@ bool SyncDeviceToHostAndFloatToFloat64(void *dst, size_t dst_size, const void *s
return
true
;
return
true
;
}
}
DeviceAddressPtr
AssignLaunchMemory
(
size_t
size
,
const
std
::
string
&
format
,
TypeId
type
)
{
auto
ms_context
=
MsContext
::
GetInstance
();
MS_EXCEPTION_IF_NULL
(
ms_context
);
auto
device_id
=
ms_context
->
device_id
();
auto
runtime_instance
=
device
::
KernelRuntimeManager
::
Instance
().
GetKernelRuntime
(
kAscendDevice
,
device_id
);
MS_EXCEPTION_IF_NULL
(
runtime_instance
);
auto
address_ptr
=
runtime_instance
->
AssignSingleOpLaunchMemory
(
size
,
format
,
type
);
return
address_ptr
;
}
size_t
GetCommonAlignSize
(
size_t
input_size
)
{
size_t
GetCommonAlignSize
(
size_t
input_size
)
{
return
(
input_size
+
kMemAlignSize
+
31
)
/
kMemAlignSize
*
kMemAlignSize
;
return
(
input_size
+
kMemAlignSize
+
31
)
/
kMemAlignSize
*
kMemAlignSize
;
}
}
...
@@ -325,18 +335,15 @@ void AscendDeviceAddress::LaunchTransData(kernel::KernelModPtr kernel_mod_ptr, v
...
@@ -325,18 +335,15 @@ void AscendDeviceAddress::LaunchTransData(kernel::KernelModPtr kernel_mod_ptr, v
AddressPtrList
kernel_inputs
=
{
input_address
};
AddressPtrList
kernel_inputs
=
{
input_address
};
AddressPtrList
kernel_outputs
=
{
output_address
};
AddressPtrList
kernel_outputs
=
{
output_address
};
AddressPtrList
kernel_workspaces
;
AddressPtrList
kernel_workspaces
;
std
::
vector
<
void
*>
workspaces_address_ptr
(
workspace_size_list
.
size
(),
nullptr
);
if
(
!
workspace_size_list
.
empty
())
{
if
(
!
workspace_size_list
.
empty
())
{
for
(
size_t
i
=
0
;
i
<
workspace_size_list
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
workspace_size_list
.
size
();
++
i
)
{
auto
workspace_size
=
GetCommonAlignSize
(
workspace_size_list
[
i
]);
auto
workspace_size
=
GetCommonAlignSize
(
workspace_size_list
[
i
]);
auto
ret_malloc
=
rtMalloc
(
&
workspaces_address_ptr
[
i
],
workspace_size
,
RT_MEMORY_HBM
);
auto
workspace_address_ptr
=
AssignLaunchMemory
(
workspace_size
,
""
,
kTypeUnknown
);
if
(
ret_malloc
!=
RT_ERROR_NONE
)
{
MS_EXCEPTION_IF_NULL
(
workspace_address_ptr
);
MS_LOG
(
ERROR
)
<<
"Failed to rtMalloc memory"
;
}
auto
workspace_address
=
std
::
make_shared
<
kernel
::
Address
>
();
auto
workspace_address
=
std
::
make_shared
<
kernel
::
Address
>
();
MS_EXCEPTION_IF_NULL
(
workspace_address
);
MS_EXCEPTION_IF_NULL
(
workspace_address
);
workspace_address
->
addr
=
workspace
s_address_ptr
[
i
]
;
workspace_address
->
addr
=
workspace
_address_ptr
->
GetMutablePtr
()
;
workspace_address
->
size
=
workspace_
size
;
workspace_address
->
size
=
workspace_
address_ptr
->
GetSize
()
;
kernel_workspaces
.
push_back
(
workspace_address
);
kernel_workspaces
.
push_back
(
workspace_address
);
}
}
}
}
...
@@ -350,15 +357,6 @@ void AscendDeviceAddress::LaunchTransData(kernel::KernelModPtr kernel_mod_ptr, v
...
@@ -350,15 +357,6 @@ void AscendDeviceAddress::LaunchTransData(kernel::KernelModPtr kernel_mod_ptr, v
if
(
!
ret
)
{
if
(
!
ret
)
{
MS_LOG
(
ERROR
)
<<
"Launch kernel failed."
;
MS_LOG
(
ERROR
)
<<
"Launch kernel failed."
;
}
}
SyncStream
();
if
(
!
workspace_size_list
.
empty
())
{
for
(
size_t
i
=
0
;
i
<
workspace_size_list
.
size
();
++
i
)
{
auto
ret_free
=
rtFree
(
workspaces_address_ptr
[
i
]);
if
(
ret_free
!=
RT_ERROR_NONE
)
{
MS_LOG
(
ERROR
)
<<
"Failed to rtFree memory"
;
}
}
}
}
}
kernel
::
KernelModPtr
AscendDeviceAddress
::
CompileTransDataAndObtainKernelMod
(
const
nlohmann
::
json
&
kernel_json
)
const
{
kernel
::
KernelModPtr
AscendDeviceAddress
::
CompileTransDataAndObtainKernelMod
(
const
nlohmann
::
json
&
kernel_json
)
const
{
...
@@ -418,19 +416,17 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const
...
@@ -418,19 +416,17 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const
size
=
device_dtype_size
*
shape_size
;
size
=
device_dtype_size
*
shape_size
;
}
}
size
=
GetCommonAlignSize
(
size
);
size
=
GetCommonAlignSize
(
size
);
void
*
output_address_ptr
=
nullptr
;
auto
output_address
=
AssignLaunchMemory
(
size
,
kOpFormat_NCHW
,
type_id_
);
auto
ret_malloc
=
rtMalloc
(
&
output_address_ptr
,
size
,
RT_MEMORY_HBM
);
MS_EXCEPTION_IF_NULL
(
output_address
);
if
(
ret_malloc
!=
RT_ERROR_NONE
)
{
MS_LOG
(
ERROR
)
<<
"Failed to rtMalloc memory"
;
}
auto
workspace_size_list
=
GetWorkspaceSizeList
(
kernel_json
);
auto
workspace_size_list
=
GetWorkspaceSizeList
(
kernel_json
);
// launch
// launch
LaunchTransData
(
kernel_mod_ptr
,
output_address_ptr
,
size
,
workspace_size_list
);
LaunchTransData
(
kernel_mod_ptr
,
output_address
->
GetMutablePtr
(),
output_address
->
GetSize
(),
workspace_size_list
);
SyncStream
();
if
(
type_id_
==
type
)
{
if
(
type_id_
==
type
)
{
SyncMemory
(
host_ptr
,
output_address
_ptr
,
host_size
,
RT_MEMCPY_DEVICE_TO_HOST
);
SyncMemory
(
host_ptr
,
output_address
->
GetPtr
()
,
host_size
,
RT_MEMCPY_DEVICE_TO_HOST
);
}
else
{
}
else
{
auto
host
=
std
::
vector
<
uint8_t
>
(
size
);
auto
host
=
std
::
vector
<
uint8_t
>
(
size
);
SyncMemory
(
host
.
data
(),
output_address
_ptr
,
size
,
RT_MEMCPY_DEVICE_TO_HOST
);
SyncMemory
(
host
.
data
(),
output_address
->
GetPtr
()
,
size
,
RT_MEMCPY_DEVICE_TO_HOST
);
auto
shape_size
=
trans
::
ShapeSize
(
host_shape
);
auto
shape_size
=
trans
::
ShapeSize
(
host_shape
);
const
trans
::
TypeIdArgs
type_args
{
host
.
data
(),
shape_size
,
type_id_
,
type
,
host_size
};
const
trans
::
TypeIdArgs
type_args
{
host
.
data
(),
shape_size
,
type_id_
,
type
,
host_size
};
sync_ok
=
trans
::
TransDataType
(
type_args
,
host_ptr
);
sync_ok
=
trans
::
TransDataType
(
type_args
,
host_ptr
);
...
@@ -439,10 +435,6 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const
...
@@ -439,10 +435,6 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const
return
false
;
return
false
;
}
}
}
}
auto
ret_free
=
rtFree
(
output_address_ptr
);
if
(
ret_free
!=
RT_ERROR_NONE
)
{
MS_LOG
(
ERROR
)
<<
"Failed to rtFree memory"
;
}
return
sync_ok
;
return
sync_ok
;
}
}
...
...
mindspore/ccsrc/runtime/device/kernel_runtime.cc
浏览文件 @
d4b52ac5
...
@@ -842,9 +842,10 @@ void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
...
@@ -842,9 +842,10 @@ void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
MS_LOG
(
INFO
)
<<
"Clear graph:"
<<
graph_id
<<
" runtime resource"
;
MS_LOG
(
INFO
)
<<
"Clear graph:"
<<
graph_id
<<
" runtime resource"
;
}
}
bool
KernelRuntime
::
LaunchTaskBasedOnSingleKernel
(
kernel
::
KernelModPtr
kernel_mod_ptr
,
AddressPtrList
kernel_inputs
,
bool
KernelRuntime
::
LaunchTaskBasedOnSingleKernel
(
kernel
::
KernelModPtr
kernel_mod_ptr
,
AddressPtrList
kernel_outputs
,
const
AddressPtrList
&
kernel_inputs
,
AddressPtrList
kernel_workspaces
)
const
{
const
AddressPtrList
&
kernel_outputs
,
const
AddressPtrList
&
kernel_workspaces
)
const
{
MS_EXCEPTION_IF_NULL
(
kernel_mod_ptr
);
MS_EXCEPTION_IF_NULL
(
kernel_mod_ptr
);
auto
ret
=
kernel_mod_ptr
->
Launch
(
kernel_inputs
,
kernel_workspaces
,
kernel_outputs
,
stream_
);
auto
ret
=
kernel_mod_ptr
->
Launch
(
kernel_inputs
,
kernel_workspaces
,
kernel_outputs
,
stream_
);
if
(
!
ret
)
{
if
(
!
ret
)
{
...
@@ -854,6 +855,15 @@ bool KernelRuntime::LaunchTaskBasedOnSingleKernel(kernel::KernelModPtr kernel_mo
...
@@ -854,6 +855,15 @@ bool KernelRuntime::LaunchTaskBasedOnSingleKernel(kernel::KernelModPtr kernel_mo
return
true
;
return
true
;
}
}
DeviceAddressPtr
KernelRuntime
::
AssignSingleOpLaunchMemory
(
size_t
size
,
const
std
::
string
&
format
,
TypeId
type
)
{
auto
device_address
=
CreateDeviceAddress
(
nullptr
,
size
,
format
,
type
);
MS_EXCEPTION_IF_NULL
(
device_address
);
MS_EXCEPTION_IF_NULL
(
mem_manager_
);
auto
base_ptr
=
mem_manager_
->
MallocMem
(
kDynamicMem
,
size
);
device_address
->
set_ptr
(
base_ptr
);
return
device_address
;
}
#ifdef ENABLE_DUMP_E2E
#ifdef ENABLE_DUMP_E2E
bool
KernelRuntime
::
SetDumpConf
()
{
bool
KernelRuntime
::
SetDumpConf
()
{
dump_conf_ptr_
=
std
::
make_shared
<
Dump
>
();
dump_conf_ptr_
=
std
::
make_shared
<
Dump
>
();
...
...
mindspore/ccsrc/runtime/device/kernel_runtime.h
浏览文件 @
d4b52ac5
...
@@ -65,8 +65,9 @@ class KernelRuntime {
...
@@ -65,8 +65,9 @@ class KernelRuntime {
virtual
bool
RunTask
(
const
session
::
KernelGraph
*
graph
);
virtual
bool
RunTask
(
const
session
::
KernelGraph
*
graph
);
virtual
bool
GenTask
(
const
session
::
KernelGraph
*
graph
);
virtual
bool
GenTask
(
const
session
::
KernelGraph
*
graph
);
bool
LaunchKernel
(
const
session
::
KernelGraph
*
graph
);
bool
LaunchKernel
(
const
session
::
KernelGraph
*
graph
);
bool
LaunchTaskBasedOnSingleKernel
(
kernel
::
KernelModPtr
kernel_mod_ptr
,
AddressPtrList
kernel_inputs
,
bool
LaunchTaskBasedOnSingleKernel
(
kernel
::
KernelModPtr
kernel_mod_ptr
,
const
AddressPtrList
&
kernel_inputs
,
AddressPtrList
kernel_outputs
,
AddressPtrList
kernel_workspaces
)
const
;
const
AddressPtrList
&
kernel_outputs
,
const
AddressPtrList
&
kernel_workspaces
)
const
;
virtual
void
AssignStaticMemoryInput
(
const
session
::
KernelGraph
*
graph
);
virtual
void
AssignStaticMemoryInput
(
const
session
::
KernelGraph
*
graph
);
virtual
void
AssignStaticMemoryValueNode
(
session
::
KernelGraph
*
graph
);
virtual
void
AssignStaticMemoryValueNode
(
session
::
KernelGraph
*
graph
);
virtual
void
ClearGraphRuntimeResource
(
uint32_t
graph_id
);
virtual
void
ClearGraphRuntimeResource
(
uint32_t
graph_id
);
...
@@ -79,6 +80,7 @@ class KernelRuntime {
...
@@ -79,6 +80,7 @@ class KernelRuntime {
// for GPU and D to impl
// for GPU and D to impl
virtual
void
ReleaseDeviceRes
()
{}
virtual
void
ReleaseDeviceRes
()
{}
void
set_device_id
(
uint32_t
device_id
)
{
device_id_
=
device_id
;
}
void
set_device_id
(
uint32_t
device_id
)
{
device_id_
=
device_id
;
}
DeviceAddressPtr
AssignSingleOpLaunchMemory
(
size_t
size
,
const
std
::
string
&
format
,
TypeId
type
);
protected:
protected:
virtual
DeviceAddressPtr
CreateDeviceAddress
(
void
*
device_ptr
,
size_t
device_size
,
const
string
&
format
,
virtual
DeviceAddressPtr
CreateDeviceAddress
(
void
*
device_ptr
,
size_t
device_size
,
const
string
&
format
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录