Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
c0070d3d
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c0070d3d
编写于
9月 07, 2020
作者:
Z
Zhang Qinghua
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Use the unified Execute function to run Graph or Single Op Graph.
上级
77dd91a6
变更
12
展开全部
隐藏空白更改
内联
并排
Showing
12 changed file
with
1291 addition
and
1298 deletion
+1291
-1298
mindspore/ccsrc/backend/session/ascend_session.cc
mindspore/ccsrc/backend/session/ascend_session.cc
+14
-16
mindspore/ccsrc/backend/session/ascend_session.h
mindspore/ccsrc/backend/session/ascend_session.h
+3
-2
mindspore/ccsrc/backend/session/cpu_session.cc
mindspore/ccsrc/backend/session/cpu_session.cc
+1
-1
mindspore/ccsrc/backend/session/gpu_session.cc
mindspore/ccsrc/backend/session/gpu_session.cc
+2
-2
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
...pore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
+2
-8
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
...spore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
+2
-2
mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
+1
-1
mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
+1
-1
mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
+1150
-1150
mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h
mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h
+112
-112
mindspore/ccsrc/runtime/device/kernel_runtime.cc
mindspore/ccsrc/runtime/device/kernel_runtime.cc
+1
-1
mindspore/ccsrc/runtime/device/kernel_runtime.h
mindspore/ccsrc/runtime/device/kernel_runtime.h
+2
-2
未找到文件。
mindspore/ccsrc/backend/session/ascend_session.cc
浏览文件 @
c0070d3d
...
...
@@ -318,7 +318,7 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::
#endif
{
// run task on device
Execute
(
kernel_graph
);
Execute
(
kernel_graph
,
true
);
}
// summary
Summary
(
kernel_graph
.
get
());
...
...
@@ -348,17 +348,6 @@ void AscendSession::RunOpHardwareOptimize(const std::shared_ptr<session::KernelG
MS_LOG
(
INFO
)
<<
"Finish"
;
}
void
AscendSession
::
RunOpExecTask
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
{
MS_LOG
(
INFO
)
<<
"Start!"
;
auto
runtime_instance
=
device
::
KernelRuntimeManager
::
Instance
().
GetKernelRuntime
(
kAscendDevice
,
device_id_
);
MS_EXCEPTION_IF_NULL
(
runtime_instance
);
bool
ret_ok
=
runtime_instance
->
LaunchKernel
(
kernel_graph
.
get
());
if
(
!
ret_ok
)
{
MS_LOG
(
EXCEPTION
)
<<
"Run task error!"
;
}
MS_LOG
(
INFO
)
<<
"Finish!"
;
}
bool
AscendSession
::
GraphCacheExist
(
const
GraphInfo
&
graph_info
)
const
{
return
run_op_graphs_
.
find
(
graph_info
)
!=
run_op_graphs_
.
end
();
}
...
...
@@ -398,7 +387,7 @@ void AscendSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_i
// load input data to device
LoadInputData
(
graph
,
input_tensors
);
// run op
RunOpExecTask
(
graph
);
Execute
(
graph
,
false
);
// get output
if
(
op_run_info
.
value
!=
nullptr
)
{
std
::
vector
<
tensor
::
TensorPtr
>
pre_output_tensors
;
...
...
@@ -552,21 +541,30 @@ void AscendSession::RunOpMemoryClear(const KernelGraph *kernel_graph) const {
void
AscendSession
::
Load
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
{
MS_LOG
(
INFO
)
<<
"Start!"
;
auto
context_ptr
=
MsContext
::
GetInstance
();
MS_EXCEPTION_IF_NULL
(
context_ptr
);
bool
is_task_sink
=
context_ptr
->
get_param
<
bool
>
(
MS_CTX_ENABLE_TASK_SINK
);
(
void
)
device
::
KernelAdjust
::
GetInstance
().
StepLoadCtrlInputs
(
kernel_graph
);
auto
runtime_instance
=
device
::
KernelRuntimeManager
::
Instance
().
GetKernelRuntime
(
kAscendDevice
,
device_id_
);
MS_EXCEPTION_IF_NULL
(
runtime_instance
);
bool
ret_ok
=
runtime_instance
->
Load
(
kernel_graph
.
get
());
bool
ret_ok
=
runtime_instance
->
Load
(
kernel_graph
.
get
()
,
is_task_sink
);
if
(
!
ret_ok
)
{
MS_LOG
(
EXCEPTION
)
<<
"Load task error!"
;
}
MS_LOG
(
INFO
)
<<
"Finish!"
;
}
void
AscendSession
::
Execute
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
{
void
AscendSession
::
Execute
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
,
bool
is_task
)
const
{
MS_LOG
(
INFO
)
<<
"Start!"
;
bool
is_task_sink
=
false
;
if
(
is_task
)
{
auto
context_ptr
=
MsContext
::
GetInstance
();
MS_EXCEPTION_IF_NULL
(
context_ptr
);
is_task_sink
=
context_ptr
->
get_param
<
bool
>
(
MS_CTX_ENABLE_TASK_SINK
);
}
auto
runtime_instance
=
device
::
KernelRuntimeManager
::
Instance
().
GetKernelRuntime
(
kAscendDevice
,
device_id_
);
MS_EXCEPTION_IF_NULL
(
runtime_instance
);
bool
ret_ok
=
runtime_instance
->
Run
(
kernel_graph
.
get
());
bool
ret_ok
=
runtime_instance
->
Run
(
kernel_graph
.
get
()
,
is_task_sink
);
if
(
!
ret_ok
)
{
MS_LOG
(
EXCEPTION
)
<<
"run task error!"
;
}
...
...
mindspore/ccsrc/backend/session/ascend_session.h
浏览文件 @
c0070d3d
...
...
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_SESSION_ASCEND_SESSION_H
#define MINDSPORE_CCSRC_BACKEND_SESSION_ASCEND_SESSION_H
#include <unordered_map>
#include <string>
#include <memory>
...
...
@@ -82,13 +84,12 @@ class AscendSession : public SessionBasic {
KernelGraph
*
kernel_graph
)
const
;
void
RunOpMemoryClear
(
const
KernelGraph
*
kernel_graph
)
const
;
void
Load
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
void
Execute
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
void
Execute
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
,
bool
is_task
)
const
;
void
Dump
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
void
DumpAllGraphs
(
const
std
::
vector
<
KernelGraphPtr
>
&
all_graphs
);
void
LoadTensor
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
// below functions are used for run op
void
RunOpHardwareOptimize
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph
)
const
;
void
RunOpExecTask
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
)
const
;
static
void
BackendOptimization
(
const
std
::
vector
<
KernelGraphPtr
>
&
all_graphs
);
static
void
LinkChildGraphs
(
NotNull
<
KernelGraphPtr
>
graph
);
...
...
mindspore/ccsrc/backend/session/cpu_session.cc
浏览文件 @
c0070d3d
...
...
@@ -118,7 +118,7 @@ void CPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
debugger_
->
PreExecute
(
kernel_graph
);
}
#endif
bool
ret
=
runtime_
.
Run
(
kernel_graph
.
get
());
bool
ret
=
runtime_
.
Run
(
kernel_graph
.
get
()
,
false
);
if
(
!
ret
)
{
MS_LOG
(
EXCEPTION
)
<<
"Run graph failed"
;
}
...
...
mindspore/ccsrc/backend/session/gpu_session.cc
浏览文件 @
c0070d3d
...
...
@@ -191,9 +191,9 @@ void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const
auto
runtime_instance
=
device
::
KernelRuntimeManager
::
Instance
().
GetSingleKernelRuntime
(
kGPUDevice
,
device_id_
);
MS_EXCEPTION_IF_NULL
(
runtime_instance
);
#ifdef ENABLE_DEBUGGER
if
(
!
runtime_instance
->
Run
(
kernel_graph
.
get
(),
debugger_
.
get
()))
{
if
(
!
runtime_instance
->
Run
(
kernel_graph
.
get
(),
false
,
debugger_
.
get
()))
{
#else
if
(
!
runtime_instance
->
Run
(
kernel_graph
.
get
()))
{
if
(
!
runtime_instance
->
Run
(
kernel_graph
.
get
()
,
false
))
{
#endif
MS_LOG
(
EXCEPTION
)
<<
"GPU execute graph failed!"
;
}
...
...
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
浏览文件 @
c0070d3d
...
...
@@ -454,10 +454,7 @@ DeviceAddressPtr AscendKernelRuntime::CreateDeviceAddress(void *device_ptr, size
return
std
::
make_shared
<
AscendDeviceAddress
>
(
device_ptr
,
device_size
,
format
,
type_id
);
}
bool
AscendKernelRuntime
::
Load
(
session
::
KernelGraph
*
graph
)
{
auto
context_ptr
=
MsContext
::
GetInstance
();
MS_EXCEPTION_IF_NULL
(
context_ptr
);
bool
is_task_sink
=
context_ptr
->
get_param
<
bool
>
(
MS_CTX_ENABLE_TASK_SINK
);
bool
AscendKernelRuntime
::
Load
(
session
::
KernelGraph
*
graph
,
bool
is_task_sink
)
{
if
(
!
is_task_sink
)
{
return
true
;
}
...
...
@@ -609,17 +606,14 @@ void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) {
}
}
bool
AscendKernelRuntime
::
Run
(
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
)
{
bool
AscendKernelRuntime
::
Run
(
session
::
KernelGraph
*
graph
,
bool
is_task_sink
,
Debugger
*
debugger
)
{
bool
ret
=
false
;
auto
context_ptr
=
MsContext
::
GetInstance
();
MS_EXCEPTION_IF_NULL
(
context_ptr
);
#if defined(_WIN32) || defined(_WIN64)
auto
start_time
=
std
::
chrono
::
steady_clock
::
now
();
#else
struct
timeval
start_time
,
end_time
;
(
void
)
gettimeofday
(
&
start_time
,
nullptr
);
#endif
bool
is_task_sink
=
context_ptr
->
get_param
<
bool
>
(
MS_CTX_ENABLE_TASK_SINK
);
if
(
is_task_sink
)
{
ret
=
RunTask
(
graph
);
}
else
{
...
...
mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
浏览文件 @
c0070d3d
...
...
@@ -44,8 +44,8 @@ class AscendKernelRuntime : public KernelRuntime {
bool
GenTask
(
const
session
::
KernelGraph
*
graph
);
bool
LoadTask
(
const
session
::
KernelGraph
*
graph
);
bool
RunTask
(
const
session
::
KernelGraph
*
graph
);
bool
Load
(
session
::
KernelGraph
*
graph
)
override
;
bool
Run
(
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
)
override
;
bool
Load
(
session
::
KernelGraph
*
graph
,
bool
is_task_sink
)
override
;
bool
Run
(
session
::
KernelGraph
*
graph
,
bool
is_task_sink
,
Debugger
*
debugger
=
nullptr
)
override
;
void
ClearGraphRuntimeResource
(
uint32_t
graph_id
,
const
std
::
vector
<
AnfNodePtr
>
&
inputs
,
const
std
::
unordered_set
<
ValueNodePtr
>
&
value_nodes
,
const
std
::
vector
<
CNodePtr
>
&
execution_order
)
override
;
...
...
mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
浏览文件 @
c0070d3d
...
...
@@ -287,7 +287,7 @@ void CPUKernelRuntime::DecreaseSummaryRefCount(const session::NamedSummaryOutput
resource_manager_
.
DecreaseSummaryRefCount
(
summary_outputs
);
}
bool
CPUKernelRuntime
::
Run
(
session
::
KernelGraph
*
kernel_graph
,
Debugger
*
debugger
)
{
bool
CPUKernelRuntime
::
Run
(
session
::
KernelGraph
*
kernel_graph
,
bool
is_task_sink
,
Debugger
*
debugger
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph
);
resource_manager_
.
IncreaseAddressRefCount
(
kernel_graph
);
...
...
mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
浏览文件 @
c0070d3d
...
...
@@ -36,7 +36,7 @@ class CPUKernelRuntime : public KernelRuntime {
~
CPUKernelRuntime
()
override
=
default
;
bool
Init
()
override
{
return
true
;
}
bool
Run
(
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
)
override
;
bool
Run
(
session
::
KernelGraph
*
graph
,
bool
is_task_sink
,
Debugger
*
debugger
=
nullptr
)
override
;
void
AssignKernelAddress
(
session
::
KernelGraph
*
kernel_graph
);
void
BindInputOutput
(
session
::
KernelGraph
*
kernel_graph
,
const
std
::
vector
<
tensor
::
TensorPtr
>
&
inputs
,
VectorRef
*
outputs
);
...
...
mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
浏览文件 @
c0070d3d
此差异已折叠。
点击以展开。
mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h
浏览文件 @
c0070d3d
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_KERNEL_RUNTIME_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_KERNEL_RUNTIME_H_
#include <string>
#include <memory>
#include <vector>
#include <set>
#include <utility>
#include <unordered_map>
#include <unordered_set>
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "backend/optimizer/mem_reuse/mem_swap_manager.h"
namespace
mindspore
{
namespace
device
{
namespace
gpu
{
using
mindspore
::
device
::
memswap
::
MemSwapManagerPtr
;
class
GPUKernelRuntime
:
public
KernelRuntime
{
public:
GPUKernelRuntime
()
=
default
;
~
GPUKernelRuntime
()
override
=
default
;
bool
Init
()
override
;
void
ReleaseDeviceRes
()
override
;
void
ClearGraphRuntimeResource
(
uint32_t
graph_id
,
const
std
::
vector
<
AnfNodePtr
>
&
inputs
,
const
std
::
unordered_set
<
ValueNodePtr
>
&
value_nodes
,
const
std
::
vector
<
CNodePtr
>
&
execution_order
)
override
;
void
AssignMemory
(
session
::
KernelGraph
*
graph
)
override
;
bool
Run
(
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
)
override
;
#ifdef ENABLE_DUMP_E2E
bool
DumpData
(
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
)
override
;
#endif
protected:
DeviceAddressPtr
CreateDeviceAddress
(
void
*
device_ptr
,
size_t
device_size
,
const
string
&
format
,
TypeId
type_id
)
override
;
bool
SyncStream
()
override
;
private:
GPUKernelRuntime
(
const
GPUKernelRuntime
&
);
GPUKernelRuntime
&
operator
=
(
const
GPUKernelRuntime
&
);
bool
InitDevice
();
bool
device_init_
{
false
};
// The related functions and members for using dynamic memory pool.
void
InitKernelRefCount
(
const
session
::
KernelGraph
*
graph
);
void
InitKernelOutputAddress
(
const
session
::
KernelGraph
*
graph
);
void
InitKernelWorkspaceAddress
(
const
session
::
KernelGraph
*
graph
);
void
InitMemorySwapInfo
(
const
session
::
KernelGraph
*
graph
);
void
SaveGraphOutputNode
(
const
session
::
KernelGraph
*
graph
);
bool
IsGraphOutput
(
const
session
::
KernelGraph
*
graph
,
const
mindspore
::
AnfNodePtr
&
kernel
)
const
;
void
ClearKernelOutputAddress
(
const
session
::
KernelGraph
*
graph
);
void
ClearKernelWorkspaceAddress
(
const
session
::
KernelGraph
*
graph
);
void
ClearKernelOldOutputAndWorkspace
(
const
session
::
KernelGraph
*
graph
);
bool
RunOneStep
(
const
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
);
bool
SearchMemSwapScheme
(
const
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
);
bool
RefineMemSwapScheme
(
const
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
);
bool
LaunchKernelDynamic
(
const
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
,
bool
mock
=
false
,
bool
profiling
=
false
);
void
LaunchKernelWithTimeProfiling
(
const
AnfNodePtr
&
kernel
,
const
AddressPtrList
&
inputs
,
const
AddressPtrList
&
workspace
,
const
AddressPtrList
&
outputs
);
bool
AttemptMallocMem
(
const
DeviceAddressPtr
&
device_address
,
size_t
size
,
bool
mock
);
bool
AllocKernelDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_inputs
,
AddressPtrList
*
kernel_workspaces
,
AddressPtrList
*
kernel_outputs
,
bool
mock
);
bool
AllocKernelInputDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_inputs
,
bool
mock
);
bool
AllocKernelOutputDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_outputs
,
bool
mock
);
bool
AllocKernelWorkspaceDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_workspaces
,
bool
mock
);
void
AllocCommunicationOpDynamicRes
(
const
session
::
KernelGraph
*
graph
);
void
AllocCommunicationOpInputDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
);
void
AllocCommunicationOpOutputDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
);
void
AllocCommunicationOpMemory
(
bool
is_need_alloc_memory
,
bool
is_need_free_memory
,
const
DeviceAddressPtrList
addr_list
,
size_t
total_size
,
std
::
vector
<
size_t
>
size_list
);
void
FreeKernelDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
);
bool
UpdateMemorySwapTask
(
const
AnfNodePtr
&
kernel
,
bool
mock
,
bool
profiling
);
bool
AddMemorySwapTask
(
const
AnfNodePtr
&
kernel
,
bool
mock
,
bool
profiling
);
void
UpdateHostSwapInQueue
(
const
DeviceAddressPtr
device_address
,
bool
mock
);
void
UpdateHostSwapOutQueue
(
bool
mock
);
void
ClearSwapInfo
(
bool
mock
);
std
::
unordered_map
<
uint32_t
,
MemReuseUtilPtr
>
mem_reuse_util_map_
;
std
::
unordered_map
<
uint32_t
,
MemSwapManagerPtr
>
mem_swap_map_
;
std
::
unordered_map
<
uint32_t
,
bool
>
is_first_step_map_
;
std
::
unordered_map
<
uint32_t
,
std
::
set
<
AnfNodePtr
>>
graph_output_map_
;
MemReuseUtilPtr
mem_reuse_util_
{
nullptr
};
MemSwapManagerPtr
mem_swap_manager_
{
nullptr
};
};
MS_REG_KERNEL_RUNTIME
(
kGPUDevice
,
GPUKernelRuntime
);
}
// namespace gpu
}
// namespace device
}
// namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_KERNEL_RUNTIME_H_
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_KERNEL_RUNTIME_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_KERNEL_RUNTIME_H_
#include <string>
#include <memory>
#include <vector>
#include <set>
#include <utility>
#include <unordered_map>
#include <unordered_set>
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "backend/optimizer/mem_reuse/mem_swap_manager.h"
namespace
mindspore
{
namespace
device
{
namespace
gpu
{
using
mindspore
::
device
::
memswap
::
MemSwapManagerPtr
;
class
GPUKernelRuntime
:
public
KernelRuntime
{
public:
GPUKernelRuntime
()
=
default
;
~
GPUKernelRuntime
()
override
=
default
;
bool
Init
()
override
;
void
ReleaseDeviceRes
()
override
;
void
ClearGraphRuntimeResource
(
uint32_t
graph_id
,
const
std
::
vector
<
AnfNodePtr
>
&
inputs
,
const
std
::
unordered_set
<
ValueNodePtr
>
&
value_nodes
,
const
std
::
vector
<
CNodePtr
>
&
execution_order
)
override
;
void
AssignMemory
(
session
::
KernelGraph
*
graph
)
override
;
bool
Run
(
session
::
KernelGraph
*
graph
,
bool
is_task_sink
,
Debugger
*
debugger
=
nullptr
)
override
;
#ifdef ENABLE_DUMP_E2E
bool
DumpData
(
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
)
override
;
#endif
protected:
DeviceAddressPtr
CreateDeviceAddress
(
void
*
device_ptr
,
size_t
device_size
,
const
string
&
format
,
TypeId
type_id
)
override
;
bool
SyncStream
()
override
;
private:
GPUKernelRuntime
(
const
GPUKernelRuntime
&
);
GPUKernelRuntime
&
operator
=
(
const
GPUKernelRuntime
&
);
bool
InitDevice
();
bool
device_init_
{
false
};
// The related functions and members for using dynamic memory pool.
void
InitKernelRefCount
(
const
session
::
KernelGraph
*
graph
);
void
InitKernelOutputAddress
(
const
session
::
KernelGraph
*
graph
);
void
InitKernelWorkspaceAddress
(
const
session
::
KernelGraph
*
graph
);
void
InitMemorySwapInfo
(
const
session
::
KernelGraph
*
graph
);
void
SaveGraphOutputNode
(
const
session
::
KernelGraph
*
graph
);
bool
IsGraphOutput
(
const
session
::
KernelGraph
*
graph
,
const
mindspore
::
AnfNodePtr
&
kernel
)
const
;
void
ClearKernelOutputAddress
(
const
session
::
KernelGraph
*
graph
);
void
ClearKernelWorkspaceAddress
(
const
session
::
KernelGraph
*
graph
);
void
ClearKernelOldOutputAndWorkspace
(
const
session
::
KernelGraph
*
graph
);
bool
RunOneStep
(
const
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
);
bool
SearchMemSwapScheme
(
const
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
);
bool
RefineMemSwapScheme
(
const
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
);
bool
LaunchKernelDynamic
(
const
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
,
bool
mock
=
false
,
bool
profiling
=
false
);
void
LaunchKernelWithTimeProfiling
(
const
AnfNodePtr
&
kernel
,
const
AddressPtrList
&
inputs
,
const
AddressPtrList
&
workspace
,
const
AddressPtrList
&
outputs
);
bool
AttemptMallocMem
(
const
DeviceAddressPtr
&
device_address
,
size_t
size
,
bool
mock
);
bool
AllocKernelDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_inputs
,
AddressPtrList
*
kernel_workspaces
,
AddressPtrList
*
kernel_outputs
,
bool
mock
);
bool
AllocKernelInputDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_inputs
,
bool
mock
);
bool
AllocKernelOutputDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_outputs
,
bool
mock
);
bool
AllocKernelWorkspaceDynamicRes
(
const
mindspore
::
kernel
::
KernelMod
&
kernel_mod
,
const
mindspore
::
AnfNodePtr
&
kernel
,
AddressPtrList
*
kernel_workspaces
,
bool
mock
);
void
AllocCommunicationOpDynamicRes
(
const
session
::
KernelGraph
*
graph
);
void
AllocCommunicationOpInputDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
);
void
AllocCommunicationOpOutputDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
);
void
AllocCommunicationOpMemory
(
bool
is_need_alloc_memory
,
bool
is_need_free_memory
,
const
DeviceAddressPtrList
addr_list
,
size_t
total_size
,
std
::
vector
<
size_t
>
size_list
);
void
FreeKernelDynamicRes
(
const
mindspore
::
AnfNodePtr
&
kernel
);
bool
UpdateMemorySwapTask
(
const
AnfNodePtr
&
kernel
,
bool
mock
,
bool
profiling
);
bool
AddMemorySwapTask
(
const
AnfNodePtr
&
kernel
,
bool
mock
,
bool
profiling
);
void
UpdateHostSwapInQueue
(
const
DeviceAddressPtr
device_address
,
bool
mock
);
void
UpdateHostSwapOutQueue
(
bool
mock
);
void
ClearSwapInfo
(
bool
mock
);
std
::
unordered_map
<
uint32_t
,
MemReuseUtilPtr
>
mem_reuse_util_map_
;
std
::
unordered_map
<
uint32_t
,
MemSwapManagerPtr
>
mem_swap_map_
;
std
::
unordered_map
<
uint32_t
,
bool
>
is_first_step_map_
;
std
::
unordered_map
<
uint32_t
,
std
::
set
<
AnfNodePtr
>>
graph_output_map_
;
MemReuseUtilPtr
mem_reuse_util_
{
nullptr
};
MemSwapManagerPtr
mem_swap_manager_
{
nullptr
};
};
MS_REG_KERNEL_RUNTIME
(
kGPUDevice
,
GPUKernelRuntime
);
}
// namespace gpu
}
// namespace device
}
// namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_KERNEL_RUNTIME_H_
mindspore/ccsrc/runtime/device/kernel_runtime.cc
浏览文件 @
c0070d3d
...
...
@@ -40,7 +40,7 @@ KernelRuntime::~KernelRuntime() {
#endif
}
bool
KernelRuntime
::
Load
(
session
::
KernelGraph
*
graph
)
{
return
true
;
}
bool
KernelRuntime
::
Load
(
session
::
KernelGraph
*
graph
,
bool
is_task_sink
)
{
return
true
;
}
bool
KernelRuntime
::
DumpData
(
mindspore
::
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
)
{
if
(
graph
!=
nullptr
)
{
...
...
mindspore/ccsrc/runtime/device/kernel_runtime.h
浏览文件 @
c0070d3d
...
...
@@ -59,8 +59,8 @@ class KernelRuntime {
bool
DumpDataEnabled
();
bool
DumpDataEnabledIteration
();
virtual
bool
DumpData
(
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
);
virtual
bool
Load
(
session
::
KernelGraph
*
graph
);
virtual
bool
Run
(
session
::
KernelGraph
*
graph
,
Debugger
*
debugger
=
nullptr
)
=
0
;
virtual
bool
Load
(
session
::
KernelGraph
*
graph
,
bool
is_task_sink
);
virtual
bool
Run
(
session
::
KernelGraph
*
graph
,
bool
is_task_sink
,
Debugger
*
debugger
=
nullptr
)
=
0
;
bool
LaunchKernel
(
const
session
::
KernelGraph
*
graph
);
bool
LaunchTaskBasedOnSingleKernel
(
kernel
::
KernelModPtr
kernel_mod_ptr
,
const
AddressPtrList
&
kernel_inputs
,
const
AddressPtrList
&
kernel_outputs
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录