Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Oneflow-Inc
oneflow
提交
56848e6a
O
oneflow
项目概览
Oneflow-Inc
/
oneflow
上一次同步 接近 3 年
通知
13
Star
2733
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
oneflow
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
56848e6a
编写于
1月 02, 2018
作者:
W
willzhang4a58
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cpu device
上级
e47e915f
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
116 addition
and
65 deletion
+116
-65
oneflow/core/graph/chain_graph.cpp
oneflow/core/graph/chain_graph.cpp
+1
-1
oneflow/core/graph/chain_node.cpp
oneflow/core/graph/chain_node.cpp
+2
-2
oneflow/core/job/id_manager.cpp
oneflow/core/job/id_manager.cpp
+24
-13
oneflow/core/job/id_manager.h
oneflow/core/job/id_manager.h
+8
-2
oneflow/core/job/job_desc.h
oneflow/core/job/job_desc.h
+3
-1
oneflow/core/job/oneflow.cpp
oneflow/core/job/oneflow.cpp
+2
-4
oneflow/core/job/parallel_desc.cpp
oneflow/core/job/parallel_desc.cpp
+45
-21
oneflow/core/job/parallel_desc.h
oneflow/core/job/parallel_desc.h
+3
-3
oneflow/core/job/resource.proto
oneflow/core/job/resource.proto
+15
-9
oneflow/core/job/runtime.cpp
oneflow/core/job/runtime.cpp
+2
-0
oneflow/core/register/register_desc.cpp
oneflow/core/register/register_desc.cpp
+4
-2
oneflow/core/thread/thread_manager.cpp
oneflow/core/thread/thread_manager.cpp
+7
-7
未找到文件。
oneflow/core/graph/chain_graph.cpp
浏览文件 @
56848e6a
...
...
@@ -343,7 +343,7 @@ void ChainGraph::BuildLossPrintStruct() {
ParallelConf
loss_print_pr_conf
;
loss_print_pr_conf
.
set_policy
(
kDataParallel
);
loss_print_pr_conf
.
add_device_name
(
IDMgr
::
Singleton
()
->
MachineName4MachineId
(
0
)
+
":
0
"
);
IDMgr
::
Singleton
()
->
MachineName4MachineId
(
0
)
+
":
persistence:1
"
);
auto
loss_print_chain
=
NewNode
<
LossPrintChainNode
>
();
loss_print_chain
->
mut_op_vec
()
=
{
loss_print_op
};
loss_print_chain
->
mut_parallel_desc
().
reset
(
...
...
oneflow/core/graph/chain_node.cpp
浏览文件 @
56848e6a
...
...
@@ -128,10 +128,10 @@ void ChainNode::GenSortedCompTaskNodes(CompTaskNodeHandler Handler) const {
int64_t
parallel_idx
=
0
;
int64_t
parallel_num
=
parallel_desc_
->
parallel_num
();
for
(
int64_t
machine_id
:
parallel_desc_
->
sorted_machine_ids
())
{
for
(
int64_t
dev_phy_id
:
parallel_desc_
->
sorted_dev_phy
_ids
(
machine_id
))
{
for
(
int64_t
thrd_id
:
parallel_desc_
->
sorted_thrd
_ids
(
machine_id
))
{
CompTaskNode
*
comp_task_node
=
NewCompTaskNode
();
comp_task_node
->
set_machine_id
(
machine_id
);
comp_task_node
->
set_thrd_id
(
dev_phy
_id
);
comp_task_node
->
set_thrd_id
(
thrd
_id
);
comp_task_node
->
set_chain_node
(
this
);
comp_task_node
->
mut_parallel_ctx
()
->
set_parallel_id
(
parallel_idx
++
);
comp_task_node
->
mut_parallel_ctx
()
->
set_parallel_num
(
parallel_num
);
...
...
oneflow/core/job/id_manager.cpp
浏览文件 @
56848e6a
...
...
@@ -10,8 +10,8 @@ const std::string& IDMgr::MachineName4MachineId(int64_t machine_id) const {
}
DeviceType
IDMgr
::
GetDeviceTypeFromThrdId
(
int64_t
thrd_id
)
const
{
if
(
thrd_id
<
device_num_per_machine
_
)
{
return
JobDesc
::
Singleton
()
->
resource
().
device_type
()
;
if
(
cpu_device_num_
<=
thrd_id
&&
thrd_id
<
xpu_device_num
_
)
{
return
DeviceType
::
kGPU
;
}
else
{
return
DeviceType
::
kCPU
;
}
...
...
@@ -24,21 +24,30 @@ int64_t IDMgr::NewTaskId(int64_t machine_id, int64_t thrd_id) {
return
machine_thrd_id
|
(
thread_id2num_of_tasks_
[
machine_thrd_id
]
++
);
}
int64_t
IDMgr
::
GetGpuDeviceThrdId
(
int64_t
dev_phy_id
)
const
{
return
cpu_device_num_
+
dev_phy_id
;
}
int64_t
IDMgr
::
GetGpuDevPhyIdFromThrdId
(
int64_t
thrd_id
)
const
{
CHECK_GE
(
thrd_id
,
cpu_device_num_
);
return
thrd_id
-
cpu_device_num_
;
}
int64_t
IDMgr
::
AllocatePersistenceThrdId
(
int64_t
machine_id
)
{
int64_t
&
offset
=
persistence_thrd_offset_
[
machine_id
];
int64_t
ret
=
device_num_per_machine
_
+
offset
;
int64_t
ret
=
xpu_device_num
_
+
offset
;
offset
=
(
offset
+
1
)
%
JobDesc
::
Singleton
()
->
PersistenceWorkerNum
();
return
ret
;
}
int64_t
IDMgr
::
AllocateBoxingThrdId
(
int64_t
machine_id
)
{
int64_t
offset
=
boxing_thrd_offset_
[
machine_id
];
int64_t
ret
=
device_num_per_machine_
+
JobDesc
::
Singleton
()
->
PersistenceWorkerNum
()
+
offset
;
int64_t
ret
=
xpu_device_num_
+
JobDesc
::
Singleton
()
->
PersistenceWorkerNum
()
+
offset
;
offset
=
(
offset
+
1
)
%
JobDesc
::
Singleton
()
->
BoxingWorkerNum
();
return
ret
;
}
int64_t
IDMgr
::
CommNetThrdId
()
const
{
return
device_num_per_machine
_
+
JobDesc
::
Singleton
()
->
PersistenceWorkerNum
()
return
xpu_device_num
_
+
JobDesc
::
Singleton
()
->
PersistenceWorkerNum
()
+
JobDesc
::
Singleton
()
->
BoxingWorkerNum
();
}
...
...
@@ -75,19 +84,21 @@ int64_t IDMgr::NewWorkStreamId(int64_t machine_id, int64_t thrd_id) {
IDMgr
::
IDMgr
()
{
const
Resource
&
resource
=
JobDesc
::
Singleton
()
->
resource
();
machine_num_
=
resource
.
machine_size
();
CHECK_LT
(
machine_num_
,
static_cast
<
int64_t
>
(
1
)
<<
machine_id_bit_num_
);
device_num_per_machine_
=
resource
.
device_num_per_machine
();
CHECK_LT
(
device_num_per_machine_
,
int64_t
machine_num
=
resource
.
machine_size
();
CHECK_LT
(
machine_num
,
static_cast
<
int64_t
>
(
1
)
<<
machine_id_bit_num_
);
cpu_device_num_
=
resource
.
cpu_device_num
();
gpu_device_num_
=
resource
.
gpu_device_num
();
xpu_device_num_
=
cpu_device_num_
+
gpu_device_num_
;
CHECK_LT
(
xpu_device_num_
,
(
static_cast
<
int64_t
>
(
1
)
<<
thread_id_bit_num_
)
-
3
);
for
(
int64_t
i
=
0
;
i
<
machine_num
_
;
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
machine_num
;
++
i
)
{
const
std
::
string
&
machine_name
=
resource
.
machine
(
i
).
name
();
CHECK
(
machine_name2machine_id_
.
emplace
(
machine_name
,
i
).
second
);
CHECK
(
machine_id2machine_name_
.
emplace
(
i
,
machine_name
).
second
);
}
regst_desc_id_count_
=
0
;
persistence_thrd_offset_
.
assign
(
machine_num
_
,
0
);
boxing_thrd_offset_
.
assign
(
machine_num
_
,
0
);
persistence_thrd_offset_
.
assign
(
machine_num
,
0
);
boxing_thrd_offset_
.
assign
(
machine_num
,
0
);
}
int64_t
IDMgr
::
GetMachineThrdId
(
int64_t
machine_id
,
int64_t
thrd_id
)
{
...
...
oneflow/core/job/id_manager.h
浏览文件 @
56848e6a
...
...
@@ -20,6 +20,11 @@ class IDMgr final {
DeviceType
GetDeviceTypeFromThrdId
(
int64_t
thrd_id
)
const
;
int64_t
NewTaskId
(
int64_t
machine_id
,
int64_t
thrd_id
);
int64_t
GetCpuDeviceThrdId
(
int64_t
dev_phy_id
)
const
{
return
dev_phy_id
;
}
int64_t
GetGpuDeviceThrdId
(
int64_t
dev_phy_id
)
const
;
int64_t
GetGpuDevPhyIdFromThrdId
(
int64_t
thrd_id
)
const
;
int64_t
AllocatePersistenceThrdId
(
int64_t
machine_id
);
int64_t
AllocateBoxingThrdId
(
int64_t
machine_id
);
int64_t
CommNetThrdId
()
const
;
...
...
@@ -44,8 +49,9 @@ class IDMgr final {
IDMgr
();
int64_t
GetMachineThrdId
(
int64_t
machine_id
,
int64_t
thrd_id
);
int32_t
machine_num_
;
int64_t
device_num_per_machine_
;
int64_t
cpu_device_num_
;
int64_t
gpu_device_num_
;
int64_t
xpu_device_num_
;
int64_t
regst_desc_id_count_
;
HashMap
<
int64_t
,
int64_t
>
thread_id2num_of_tasks_
;
HashMap
<
int64_t
,
int64_t
>
thread_id2num_of_streams_
;
...
...
oneflow/core/job/job_desc.h
浏览文件 @
56848e6a
...
...
@@ -28,7 +28,9 @@ class JobDesc final {
size_t
SizeOfOneDataId
()
const
;
bool
use_rdma
()
const
{
return
job_conf_
.
use_rdma
();
}
int64_t
TotalMachineNum
()
const
{
return
resource_
.
machine
().
size
();
}
DeviceType
GetDeviceType
()
const
{
return
resource_
.
device_type
();
}
int32_t
CpuDeviceNum
()
const
{
return
resource_
.
cpu_device_num
();
}
int32_t
GpuDeviceNum
()
const
{
return
resource_
.
gpu_device_num
();
}
int32_t
XpuDeviceNum
()
const
{
return
CpuDeviceNum
()
+
GpuDeviceNum
();
}
int32_t
PersistenceWorkerNum
()
const
;
int32_t
BoxingWorkerNum
()
const
;
int32_t
CommNetWorkerNum
()
const
;
...
...
oneflow/core/job/oneflow.cpp
浏览文件 @
56848e6a
...
...
@@ -22,10 +22,8 @@ std::string GetAmdCtrlKey(int64_t machine_id) {
void
PushAvailableMemDescOfThisMachine
()
{
const
JobDesc
*
job_desc
=
JobDesc
::
Singleton
();
AvailableMemDescOfMachine
this_machine_mem_desc
;
if
(
job_desc
->
GetDeviceType
()
==
DeviceType
::
kGPU
)
{
FOR_RANGE
(
int
,
i
,
0
,
job_desc
->
resource
().
device_num_per_machine
())
{
this_machine_mem_desc
.
add_zone_size
(
GetAvailableGpuMemSize
(
i
));
}
FOR_RANGE
(
int
,
i
,
0
,
job_desc
->
GpuDeviceNum
())
{
this_machine_mem_desc
.
add_zone_size
(
GetAvailableGpuMemSize
(
i
));
}
this_machine_mem_desc
.
add_zone_size
(
GetAvailableCpuMemSize
());
CtrlClient
::
Singleton
()
->
PushKV
(
...
...
oneflow/core/job/parallel_desc.cpp
浏览文件 @
56848e6a
...
...
@@ -5,34 +5,59 @@ namespace oneflow {
namespace
{
void
ParseDeviceNameConf
(
const
std
::
string
&
device_name
,
std
::
string
*
mchn_name
,
std
::
string
*
device_id_str
)
{
size_t
delimiter_pos
=
device_name
.
rfind
(
":"
);
CHECK_NE
(
delimiter_pos
,
std
::
string
::
npos
);
*
mchn_name
=
device_name
.
substr
(
0
,
delimiter_pos
);
*
device_id_str
=
device_name
.
substr
(
delimiter_pos
+
1
);
std
::
string
*
device_tag
,
std
::
string
*
device_id_str
)
{
size_t
second_delimiter_pos
=
device_name
.
rfind
(
":"
);
CHECK_NE
(
second_delimiter_pos
,
std
::
string
::
npos
);
size_t
first_delimiter_pos
=
device_name
.
rfind
(
":"
,
second_delimiter_pos
-
1
);
CHECK_NE
(
first_delimiter_pos
,
std
::
string
::
npos
);
*
mchn_name
=
device_name
.
substr
(
0
,
first_delimiter_pos
);
*
device_tag
=
device_name
.
substr
(
first_delimiter_pos
+
1
,
second_delimiter_pos
-
first_delimiter_pos
-
1
);
*
device_id_str
=
device_name
.
substr
(
second_delimiter_pos
+
1
);
}
}
// namespace
ParallelDesc
::
ParallelDesc
(
const
ParallelConf
&
user_conf
)
{
policy_
=
user_conf
.
policy
();
HashSet
<
std
::
string
>
machine_name_set
;
std
::
string
device_tag_check
;
for
(
const
std
::
string
&
device_name
:
user_conf
.
device_name
())
{
std
::
string
mchn_name
;
std
::
string
device_tag
;
std
::
string
device_id_str
;
ParseDeviceNameConf
(
device_name
,
&
mchn_name
,
&
device_id_str
);
ParseDeviceNameConf
(
device_name
,
&
mchn_name
,
&
device_tag
,
&
device_id_str
);
if
(
device_tag_check
==
""
)
{
device_tag_check
=
device_tag
;
}
else
if
(
device_tag_check
==
"persistence"
)
{
CHECK_STREQ
(
device_tag
.
c_str
(),
"persistence"
);
CHECK
(
machine_name_set
.
find
(
mchn_name
)
==
machine_name_set
.
end
());
}
else
{
// do nothing
}
machine_name_set
.
insert
(
mchn_name
);
if
(
device_tag
==
"persistence"
)
{
int64_t
part_num
=
oneflow_cast
<
int64_t
>
(
device_id_str
);
device_id_str
=
"0-"
+
std
::
to_string
(
part_num
-
1
);
}
int64_t
machine_id
=
IDMgr
::
Singleton
()
->
MachineID4MachineName
(
mchn_name
);
sorted_machine_ids_
.
push_back
(
machine_id
);
int64_t
minus_pos
=
device_id_str
.
r
find
(
"-"
);
int64_t
minus_pos
=
device_id_str
.
find
(
"-"
);
if
(
minus_pos
==
std
::
string
::
npos
)
{
int64_t
dev_phy_id
=
oneflow_cast
<
int64_t
>
(
device_id_str
);
machine_id2sorted_dev_phy_ids_
[
machine_id
]
=
{
dev_phy_id
};
continue
;
device_id_str
=
device_id_str
+
"-"
+
device_id_str
;
minus_pos
=
device_id_str
.
find
(
"-"
);
}
int64_t
min_id
=
oneflow_cast
<
int64_t
>
(
device_id_str
.
substr
(
0
,
minus_pos
));
int64_t
max_id
=
oneflow_cast
<
int64_t
>
(
device_id_str
.
substr
(
minus_pos
+
1
));
CHECK_LE
(
min_id
,
max_id
);
for
(
int64_t
dev_phy_id
=
min_id
;
dev_phy_id
<=
max_id
;
++
dev_phy_id
)
{
machine_id2sorted_dev_phy_ids_
[
machine_id
].
push_back
(
dev_phy_id
);
int64_t
thrd_id
=
-
1
;
if
(
device_tag
==
"gpu"
)
{
thrd_id
=
IDMgr
::
Singleton
()
->
GetGpuDeviceThrdId
(
dev_phy_id
);
}
else
{
thrd_id
=
IDMgr
::
Singleton
()
->
GetCpuDeviceThrdId
(
dev_phy_id
);
}
machine_id2sorted_thrd_ids_
[
machine_id
].
push_back
(
thrd_id
);
}
}
ClearUp
();
...
...
@@ -47,7 +72,7 @@ void ParallelDesc::RemoveNeedlessDevice(const std::string& op_name,
int32_t
device_cnt
=
0
;
int64_t
max_machine_id
=
-
1
;
for
(
int64_t
machine_id
:
sorted_machine_ids_
)
{
auto
it
=
machine_id2sorted_
dev_phy
_ids_
.
find
(
machine_id
);
auto
it
=
machine_id2sorted_
thrd
_ids_
.
find
(
machine_id
);
int32_t
cur_device_num
=
it
->
second
.
size
();
int32_t
cur_device_max_num
=
max_device_num
-
device_cnt
;
if
(
cur_device_num
>
cur_device_max_num
)
{
...
...
@@ -71,7 +96,7 @@ void ParallelDesc::RemoveNeedlessDevice(const std::string& op_name,
}
}
EraseIf
<
int64_t
,
std
::
vector
<
int64_t
>>
(
&
machine_id2sorted_
dev_phy
_ids_
,
&
machine_id2sorted_
thrd
_ids_
,
[
&
](
HashMap
<
int64_t
,
std
::
vector
<
int64_t
>>::
iterator
it
)
{
return
it
->
first
>
max_machine_id
;
});
...
...
@@ -80,10 +105,10 @@ void ParallelDesc::RemoveNeedlessDevice(const std::string& op_name,
void
ParallelDesc
::
RemoveInvalidDevice
(
const
std
::
string
&
op_name
)
{
for
(
int64_t
machine_id
:
sorted_machine_ids_
)
{
auto
&
sorted_dev_ids
=
machine_id2sorted_
dev_phy
_ids_
.
at
(
machine_id
);
auto
bound_it
=
std
::
lower_bound
(
sorted_dev_ids
.
begin
(),
sorted_dev_ids
.
end
(),
JobDesc
::
Singleton
()
->
resource
().
device_num_per_machine
());
auto
&
sorted_dev_ids
=
machine_id2sorted_
thrd
_ids_
.
at
(
machine_id
);
auto
bound_it
=
s
td
::
lower_bound
(
s
orted_dev_ids
.
begin
(),
sorted_dev_ids
.
end
(),
JobDesc
::
Singleton
()
->
XpuDeviceNum
());
if
(
bound_it
==
sorted_dev_ids
.
end
())
{
continue
;
}
else
{
...
...
@@ -100,19 +125,18 @@ void ParallelDesc::RemoveInvalidDevice(const std::string& op_name) {
bool
ParallelDesc
::
Equal
(
const
ParallelDesc
&
rhs
)
const
{
return
policy_
==
rhs
.
policy_
&&
sorted_machine_ids_
==
rhs
.
sorted_machine_ids_
&&
machine_id2sorted_dev_phy_ids_
==
rhs
.
machine_id2sorted_dev_phy_ids_
;
&&
machine_id2sorted_thrd_ids_
==
rhs
.
machine_id2sorted_thrd_ids_
;
}
void
ParallelDesc
::
ClearUp
()
{
EraseIf
<
int64_t
,
std
::
vector
<
int64_t
>>
(
&
machine_id2sorted_
dev_phy
_ids_
,
&
machine_id2sorted_
thrd
_ids_
,
[](
HashMap
<
int64_t
,
std
::
vector
<
int64_t
>>::
iterator
it
)
{
return
it
->
second
.
empty
();
});
sorted_machine_ids_
.
clear
();
parallel_num_
=
0
;
for
(
auto
&
pair
:
machine_id2sorted_
dev_phy
_ids_
)
{
for
(
auto
&
pair
:
machine_id2sorted_
thrd
_ids_
)
{
sorted_machine_ids_
.
push_back
(
pair
.
first
);
SortAndRemoveDuplication
(
&
(
pair
.
second
));
parallel_num_
+=
pair
.
second
.
size
();
...
...
oneflow/core/job/parallel_desc.h
浏览文件 @
56848e6a
...
...
@@ -21,8 +21,8 @@ class ParallelDesc {
const
std
::
vector
<
int64_t
>&
sorted_machine_ids
()
const
{
return
sorted_machine_ids_
;
}
const
std
::
vector
<
int64_t
>&
sorted_
dev_phy
_ids
(
int64_t
machine_id
)
const
{
return
machine_id2sorted_
dev_phy
_ids_
.
at
(
machine_id
);
const
std
::
vector
<
int64_t
>&
sorted_
thrd
_ids
(
int64_t
machine_id
)
const
{
return
machine_id2sorted_
thrd
_ids_
.
at
(
machine_id
);
}
int64_t
parallel_num
()
const
{
return
parallel_num_
;
}
...
...
@@ -44,7 +44,7 @@ class ParallelDesc {
ParallelPolicy
policy_
;
std
::
vector
<
int64_t
>
sorted_machine_ids_
;
HashMap
<
int64_t
,
std
::
vector
<
int64_t
>>
machine_id2sorted_
dev_phy
_ids_
;
HashMap
<
int64_t
,
std
::
vector
<
int64_t
>>
machine_id2sorted_
thrd
_ids_
;
int64_t
parallel_num_
;
};
...
...
oneflow/core/job/resource.proto
浏览文件 @
56848e6a
...
...
@@ -14,18 +14,24 @@ enum DeviceType {
message
Resource
{
repeated
Machine
machine
=
1
;
required
int32
device_num_per_machine
=
2
;
required
DeviceType
device_type
=
3
;
required
int32
cpu_device_num
=
2
;
required
int32
gpu_device_num
=
3
;
optional
int32
persistence_worker_num
=
4
[
default
=
4
];
optional
int32
boxing_worker_num
=
5
[
default
=
4
];
optional
int32
comm_net_worker_num
=
6
[
default
=
4
];
}
// If one machine named "machine_xxx" and device_num_per_machine = 4
// then we have 4 devices:
// "machine_xxx:0"
// "machine_xxx:1"
// "machine_xxx:2"
// "machine_xxx:3"
// If one machine named "machine_xxx" and cpu_device_num_per_machine = 4, gpu_device_num_per_machine = 2
// then we have follow device_names:
// "machine_xxx:cpu:0"
// "machine_xxx:cpu:1"
// "machine_xxx:cpu:2"
// "machine_xxx:cpu:3"
// "machine_xxx:gpu:0"
// "machine_xxx:gpu:1"
// "machine_xxx:persistence"
//
// "machine_xxx:0-2" means {"machine_xxx:0", "machine_xxx:1", "machine_xxx:2"}
// "machine_xxx:persistence" is special device_name for persistence_op such as "data_loader", "print"
// "data_loader" placed with "machine_xxx:persistence:10" means: load 10-part on "machine_xxx:persistence"
// "machine_xxx:xpu:0-2" means {"machine_xxx:xpu:0", "machine_xxx:xpu:1", "machine_xxx:xpu:2"}
oneflow/core/job/runtime.cpp
浏览文件 @
56848e6a
...
...
@@ -86,6 +86,7 @@ void Runtime::NewAllSingleton(const Plan& plan, bool is_experiment_phase) {
EpollCommNet
::
Init
();
#endif
SnapshotMgr
::
NewSingleton
(
plan
);
MemoryAllocator
::
NewSingleton
();
RegstMgr
::
NewSingleton
();
ActorMsgBus
::
NewSingleton
();
ThreadMgr
::
NewSingleton
();
...
...
@@ -95,6 +96,7 @@ void Runtime::DeleteAllSingleton() {
ThreadMgr
::
DeleteSingleton
();
ActorMsgBus
::
DeleteSingleton
();
RegstMgr
::
DeleteSingleton
();
MemoryAllocator
::
DeleteSingleton
();
SnapshotMgr
::
DeleteSingleton
();
delete
CommNet
::
Singleton
();
RuntimeCtx
::
DeleteSingleton
();
...
...
oneflow/core/register/register_desc.cpp
浏览文件 @
56848e6a
...
...
@@ -100,7 +100,8 @@ void RegstDesc::InferMemCase() {
int64_t
thrd_id
=
producer_
->
thrd_id
();
if
(
auto
cp_hd_producer
=
dynamic_cast
<
const
CopyHdTaskNode
*>
(
producer_
))
{
if
(
cp_hd_producer
->
copy_type
()
==
CopyHdOpConf
::
H2D
)
{
mem_case_
.
mutable_device_cuda_mem
()
->
set_device_id
(
thrd_id
);
mem_case_
.
mutable_device_cuda_mem
()
->
set_device_id
(
IDMgr
::
Singleton
()
->
GetGpuDevPhyIdFromThrdId
(
thrd_id
));
}
else
{
mem_case_
.
mutable_host_pinned_mem
()
->
set_used_by_device
(
true
);
SetHostPinnedMemoryAccordingToConsumers
(
consumers_
,
&
mem_case_
);
...
...
@@ -110,7 +111,8 @@ void RegstDesc::InferMemCase() {
SetHostPinnedMemoryAccordingToConsumers
(
consumers_
,
&
mem_case_
);
}
else
{
if
(
producer_
->
device_type
()
==
kGPU
)
{
mem_case_
.
mutable_device_cuda_mem
()
->
set_device_id
(
thrd_id
);
mem_case_
.
mutable_device_cuda_mem
()
->
set_device_id
(
IDMgr
::
Singleton
()
->
GetGpuDevPhyIdFromThrdId
(
thrd_id
));
}
else
{
mem_case_
.
mutable_host_pageable_mem
();
SetHostPinnedMemoryAccordingToConsumers
(
consumers_
,
&
mem_case_
);
...
...
oneflow/core/thread/thread_manager.cpp
浏览文件 @
56848e6a
...
...
@@ -19,13 +19,13 @@ Thread* ThreadMgr::GetThrd(int64_t thrd_id) { return threads_.at(thrd_id); }
ThreadMgr
::
ThreadMgr
()
{
const
JobDesc
*
job_desc
=
JobDesc
::
Singleton
();
int64_t
thrd_id
=
0
;
// device
FOR_RANGE
(
int64_t
,
dev_id
,
0
,
job_desc
->
resource
().
device_num_per_machine
())
{
if
(
job_desc
->
resource
().
device_type
()
==
kGPU
)
{
threads_
.
push_back
(
new
GpuThread
(
thrd_id
++
,
dev_id
));
}
else
{
threads_
.
push_back
(
new
CpuThread
(
thrd_id
++
));
}
//
cpu
device
FOR_RANGE
(
int64_t
,
i
,
0
,
job_desc
->
CpuDeviceNum
())
{
threads_
.
push_back
(
new
CpuThread
(
thrd_id
++
));
}
// gpu device
FOR_RANGE
(
int64_t
,
i
,
0
,
job_desc
->
GpuDeviceNum
())
{
threads_
.
push_back
(
new
GpuThread
(
thrd_id
++
,
i
));
}
// persistence
FOR_RANGE
(
int64_t
,
i
,
0
,
job_desc
->
PersistenceWorkerNum
())
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录