Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
26242d6b
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
26242d6b
编写于
3月 29, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
test(mgb/opr-mm): add collective_comm tests
GitOrigin-RevId: 02edb26ba7dca10478d95dce55abf92ad5ed4557
上级
cd8ab9e3
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
165 additions
and
0 deletions
+165
-0
src/opr-mm/test/collective_comm.cpp
src/opr-mm/test/collective_comm.cpp
+165
-0
未找到文件。
src/opr-mm/test/collective_comm.cpp
浏览文件 @
26242d6b
...
...
@@ -76,6 +76,45 @@ class MockGroupClient final : public opr::GroupClient {
// Single-graph all-reduce test: two CollectiveComm operators sharing the key
// "all_reduce" and the "nccl" backend are built in one computing graph, one
// fed from gpu0 and one from gpu1, and both outputs are compared against the
// reference produced by make_all_reduce_output() (defined elsewhere in this
// file). The scenario is repeated for the MAX, MIN and SUM modes.
TEST(TestOprCollectiveComm, AllReduce) {
    // NOTE(review): presumably skips the test when fewer than two GPUs are
    // available -- confirm against the REQUIRE_GPU definition.
    REQUIRE_GPU(2);

    // Builds, runs and verifies one all-reduce graph for `mode`.
    // The checks deliberately live inside this lambda rather than the test
    // body: MGB_ASSERT_TENSOR_EQ is defined elsewhere, and if it behaves like
    // a fatal gtest assertion a failure only leaves the current invocation,
    // so the remaining modes are still exercised.
    auto check_mode = [](const Mode mode) {
        auto dev0 = CompNode::load("gpu0");
        auto dev1 = CompNode::load("gpu1");

        // Two independently generated 28x28 host inputs, one per participant.
        HostTensorGenerator<> make_input;
        auto input0 = make_input({28, 28});
        auto input1 = make_input({28, 28});

        HostTensorND result0;
        HostTensorND result1;
        HostTensorND expected;

        auto group_client = std::make_shared<MockGroupClient>();
        auto cg = ComputingGraph::make();

        // Both inputs are uploaded on gpu0; the second is then copied over to
        // gpu1 so that each CollectiveComm operator reads from its own device.
        auto in0 = opr::Host2DeviceCopy::make(*cg, input0, dev0);
        auto in1 = opr::Host2DeviceCopy::make(*cg, input1, dev0);
        auto in1_on_dev1 = opr::Copy::make(in1, dev1);

        // The two calls differ only in their input and in the second integer
        // argument (0 vs 1).
        // NOTE(review): the integers look like (participant count, rank,
        // hash) -- confirm against the CollectiveComm::make signature.
        auto out0 = opr::CollectiveComm::make(
                {in0}, cg.get(), "all_reduce", 2, 0, 0, group_client, {mode},
                dtype::Float32(), "nccl")[0];
        auto out1 = opr::CollectiveComm::make(
                {in1_on_dev1}, cg.get(), "all_reduce", 2, 1, 0, group_client,
                {mode}, dtype::Float32(), "nccl")[0];

        // Reference result computed from the two gpu0-resident inputs.
        auto reference = make_all_reduce_output(mode, {in0, in1});

        auto compiled = cg->compile(
                {make_callback_copy(out0, result0),
                 make_callback_copy(out1, result1),
                 make_callback_copy(reference, expected)});
        compiled->execute();

        // Every participant must end up holding the reference result.
        MGB_ASSERT_TENSOR_EQ(expected, result0);
        MGB_ASSERT_TENSOR_EQ(expected, result1);
    };

    // Same order as before: MAX, then MIN, then SUM.
    const Mode modes_under_test[] = {
            Mode::ALL_REDUCE_MAX,
            Mode::ALL_REDUCE_MIN,
            Mode::ALL_REDUCE_SUM,
    };
    for (const Mode current : modes_under_test) {
        check_mode(current);
    }
}
TEST
(
TestOprCollectiveComm
,
AllReduceMultiThread
)
{
REQUIRE_GPU
(
2
);
auto
cn0
=
CompNode
::
load
(
"gpu0"
);
auto
cn1
=
CompNode
::
load
(
"gpu1"
);
...
...
@@ -227,6 +266,38 @@ TEST(TestOprCollectiveComm, AllGather) {
auto
host_x1
=
gen
({
28
,
28
});
HostTensorND
host_y0
,
host_y1
,
host_y_expect
;
auto
client
=
std
::
make_shared
<
MockGroupClient
>
();
auto
graph
=
ComputingGraph
::
make
();
auto
x0
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
host_x0
,
cn0
);
auto
x1
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
host_x1
,
cn0
);
auto
x1c
=
opr
::
Copy
::
make
(
x1
,
cn1
);
auto
y0
=
opr
::
CollectiveComm
::
make
({
x0
},
graph
.
get
(),
"all_gather"
,
2
,
0
,
0
,
client
,
{
Mode
::
ALL_GATHER
},
dtype
::
Float32
(),
"nccl"
)[
0
];
auto
y1
=
opr
::
CollectiveComm
::
make
({
x1c
},
graph
.
get
(),
"all_gather"
,
2
,
1
,
0
,
client
,
{
Mode
::
ALL_GATHER
},
dtype
::
Float32
(),
"nccl"
)[
0
];
auto
y_expect
=
opr
::
Concat
::
make
({
x0
,
x1
},
0
);
auto
func
=
graph
->
compile
({
make_callback_copy
(
y0
,
host_y0
),
make_callback_copy
(
y1
,
host_y1
),
make_callback_copy
(
y_expect
,
host_y_expect
)});
func
->
execute
();
MGB_ASSERT_TENSOR_EQ
(
host_y_expect
,
host_y0
);
MGB_ASSERT_TENSOR_EQ
(
host_y_expect
,
host_y1
);
}
TEST
(
TestOprCollectiveComm
,
AllGatherMultiThread
)
{
REQUIRE_GPU
(
2
);
auto
cn0
=
CompNode
::
load
(
"gpu0"
);
auto
cn1
=
CompNode
::
load
(
"gpu1"
);
HostTensorGenerator
<>
gen
;
auto
host_x0
=
gen
({
28
,
28
});
auto
host_x1
=
gen
({
28
,
28
});
HostTensorND
host_y0
,
host_y1
,
host_y_expect
;
auto
client
=
std
::
make_shared
<
MockGroupClient
>
();
auto
run_0
=
[
&
]()
{
// rank 0
...
...
@@ -360,6 +431,39 @@ TEST(TestOprCollectiveComm, ReduceScatterSum) {
auto
cn0
=
CompNode
::
load
(
"gpu0"
);
auto
cn1
=
CompNode
::
load
(
"gpu1"
);
HostTensorGenerator
<>
gen
;
auto
host_x0
=
gen
({
28
,
28
});
auto
host_x1
=
gen
({
28
,
28
});
HostTensorND
host_y0
,
host_y1
,
host_y0_expect
,
host_y1_expect
;
auto
client
=
std
::
make_shared
<
MockGroupClient
>
();
auto
graph
=
ComputingGraph
::
make
();
auto
x0
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
host_x0
,
cn0
);
auto
x1
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
host_x1
,
cn0
);
auto
x1c
=
opr
::
Copy
::
make
(
x1
,
cn1
);
auto
y0
=
opr
::
CollectiveComm
::
make
({
x0
},
graph
.
get
(),
"reduce_scatter_sum"
,
2
,
0
,
0
,
client
,
{
Mode
::
REDUCE_SCATTER_SUM
},
dtype
::
Float32
(),
"nccl"
)[
0
];
auto
y1
=
opr
::
CollectiveComm
::
make
({
x1c
},
graph
.
get
(),
"reduce_scatter_sum"
,
2
,
1
,
0
,
client
,
{
Mode
::
REDUCE_SCATTER_SUM
},
dtype
::
Float32
(),
"nccl"
)[
0
];
auto
y_expect
=
make_reduce_scatter_sum_output
({
x0
,
x1
});
auto
func
=
graph
->
compile
({
make_callback_copy
(
y0
,
host_y0
),
make_callback_copy
(
y1
,
host_y1
),
make_callback_copy
(
y_expect
[
0
],
host_y0_expect
),
make_callback_copy
(
y_expect
[
1
],
host_y1_expect
)});
func
->
execute
();
MGB_ASSERT_TENSOR_EQ
(
host_y0_expect
,
host_y0
);
MGB_ASSERT_TENSOR_EQ
(
host_y1_expect
,
host_y1
);
}
TEST
(
TestOprCollectiveComm
,
ReduceScatterSumMultiThread
)
{
REQUIRE_GPU
(
2
);
auto
cn0
=
CompNode
::
load
(
"gpu0"
);
auto
cn1
=
CompNode
::
load
(
"gpu1"
);
HostTensorGenerator
<>
gen
;
auto
host_x0
=
gen
({
8
});
auto
host_x1
=
gen
({
8
});
...
...
@@ -499,6 +603,37 @@ TEST(TestOprCollectiveComm, ReduceSum) {
auto
cn0
=
CompNode
::
load
(
"gpu0"
);
auto
cn1
=
CompNode
::
load
(
"gpu1"
);
HostTensorGenerator
<>
gen
;
auto
host_x0
=
gen
({
28
,
28
});
auto
host_x1
=
gen
({
28
,
28
});
HostTensorND
host_y0
,
host_y1
,
host_y_expect
;
auto
client
=
std
::
make_shared
<
MockGroupClient
>
();
auto
graph
=
ComputingGraph
::
make
();
auto
x0
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
host_x0
,
cn0
);
auto
x1
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
host_x1
,
cn0
);
auto
x1c
=
opr
::
Copy
::
make
(
x1
,
cn1
);
auto
y0
=
opr
::
CollectiveComm
::
make
({
x0
},
graph
.
get
(),
"reduce_sum"
,
2
,
0
,
0
,
client
,
{
Mode
::
REDUCE_SUM
},
dtype
::
Float32
(),
"nccl"
)[
0
];
auto
y1
=
opr
::
CollectiveComm
::
make
({
x1c
},
graph
.
get
(),
"reduce_sum"
,
2
,
1
,
0
,
client
,
{
Mode
::
REDUCE_SUM
},
dtype
::
Float32
(),
"nccl"
)[
0
];
auto
y_expect
=
x0
+
x1
;
auto
func
=
graph
->
compile
({
make_callback_copy
(
y0
,
host_y0
),
make_callback_copy
(
y1
,
host_y1
),
make_callback_copy
(
y_expect
,
host_y_expect
)});
func
->
execute
();
MGB_ASSERT_TENSOR_EQ
(
host_y_expect
,
host_y0
);
}
TEST
(
TestOprCollectiveComm
,
ReduceSumMultiThread
)
{
REQUIRE_GPU
(
2
);
auto
cn0
=
CompNode
::
load
(
"gpu0"
);
auto
cn1
=
CompNode
::
load
(
"gpu1"
);
HostTensorGenerator
<>
gen
;
auto
host_x0
=
gen
({
28
,
28
});
auto
host_x1
=
gen
({
28
,
28
});
...
...
@@ -623,6 +758,36 @@ TEST(TestOprCollectiveComm, Broadcast) {
auto
cn0
=
CompNode
::
load
(
"gpu0"
);
auto
cn1
=
CompNode
::
load
(
"gpu1"
);
HostTensorGenerator
<>
gen
;
auto
host_x0
=
gen
({
28
,
28
});
HostTensorND
host_y0
,
host_y1
,
host_y_expect
;
auto
client
=
std
::
make_shared
<
MockGroupClient
>
();
auto
graph
=
ComputingGraph
::
make
();
auto
x0
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
host_x0
,
cn0
);
auto
y0
=
opr
::
CollectiveComm
::
make
({
x0
},
graph
.
get
(),
"broadcast"
,
2
,
0
,
0
,
client
,
{
Mode
::
BROADCAST
},
dtype
::
Float32
(),
"nccl"
)[
0
];
auto
y_dev
=
std
::
make_shared
<
DeviceTensorND
>
(
DeviceTensorND
()
.
comp_node
(
cn1
)
.
dtype
(
dtype
::
Float32
())
.
resize
(
host_x0
->
shape
()));
auto
y1
=
opr
::
CollectiveComm
::
make
({},
graph
.
get
(),
"broadcast"
,
2
,
1
,
0
,
client
,
{
y_dev
},
{
Mode
::
BROADCAST
},
dtype
::
Float32
(),
"nccl"
,
{
cn1
})[
0
];
auto
func
=
graph
->
compile
({
make_callback_copy
(
y0
,
host_y0
),
make_callback_copy
(
y1
,
host_y1
)});
func
->
execute
();
MGB_ASSERT_TENSOR_EQ
(
*
host_x0
,
host_y0
);
MGB_ASSERT_TENSOR_EQ
(
*
host_x0
,
host_y1
);
}
TEST
(
TestOprCollectiveComm
,
BroadcastMultiThread
)
{
REQUIRE_GPU
(
2
);
auto
cn0
=
CompNode
::
load
(
"gpu0"
);
auto
cn1
=
CompNode
::
load
(
"gpu1"
);
HostTensorGenerator
<>
gen
;
auto
host_x0
=
gen
({
28
,
28
});
HostTensorND
host_y0
,
host_y1
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录