Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
318e3012
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
318e3012
编写于
5月 25, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
5月 25, 2020
浏览文件
操作
浏览文件
下载
差异文件
!1422 Fix result error when calling AllReduce serially.
Merge pull request !1422 from ZPaC/fix-multi-allreduce-calling-error
上级
55027096
d9bcdac3
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
64 addition
and
14 deletion
+64
-14
mindspore/ccsrc/device/gpu/gpu_stream_assign.cc
mindspore/ccsrc/device/gpu/gpu_stream_assign.cc
+17
-13
mindspore/ccsrc/device/gpu/gpu_stream_assign.h
mindspore/ccsrc/device/gpu/gpu_stream_assign.h
+1
-1
tests/st/nccl/test_nccl_all_reduce_op.py
tests/st/nccl/test_nccl_all_reduce_op.py
+46
-0
未找到文件。
mindspore/ccsrc/device/gpu/gpu_stream_assign.cc
浏览文件 @
318e3012
...
@@ -40,21 +40,24 @@ void AssignGpuStream(const std::shared_ptr<session::KernelGraph> &kernel_graph)
...
@@ -40,21 +40,24 @@ void AssignGpuStream(const std::shared_ptr<session::KernelGraph> &kernel_graph)
}
}
}
}
if
(
allreduce_kernels
.
size
()
>
1
)
{
if
(
allreduce_kernels
.
size
()
>
1
)
{
DeviceStream
comm_stream
=
nullptr
;
// Assign multiple streams only when there's Recv node for AllReduce.
GPUDeviceManager
::
GetInstance
().
CreateStream
(
&
comm_stream
);
std
::
transform
(
allreduce_kernels
.
begin
(),
allreduce_kernels
.
end
(),
allreduce_kernels
.
begin
(),
[
&
](
CNodePtr
allreduce_kernel
)
{
AnfAlgo
::
SetNodeAttr
(
"stream_id"
,
MakeValue
(
reinterpret_cast
<
uintptr_t
>
(
comm_stream
)),
allreduce_kernel
);
return
allreduce_kernel
;
});
std
::
vector
<
SendRecvPair
>
send_recv_pairs
;
std
::
vector
<
SendRecvPair
>
send_recv_pairs
;
FindAllReduceStreamSwitchPos
(
kernel_graph
,
&
send_recv_pairs
);
if
(
FindAllReduceStreamSwitchPos
(
kernel_graph
,
&
send_recv_pairs
))
{
InsertStreamSwitchNode
(
kernel_graph
,
send_recv_pairs
);
DeviceStream
comm_stream
=
nullptr
;
GPUDeviceManager
::
GetInstance
().
CreateStream
(
&
comm_stream
);
std
::
transform
(
allreduce_kernels
.
begin
(),
allreduce_kernels
.
end
(),
allreduce_kernels
.
begin
(),
[
&
](
CNodePtr
allreduce_kernel
)
{
AnfAlgo
::
SetNodeAttr
(
"stream_id"
,
MakeValue
(
reinterpret_cast
<
uintptr_t
>
(
comm_stream
)),
allreduce_kernel
);
return
allreduce_kernel
;
});
InsertStreamSwitchNode
(
kernel_graph
,
send_recv_pairs
);
}
else
{
return
;
}
}
}
}
}
void
FindAllReduceStreamSwitchPos
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph
,
bool
FindAllReduceStreamSwitchPos
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph
,
std
::
vector
<
SendRecvPair
>
*
send_recv_pairs
)
{
std
::
vector
<
SendRecvPair
>
*
send_recv_pairs
)
{
auto
execution_kernels
=
kernel_graph
->
execution_order
();
auto
execution_kernels
=
kernel_graph
->
execution_order
();
std
::
vector
<
CNodePtr
>::
iterator
iter
,
iter_begin
;
std
::
vector
<
CNodePtr
>::
iterator
iter
,
iter_begin
;
...
@@ -77,14 +80,15 @@ void FindAllReduceStreamSwitchPos(const std::shared_ptr<session::KernelGraph> &k
...
@@ -77,14 +80,15 @@ void FindAllReduceStreamSwitchPos(const std::shared_ptr<session::KernelGraph> &k
std
::
vector
<
CNodePtr
>::
iterator
mock_recv_node_iter
=
std
::
vector
<
CNodePtr
>::
iterator
mock_recv_node_iter
=
FindRecvNodePos
(
iter
,
iter_end
,
*
iter
,
kAllReduceStreamSwitch
);
FindRecvNodePos
(
iter
,
iter_end
,
*
iter
,
kAllReduceStreamSwitch
);
if
(
mock_recv_node_iter
==
iter_end
)
{
if
(
mock_recv_node_iter
==
iter_end
)
{
MS_LOG
(
WARNING
)
<<
"Can't find
send node place before
AllReduce node."
;
MS_LOG
(
WARNING
)
<<
"Can't find
recv node place after
AllReduce node."
;
continu
e
;
return
fals
e
;
}
}
SendRecvPair
pair2
=
{
kAllReduceStreamSwitch
,
*
iter
,
*
mock_recv_node_iter
,
IntToSize
(
iter
-
iter_begin
+
1
),
SendRecvPair
pair2
=
{
kAllReduceStreamSwitch
,
*
iter
,
*
mock_recv_node_iter
,
IntToSize
(
iter
-
iter_begin
+
1
),
IntToSize
(
mock_recv_node_iter
-
iter_begin
)};
IntToSize
(
mock_recv_node_iter
-
iter_begin
)};
send_recv_pairs
->
push_back
(
pair2
);
send_recv_pairs
->
push_back
(
pair2
);
}
}
}
}
return
true
;
}
}
std
::
vector
<
CNodePtr
>::
iterator
FindSendNodePos
(
std
::
vector
<
CNodePtr
>::
iterator
begin
,
std
::
vector
<
CNodePtr
>::
iterator
FindSendNodePos
(
std
::
vector
<
CNodePtr
>::
iterator
begin
,
...
...
mindspore/ccsrc/device/gpu/gpu_stream_assign.h
浏览文件 @
318e3012
...
@@ -48,7 +48,7 @@ struct StreamSwitchNode {
...
@@ -48,7 +48,7 @@ struct StreamSwitchNode {
}
}
};
};
void
AssignGpuStream
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph
);
void
AssignGpuStream
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph
);
void
FindAllReduceStreamSwitchPos
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph
,
bool
FindAllReduceStreamSwitchPos
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph
,
std
::
vector
<
SendRecvPair
>
*
send_recv_pairs
);
std
::
vector
<
SendRecvPair
>
*
send_recv_pairs
);
// Find Send node position according to "mock" recv node.
// Find Send node position according to "mock" recv node.
// "mock" recv node is a gpu kernel node after a real Recv node, e.g. AllReduce node.
// "mock" recv node is a gpu kernel node after a real Recv node, e.g. AllReduce node.
...
...
tests/st/nccl/test_nccl_all_reduce_op.py
浏览文件 @
318e3012
...
@@ -75,3 +75,49 @@ def test_AllReduce():
...
@@ -75,3 +75,49 @@ def test_AllReduce():
error2
=
np
.
ones
(
shape
=
expect2
.
shape
)
*
1.0e-5
error2
=
np
.
ones
(
shape
=
expect2
.
shape
)
*
1.0e-5
assert
np
.
all
(
diff2
<
error2
)
assert
np
.
all
(
diff2
<
error2
)
assert
output
[
2
].
shape
()
==
expect2
.
shape
assert
output
[
2
].
shape
()
==
expect2
.
shape
class
Net2
(
nn
.
Cell
):
def
__init__
(
self
):
super
(
Net2
,
self
).
__init__
()
self
.
x1
=
Parameter
(
initializer
(
Tensor
(
x
),
x
.
shape
),
name
=
'x1'
)
self
.
op0
=
"sum"
self
.
op1
=
"sum"
self
.
op2
=
"sum"
self
.
all_reduce1
=
P
.
AllReduce
(
self
.
op0
,
group
=
NCCL_WORLD_COMM_GROUP
)
self
.
all_reduce2
=
P
.
AllReduce
(
self
.
op1
,
group
=
NCCL_WORLD_COMM_GROUP
)
self
.
all_reduce3
=
P
.
AllReduce
(
self
.
op2
,
group
=
NCCL_WORLD_COMM_GROUP
)
def
construct
(
self
):
x
=
self
.
all_reduce1
(
self
.
x1
)
y
=
self
.
all_reduce2
(
x
)
z
=
self
.
all_reduce3
(
y
)
return
(
x
,
y
,
z
)
def
test_AllReduce2
():
all_reduce
=
Net2
()
output
=
all_reduce
()
expect0
=
np
.
ones
([
3
,
1
,
3
,
3
]).
astype
(
np
.
float32
)
*
0
for
i
in
range
(
size
):
part
=
np
.
ones
([
3
,
1
,
3
,
3
]).
astype
(
np
.
float32
)
*
0.01
*
(
i
+
1
)
expect0
+=
part
diff0
=
abs
(
output
[
0
].
asnumpy
()
-
expect0
)
error0
=
np
.
ones
(
shape
=
expect0
.
shape
)
*
1.0e-5
assert
np
.
all
(
diff0
<
error0
)
assert
output
[
0
].
shape
()
==
expect0
.
shape
expect1
=
expect0
*
size
diff1
=
abs
(
output
[
1
].
asnumpy
()
-
expect1
)
error1
=
np
.
ones
(
shape
=
expect1
.
shape
)
*
1.0e-5
assert
np
.
all
(
diff1
<
error1
)
assert
output
[
1
].
shape
()
==
expect1
.
shape
expect2
=
expect1
*
size
diff2
=
abs
(
output
[
2
].
asnumpy
()
-
expect2
)
error2
=
np
.
ones
(
shape
=
expect2
.
shape
)
*
1.0e-5
assert
np
.
all
(
diff2
<
error2
)
assert
output
[
2
].
shape
()
==
expect2
.
shape
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录