Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
934bdce4
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
934bdce4
编写于
6月 18, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
6月 18, 2020
浏览文件
操作
浏览文件
下载
差异文件
!2280 support host reduce
Merge pull request !2280 from chenjianping/host_reduce
上级
21ade668
35900037
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
22 addition
and
37 deletion
+22
-37
mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc
mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc
+12
-9
mindspore/ccsrc/device/cpu/mpi/mpi_adapter.h
mindspore/ccsrc/device/cpu/mpi/mpi_adapter.h
+4
-3
mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc
mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc
+2
-11
mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h
mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h
+1
-2
mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc
mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc
+3
-11
mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h
mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h
+0
-1
未找到文件。
mindspore/ccsrc/device/cpu/mpi/mpi_adapter.cc
浏览文件 @
934bdce4
...
@@ -179,8 +179,8 @@ bool MPIAdapter::ReduceScatter(const float *input, float *output, const std::vec
...
@@ -179,8 +179,8 @@ bool MPIAdapter::ReduceScatter(const float *input, float *output, const std::vec
return
result
;
return
result
;
}
}
bool
MPIAdapter
::
ReduceScatterOverwriteInput
(
float
*
input
,
const
std
::
vector
<
int
>
&
ranks_group
,
size_t
data_num
,
bool
MPIAdapter
::
ReduceScatterOverwriteInput
(
float
*
input
,
const
std
::
vector
<
int
>
&
ranks_group
,
size_t
input_
data_num
,
const
std
::
string
&
op_type
,
float
*
output
)
{
size_t
output_size
,
const
std
::
string
&
op_type
,
float
*
output
)
{
int
scatter_index
=
GetScatterIndex
(
rank_id_
,
ranks_group
);
int
scatter_index
=
GetScatterIndex
(
rank_id_
,
ranks_group
);
auto
group
=
AddGroup
(
ranks_group
);
auto
group
=
AddGroup
(
ranks_group
);
if
(
group
==
MPI_GROUP_NULL
)
{
if
(
group
==
MPI_GROUP_NULL
)
{
...
@@ -193,7 +193,7 @@ bool MPIAdapter::ReduceScatterOverwriteInput(float *input, const std::vector<int
...
@@ -193,7 +193,7 @@ bool MPIAdapter::ReduceScatterOverwriteInput(float *input, const std::vector<int
}
}
MPI_Win
window
;
MPI_Win
window
;
auto
ret
=
MPI_Win_create
(
input
,
data_num
*
sizeof
(
float
),
sizeof
(
float
),
MPI_INFO_NULL
,
comm
,
&
window
);
auto
ret
=
MPI_Win_create
(
input
,
input_
data_num
*
sizeof
(
float
),
sizeof
(
float
),
MPI_INFO_NULL
,
comm
,
&
window
);
if
(
ret
!=
MPI_SUCCESS
)
{
if
(
ret
!=
MPI_SUCCESS
)
{
MS_LOG
(
ERROR
)
<<
"mpi window create fail! ret = "
<<
ret
;
MS_LOG
(
ERROR
)
<<
"mpi window create fail! ret = "
<<
ret
;
return
false
;
return
false
;
...
@@ -205,18 +205,21 @@ bool MPIAdapter::ReduceScatterOverwriteInput(float *input, const std::vector<int
...
@@ -205,18 +205,21 @@ bool MPIAdapter::ReduceScatterOverwriteInput(float *input, const std::vector<int
continue
;
continue
;
}
}
auto
op
=
GetMpiOp
(
op_type
);
auto
op
=
GetMpiOp
(
op_type
);
ret
=
MPI_Accumulate
(
input
+
i
*
data_num
,
data_num
,
MPI_FLOAT
,
remote_rank
,
i
*
data_num
,
data_num
,
MPI_FLOAT
,
op
,
ret
=
MPI_Accumulate
(
input
+
i
*
input_data_num
,
input_data_num
,
MPI_FLOAT
,
remote_rank
,
i
*
input_data_num
,
window
);
input_data_num
,
MPI_FLOAT
,
op
,
window
);
if
(
ret
!=
MPI_SUCCESS
)
{
if
(
ret
!=
MPI_SUCCESS
)
{
MS_LOG
(
EXCEPTION
)
<<
"mpi accumulate "
<<
op_type
<<
" fail!ret = "
<<
ret
;
MS_LOG
(
EXCEPTION
)
<<
"mpi accumulate "
<<
op_type
<<
" fail!ret = "
<<
ret
;
}
}
}
}
MPI_Win_fence
(
0
,
window
);
MPI_Win_fence
(
0
,
window
);
if
(
output
!=
nullptr
)
{
if
(
output
!=
nullptr
)
{
auto
data_size
=
data_num
*
sizeof
(
float
);
auto
data_size
=
input_data_num
*
sizeof
(
float
);
auto
copy_ret
=
memcpy_s
(
output
,
data_size
,
input
+
scatter_index
*
data_num
,
data_size
);
if
(
output_size
<
data_size
)
{
MS_LOG
(
EXCEPTION
)
<<
"output buffer size "
<<
output_size
<<
" < input size "
<<
data_size
;
}
auto
copy_ret
=
memcpy_s
(
output
,
output_size
,
input
+
scatter_index
*
input_data_num
,
data_size
);
if
(
copy_ret
!=
0
)
{
if
(
copy_ret
!=
0
)
{
MS_LOG
(
EXCEPTION
)
<<
"copy output memory fail!
"
;
MS_LOG
(
EXCEPTION
)
<<
"copy output memory fail!
ret = "
<<
copy_ret
;
}
}
}
}
MPI_Win_free
(
&
window
);
MPI_Win_free
(
&
window
);
...
@@ -224,7 +227,7 @@ bool MPIAdapter::ReduceScatterOverwriteInput(float *input, const std::vector<int
...
@@ -224,7 +227,7 @@ bool MPIAdapter::ReduceScatterOverwriteInput(float *input, const std::vector<int
return
true
;
return
true
;
}
}
bool
MPIAdapter
::
AllGather
(
float
*
input
,
float
*
output
,
const
std
::
vector
<
int
>
&
ranks_group
,
size_t
data_num
)
{
bool
MPIAdapter
::
AllGather
(
const
float
*
input
,
float
*
output
,
const
std
::
vector
<
int
>
&
ranks_group
,
size_t
data_num
)
{
if
(
ranks_group
.
empty
())
{
if
(
ranks_group
.
empty
())
{
MS_LOG
(
ERROR
)
<<
"input rank group is empty!"
;
MS_LOG
(
ERROR
)
<<
"input rank group is empty!"
;
return
false
;
return
false
;
...
...
mindspore/ccsrc/device/cpu/mpi/mpi_adapter.h
浏览文件 @
934bdce4
...
@@ -34,9 +34,10 @@ class MPIAdapter {
...
@@ -34,9 +34,10 @@ class MPIAdapter {
int
GetRankId
()
const
;
int
GetRankId
()
const
;
bool
ReduceScatter
(
const
float
*
input
,
float
*
output
,
const
std
::
vector
<
int
>
&
ranks_group
,
size_t
data_num
,
bool
ReduceScatter
(
const
float
*
input
,
float
*
output
,
const
std
::
vector
<
int
>
&
ranks_group
,
size_t
data_num
,
const
std
::
string
&
op_type
=
kOpTypeSum
);
const
std
::
string
&
op_type
=
kOpTypeSum
);
bool
ReduceScatterOverwriteInput
(
float
*
input
,
const
std
::
vector
<
int
>
&
ranks_group
,
size_t
data_num
,
bool
ReduceScatterOverwriteInput
(
float
*
input
,
const
std
::
vector
<
int
>
&
ranks_group
,
size_t
input_data_num
,
const
std
::
string
&
op_type
=
kOpTypeSum
,
float
*
output
=
nullptr
);
size_t
output_size
,
const
std
::
string
&
op_type
=
kOpTypeSum
,
bool
AllGather
(
float
*
input
,
float
*
output
,
const
std
::
vector
<
int
>
&
ranks_group
,
size_t
data_num
);
float
*
output
=
nullptr
);
bool
AllGather
(
const
float
*
input
,
float
*
output
,
const
std
::
vector
<
int
>
&
ranks_group
,
size_t
data_num
);
private:
private:
MPIAdapter
();
MPIAdapter
();
...
...
mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.cc
浏览文件 @
934bdce4
...
@@ -26,21 +26,11 @@ constexpr auto kRanksGroup = "group";
...
@@ -26,21 +26,11 @@ constexpr auto kRanksGroup = "group";
constexpr
auto
kAllGatherInputNum
=
1
;
constexpr
auto
kAllGatherInputNum
=
1
;
}
// namespace
}
// namespace
AllGatherCPUKernel
::
AllGatherCPUKernel
()
:
input_data_number_
(
0
)
{}
void
AllGatherCPUKernel
::
InitKernel
(
const
CNodePtr
&
kernel_node
)
{
void
AllGatherCPUKernel
::
InitKernel
(
const
CNodePtr
&
kernel_node
)
{
size_t
input_num
=
AnfAlgo
::
GetInputTensorNum
(
kernel_node
);
size_t
input_num
=
AnfAlgo
::
GetInputTensorNum
(
kernel_node
);
if
(
input_num
!=
kAllGatherInputNum
)
{
if
(
input_num
!=
kAllGatherInputNum
)
{
MS_LOG
(
EXCEPTION
)
<<
"allgather input num:"
<<
input_num
;
MS_LOG
(
EXCEPTION
)
<<
"allgather input num:"
<<
input_num
;
}
}
for
(
size_t
i
=
0
;
i
<
input_num
;
++
i
)
{
auto
shape
=
AnfAlgo
::
GetPrevNodeOutputInferShape
(
kernel_node
,
i
);
size_t
count
=
1
;
for
(
size_t
j
=
0
;
j
<
shape
.
size
();
j
++
)
{
count
*=
IntToSize
(
shape
[
j
]);
}
input_data_number_
+=
count
;
}
auto
ranks_group
=
AnfAlgo
::
GetCNodePrimitive
(
kernel_node
)
->
GetAttr
(
kRanksGroup
);
auto
ranks_group
=
AnfAlgo
::
GetCNodePrimitive
(
kernel_node
)
->
GetAttr
(
kRanksGroup
);
if
(
ranks_group
!=
nullptr
)
{
if
(
ranks_group
!=
nullptr
)
{
...
@@ -55,8 +45,9 @@ bool AllGatherCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
...
@@ -55,8 +45,9 @@ bool AllGatherCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const
std
::
vector
<
kernel
::
AddressPtr
>
&
outputs
)
{
const
std
::
vector
<
kernel
::
AddressPtr
>
&
outputs
)
{
auto
input_addr
=
reinterpret_cast
<
float
*>
(
inputs
[
0
]
->
addr
);
auto
input_addr
=
reinterpret_cast
<
float
*>
(
inputs
[
0
]
->
addr
);
auto
output_addr
=
reinterpret_cast
<
float
*>
(
outputs
[
0
]
->
addr
);
auto
output_addr
=
reinterpret_cast
<
float
*>
(
outputs
[
0
]
->
addr
);
auto
input_data_num
=
inputs
[
0
]
->
size
/
sizeof
(
float
);
return
device
::
cpu
::
MPIAdapter
::
Instance
().
AllGather
(
input_addr
,
output_addr
,
ranks_group_
,
input_data_num
ber_
);
return
device
::
cpu
::
MPIAdapter
::
Instance
().
AllGather
(
input_addr
,
output_addr
,
ranks_group_
,
input_data_num
);
}
}
}
// namespace kernel
}
// namespace kernel
}
// namespace mindspore
}
// namespace mindspore
mindspore/ccsrc/kernel/cpu/allgather_cpu_kernel.h
浏览文件 @
934bdce4
...
@@ -24,7 +24,7 @@ namespace mindspore {
...
@@ -24,7 +24,7 @@ namespace mindspore {
namespace
kernel
{
namespace
kernel
{
class
AllGatherCPUKernel
:
public
CPUKernel
{
class
AllGatherCPUKernel
:
public
CPUKernel
{
public:
public:
AllGatherCPUKernel
();
AllGatherCPUKernel
()
=
default
;
~
AllGatherCPUKernel
()
override
=
default
;
~
AllGatherCPUKernel
()
override
=
default
;
void
InitKernel
(
const
CNodePtr
&
kernel_node
)
override
;
void
InitKernel
(
const
CNodePtr
&
kernel_node
)
override
;
...
@@ -33,7 +33,6 @@ class AllGatherCPUKernel : public CPUKernel {
...
@@ -33,7 +33,6 @@ class AllGatherCPUKernel : public CPUKernel {
const
std
::
vector
<
AddressPtr
>
&
outputs
)
override
;
const
std
::
vector
<
AddressPtr
>
&
outputs
)
override
;
private:
private:
size_t
input_data_number_
;
std
::
vector
<
int
>
ranks_group_
;
std
::
vector
<
int
>
ranks_group_
;
};
};
...
...
mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.cc
浏览文件 @
934bdce4
...
@@ -24,18 +24,9 @@ namespace {
...
@@ -24,18 +24,9 @@ namespace {
constexpr
auto
kRanksGroup
=
"group"
;
constexpr
auto
kRanksGroup
=
"group"
;
}
// namespace
}
// namespace
ReduceScatterCPUKernel
::
ReduceScatterCPUKernel
()
:
o
utput_data_number_
(
0
),
o
p_type_
(
device
::
cpu
::
kOpTypeSum
)
{}
ReduceScatterCPUKernel
::
ReduceScatterCPUKernel
()
:
op_type_
(
device
::
cpu
::
kOpTypeSum
)
{}
void
ReduceScatterCPUKernel
::
InitKernel
(
const
CNodePtr
&
kernel_node
)
{
void
ReduceScatterCPUKernel
::
InitKernel
(
const
CNodePtr
&
kernel_node
)
{
size_t
output_num
=
AnfAlgo
::
GetOutputTensorNum
(
kernel_node
);
for
(
size_t
i
=
0
;
i
<
output_num
;
++
i
)
{
auto
shape
=
AnfAlgo
::
GetOutputInferShape
(
kernel_node
,
i
);
size_t
size
=
1
;
for
(
size_t
j
=
0
;
j
<
shape
.
size
();
j
++
)
{
size
*=
IntToSize
(
shape
[
j
]);
}
output_data_number_
+=
size
;
}
auto
op
=
AnfAlgo
::
GetCNodePrimitive
(
kernel_node
)
->
GetAttr
(
"op"
);
auto
op
=
AnfAlgo
::
GetCNodePrimitive
(
kernel_node
)
->
GetAttr
(
"op"
);
if
(
op
!=
nullptr
)
{
if
(
op
!=
nullptr
)
{
op_type_
=
GetValue
<
std
::
string
>
(
op
);
op_type_
=
GetValue
<
std
::
string
>
(
op
);
...
@@ -54,8 +45,9 @@ bool ReduceScatterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &input
...
@@ -54,8 +45,9 @@ bool ReduceScatterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &input
const
std
::
vector
<
kernel
::
AddressPtr
>
&
outputs
)
{
const
std
::
vector
<
kernel
::
AddressPtr
>
&
outputs
)
{
auto
input_addr
=
reinterpret_cast
<
float
*>
(
inputs
[
0
]
->
addr
);
auto
input_addr
=
reinterpret_cast
<
float
*>
(
inputs
[
0
]
->
addr
);
auto
output_addr
=
reinterpret_cast
<
float
*>
(
outputs
[
0
]
->
addr
);
auto
output_addr
=
reinterpret_cast
<
float
*>
(
outputs
[
0
]
->
addr
);
auto
output_data_num
=
outputs
[
0
]
->
size
/
sizeof
(
float
);
return
device
::
cpu
::
MPIAdapter
::
Instance
().
ReduceScatter
(
input_addr
,
output_addr
,
ranks_group_
,
output_data_num
ber_
,
return
device
::
cpu
::
MPIAdapter
::
Instance
().
ReduceScatter
(
input_addr
,
output_addr
,
ranks_group_
,
output_data_num
,
op_type_
);
op_type_
);
}
}
}
// namespace kernel
}
// namespace kernel
...
...
mindspore/ccsrc/kernel/cpu/reduce_scatter_cpu_kernel.h
浏览文件 @
934bdce4
...
@@ -33,7 +33,6 @@ class ReduceScatterCPUKernel : public CPUKernel {
...
@@ -33,7 +33,6 @@ class ReduceScatterCPUKernel : public CPUKernel {
const
std
::
vector
<
AddressPtr
>
&
outputs
)
override
;
const
std
::
vector
<
AddressPtr
>
&
outputs
)
override
;
private:
private:
size_t
output_data_number_
;
std
::
string
op_type_
;
std
::
string
op_type_
;
std
::
vector
<
int
>
ranks_group_
;
std
::
vector
<
int
>
ranks_group_
;
};
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录