Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
ee49ee71
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ee49ee71
编写于
7月 22, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
7月 22, 2020
浏览文件
操作
浏览文件
下载
差异文件
!3275 add enable_parameter_server flag
Merge pull request !3275 from jinyaohui/master
上级
5100ac5f
1641df4c
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
71 additions
and
6 deletions
+71
-6
mindspore/nn/wrap/grad_reducer.py
mindspore/nn/wrap/grad_reducer.py
+71
-6
未找到文件。
mindspore/nn/wrap/grad_reducer.py
浏览文件 @
ee49ee71
...
@@ -45,8 +45,35 @@ def _init_allreduce_operators(length):
...
@@ -45,8 +45,35 @@ def _init_allreduce_operators(length):
return
op_list
return
op_list
@reduce_opt.register("Number", "Bool", "Function", "Function", "Bool", "Tensor")
def _tensors_allreduce(degree, mean, allgather, allreduce, allreduce_filter, grad):
    """
    Run allreduce on a dense gradient and optionally scale it by 1 / degree.

    Args:
        degree (int): The mean coefficient.
        mean (bool): When true, the reduced gradient is multiplied by 1.0 / degree.
        allgather (Primitive): The communication operator for sparse gradients.
            Not used by this overload.
        allreduce (Primitive): The communication operator applied to the gradient.
        allreduce_filter (bool): When false, the gradient is returned untouched.
        grad (Tensor): The gradient tensor before operation.

    Returns:
        Tensor, the gradient tensor after operation.
    """
    # Nothing to communicate for this gradient: hand it back unchanged.
    if not allreduce_filter:
        return grad

    reduced = allreduce(grad)
    if not mean:
        return reduced

    # Bring the coefficient to the gradient's dtype before taking its reciprocal.
    coefficient = F.scalar_cast(degree, F.dtype(reduced))
    cast_op = P.Cast()
    mul_op = P.Mul()
    scale = cast_op(F.scalar_to_array(1.0 / coefficient), F.dtype(reduced))
    return mul_op(reduced, scale)
@
reduce_opt
.
register
(
"Number"
,
"Bool"
,
"Function"
,
"Function"
,
"Bool"
,
"Tensor"
,
"Bool"
)
@
reduce_opt
.
register
(
"Number"
,
"Bool"
,
"Function"
,
"Function"
,
"Bool"
,
"Tensor"
,
"Bool"
)
def
_tensors_allreduce
(
degree
,
mean
,
allgather
,
allreduce
,
allreduce_filter
,
grad
,
ps_parameter
):
def
_tensors_allreduce
_ps
(
degree
,
mean
,
allgather
,
allreduce
,
allreduce_filter
,
grad
,
ps_parameter
):
"""
"""
Apply allreduce on gradient.
Apply allreduce on gradient.
...
@@ -76,8 +103,37 @@ def _tensors_allreduce(degree, mean, allgather, allreduce, allreduce_filter, gra
...
@@ -76,8 +103,37 @@ def _tensors_allreduce(degree, mean, allgather, allreduce, allreduce_filter, gra
return
grad
return
grad
@reduce_opt.register("Number", "Bool", "Function", "Function", "Bool", "IndexedSlices")
def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce, allreduce_filter, grad):
    """
    Run allgather (rather than allreduce) on a sparse gradient and optionally
    scale its values by 1 / degree.

    Args:
        degree (int): The mean coefficient.
        mean (bool): When true, the gathered values are multiplied by 1.0 / degree.
        allgather (Primitive): The communication operator applied to the indices
            and to the values of the sparse gradient.
        allreduce (Primitive): The communication operator for gradients.
            Not used by this overload.
        allreduce_filter (bool): When false, the gradient is returned untouched.
        grad (IndexedSlices): The sparse gradient before operation; its
            indices(), values() and dense_shape() are read.

    Returns:
        IndexedSlices, the gradient after operation.
    """
    # Nothing to communicate for this gradient: hand it back unchanged.
    if not allreduce_filter:
        return grad

    gathered_indices = allgather(grad.indices())
    gathered_values = allgather(grad.values())

    if mean:
        # The coefficient follows the dtype of the local (pre-gather) values,
        # while the scale factor is cast to the dtype of the gathered values.
        coefficient = F.scalar_cast(degree, F.dtype(grad.values()))
        cast_op = P.Cast()
        mul_op = P.Mul()
        scale = cast_op(F.scalar_to_array(1.0 / coefficient), F.dtype(gathered_values))
        gathered_values = mul_op(gathered_values, scale)

    # Rebuild the sparse gradient with the original dense shape.
    return IndexedSlices(gathered_indices, gathered_values, grad.dense_shape())
@
reduce_opt
.
register
(
"Number"
,
"Bool"
,
"Function"
,
"Function"
,
"Bool"
,
"IndexedSlices"
,
"Bool"
)
@
reduce_opt
.
register
(
"Number"
,
"Bool"
,
"Function"
,
"Function"
,
"Bool"
,
"IndexedSlices"
,
"Bool"
)
def
_tensors_allreduce_with_sparse
(
degree
,
mean
,
allgather
,
allreduce
,
allreduce_filter
,
grad
,
ps_parameter
):
def
_tensors_allreduce_with_sparse
_ps
(
degree
,
mean
,
allgather
,
allreduce
,
allreduce_filter
,
grad
,
ps_parameter
):
"""
"""
Apply allgather on gradient instead of allreduce for sparse feature.
Apply allgather on gradient instead of allreduce for sparse feature.
Allgather is a communication operation used for distributed deep learning.
Allgather is a communication operation used for distributed deep learning.
...
@@ -269,6 +325,7 @@ class DistributedGradReducer(Cell):
...
@@ -269,6 +325,7 @@ class DistributedGradReducer(Cell):
self
.
allgather
=
AllGather
(
GlobalComm
.
WORLD_COMM_GROUP
)
self
.
allgather
=
AllGather
(
GlobalComm
.
WORLD_COMM_GROUP
)
ps_filter
=
lambda
x
:
x
.
is_param_ps
ps_filter
=
lambda
x
:
x
.
is_param_ps
self
.
ps_parameters
=
tuple
(
ps_filter
(
x
)
for
x
in
parameters
)
self
.
ps_parameters
=
tuple
(
ps_filter
(
x
)
for
x
in
parameters
)
self
.
enable_parameter_server
=
any
(
self
.
ps_parameters
)
def
construct
(
self
,
grads
):
def
construct
(
self
,
grads
):
"""
"""
...
@@ -285,10 +342,18 @@ class DistributedGradReducer(Cell):
...
@@ -285,10 +342,18 @@ class DistributedGradReducer(Cell):
datatypes
=
self
.
map_
(
F
.
partial
(
_get_datatype
),
grads
)
datatypes
=
self
.
map_
(
F
.
partial
(
_get_datatype
),
grads
)
grads
=
self
.
map_
(
F
.
partial
(
_cast_datatype
,
mstype
.
float32
),
grads
)
grads
=
self
.
map_
(
F
.
partial
(
_cast_datatype
,
mstype
.
float32
),
grads
)
if
self
.
split_fusion
:
if
self
.
split_fusion
:
new_grad
=
self
.
map_
(
F
.
partial
(
reduce_opt
,
self
.
degree
,
self
.
mean
,
self
.
allgather
),
if
self
.
enable_parameter_server
:
self
.
opt_list
,
self
.
allreduce_filter
,
grads
,
self
.
ps_parameters
)
new_grad
=
self
.
map_
(
F
.
partial
(
reduce_opt
,
self
.
degree
,
self
.
mean
,
self
.
allgather
),
self
.
opt_list
,
self
.
allreduce_filter
,
grads
,
self
.
ps_parameters
)
else
:
new_grad
=
self
.
map_
(
F
.
partial
(
reduce_opt
,
self
.
degree
,
self
.
mean
,
self
.
allgather
),
self
.
opt_list
,
self
.
allreduce_filter
,
grads
)
else
:
else
:
new_grad
=
self
.
map_
(
F
.
partial
(
reduce_opt
,
self
.
degree
,
self
.
mean
,
self
.
allgather
,
if
self
.
enable_parameter_server
:
self
.
allreduce
),
self
.
allreduce_filter
,
grads
,
self
.
ps_parameters
)
new_grad
=
self
.
map_
(
F
.
partial
(
reduce_opt
,
self
.
degree
,
self
.
mean
,
self
.
allgather
,
self
.
allreduce
),
self
.
allreduce_filter
,
grads
,
self
.
ps_parameters
)
else
:
new_grad
=
self
.
map_
(
F
.
partial
(
reduce_opt
,
self
.
degree
,
self
.
mean
,
self
.
allgather
,
self
.
allreduce
),
self
.
allreduce_filter
,
grads
)
new_grad
=
self
.
map_
(
F
.
partial
(
_cast_datatype
),
datatypes
,
new_grad
)
new_grad
=
self
.
map_
(
F
.
partial
(
_cast_datatype
),
datatypes
,
new_grad
)
return
new_grad
return
new_grad
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录