Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
d4cfe55c
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d4cfe55c
编写于
9月 04, 2020
作者:
Y
yao_yf
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rename mirror_mean to gradients_mean
上级
bc4c5afc
变更
81
隐藏空白更改
内联
并排
Showing
81 changed file
with
135 addition
and
134 deletion
+135
-134
mindspore/ccsrc/frontend/parallel/context.cc
mindspore/ccsrc/frontend/parallel/context.cc
+2
-2
mindspore/ccsrc/frontend/parallel/context.h
mindspore/ccsrc/frontend/parallel/context.h
+3
-3
mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc
mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc
+1
-1
mindspore/ccsrc/frontend/parallel/step_parallel.cc
mindspore/ccsrc/frontend/parallel/step_parallel.cc
+1
-1
mindspore/ccsrc/pipeline/jit/init.cc
mindspore/ccsrc/pipeline/jit/init.cc
+2
-2
mindspore/context.py
mindspore/context.py
+5
-5
mindspore/nn/wrap/cell_wrapper.py
mindspore/nn/wrap/cell_wrapper.py
+2
-2
mindspore/nn/wrap/grad_reducer.py
mindspore/nn/wrap/grad_reducer.py
+1
-1
mindspore/nn/wrap/loss_scale.py
mindspore/nn/wrap/loss_scale.py
+2
-2
mindspore/parallel/_auto_parallel_context.py
mindspore/parallel/_auto_parallel_context.py
+13
-13
mindspore/parallel/_utils.py
mindspore/parallel/_utils.py
+3
-3
model_zoo/official/cv/deeplabv3/train.py
model_zoo/official/cv/deeplabv3/train.py
+1
-1
model_zoo/official/cv/faster_rcnn/train.py
model_zoo/official/cv/faster_rcnn/train.py
+1
-1
model_zoo/official/cv/googlenet/train.py
model_zoo/official/cv/googlenet/train.py
+2
-2
model_zoo/official/cv/inceptionv3/train.py
model_zoo/official/cv/inceptionv3/train.py
+1
-1
model_zoo/official/cv/maskrcnn/train.py
model_zoo/official/cv/maskrcnn/train.py
+1
-1
model_zoo/official/cv/mobilenetv2/src/utils.py
model_zoo/official/cv/mobilenetv2/src/utils.py
+2
-2
model_zoo/official/cv/mobilenetv2_quant/train.py
model_zoo/official/cv/mobilenetv2_quant/train.py
+2
-2
model_zoo/official/cv/mobilenetv3/train.py
model_zoo/official/cv/mobilenetv3/train.py
+1
-1
model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
+2
-2
model_zoo/official/cv/nasnet/train.py
model_zoo/official/cv/nasnet/train.py
+1
-1
model_zoo/official/cv/resnet/train.py
model_zoo/official/cv/resnet/train.py
+2
-2
model_zoo/official/cv/resnet50_quant/train.py
model_zoo/official/cv/resnet50_quant/train.py
+2
-2
model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py
model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py
+1
-1
model_zoo/official/cv/resnet_thor/src/thor.py
model_zoo/official/cv/resnet_thor/src/thor.py
+3
-3
model_zoo/official/cv/resnet_thor/train.py
model_zoo/official/cv/resnet_thor/train.py
+2
-2
model_zoo/official/cv/resnext50/eval.py
model_zoo/official/cv/resnext50/eval.py
+1
-1
model_zoo/official/cv/resnext50/train.py
model_zoo/official/cv/resnext50/train.py
+1
-1
model_zoo/official/cv/shufflenetv2/train.py
model_zoo/official/cv/shufflenetv2/train.py
+1
-1
model_zoo/official/cv/ssd/src/ssd.py
model_zoo/official/cv/ssd/src/ssd.py
+1
-1
model_zoo/official/cv/ssd/train.py
model_zoo/official/cv/ssd/train.py
+1
-1
model_zoo/official/cv/vgg16/train.py
model_zoo/official/cv/vgg16/train.py
+1
-1
model_zoo/official/cv/warpctc/src/warpctc_for_train.py
model_zoo/official/cv/warpctc/src/warpctc_for_train.py
+2
-2
model_zoo/official/cv/warpctc/train.py
model_zoo/official/cv/warpctc/train.py
+1
-1
model_zoo/official/cv/yolov3_darknet53/eval.py
model_zoo/official/cv/yolov3_darknet53/eval.py
+1
-1
model_zoo/official/cv/yolov3_darknet53/src/yolo.py
model_zoo/official/cv/yolov3_darknet53/src/yolo.py
+1
-1
model_zoo/official/cv/yolov3_darknet53/train.py
model_zoo/official/cv/yolov3_darknet53/train.py
+1
-1
model_zoo/official/cv/yolov3_darknet53_quant/eval.py
model_zoo/official/cv/yolov3_darknet53_quant/eval.py
+1
-1
model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
+1
-1
model_zoo/official/cv/yolov3_darknet53_quant/train.py
model_zoo/official/cv/yolov3_darknet53_quant/train.py
+1
-1
model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
+1
-1
model_zoo/official/cv/yolov3_resnet18/train.py
model_zoo/official/cv/yolov3_resnet18/train.py
+1
-1
model_zoo/official/nlp/bert/run_pretrain.py
model_zoo/official/nlp/bert/run_pretrain.py
+1
-1
model_zoo/official/nlp/bert/src/bert_for_finetune.py
model_zoo/official/nlp/bert/src/bert_for_finetune.py
+2
-2
model_zoo/official/nlp/bert/src/bert_for_pre_training.py
model_zoo/official/nlp/bert/src/bert_for_pre_training.py
+1
-1
model_zoo/official/nlp/bert_thor/run_pretrain.py
model_zoo/official/nlp/bert_thor/run_pretrain.py
+1
-1
model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
...l_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
+1
-1
model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py
model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py
+1
-1
model_zoo/official/nlp/bert_thor/src/thor_for_bert_arg.py
model_zoo/official/nlp/bert_thor/src/thor_for_bert_arg.py
+2
-2
model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
...fficial/nlp/mass/src/transformer/transformer_for_train.py
+2
-2
model_zoo/official/nlp/mass/train.py
model_zoo/official/nlp/mass/train.py
+1
-1
model_zoo/official/nlp/tinybert/run_general_distill.py
model_zoo/official/nlp/tinybert/run_general_distill.py
+1
-1
model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
+2
-2
model_zoo/official/nlp/transformer/src/transformer_for_train.py
...zoo/official/nlp/transformer/src/transformer_for_train.py
+3
-3
model_zoo/official/nlp/transformer/train.py
model_zoo/official/nlp/transformer/train.py
+1
-1
model_zoo/official/recommend/deepfm/train.py
model_zoo/official/recommend/deepfm/train.py
+2
-2
model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py
...zoo/official/recommend/wide_and_deep/src/wide_and_deep.py
+1
-1
model_zoo/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py
...l/recommend/wide_and_deep/train_and_eval_auto_parallel.py
+2
-2
model_zoo/official/recommend/wide_and_deep/train_and_eval_distribute.py
...cial/recommend/wide_and_deep/train_and_eval_distribute.py
+1
-1
model_zoo/official/recommend/wide_and_deep/train_and_eval_parameter_server.py
...ecommend/wide_and_deep/train_and_eval_parameter_server.py
+1
-1
model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py
...l/recommend/wide_and_deep_multitable/src/wide_and_deep.py
+1
-1
model_zoo/official/recommend/wide_and_deep_multitable/train_and_eval_distribute.py
...end/wide_and_deep_multitable/train_and_eval_distribute.py
+1
-1
tests/st/auto_parallel/resnet50_expand_loss.py
tests/st/auto_parallel/resnet50_expand_loss.py
+1
-1
tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/train_and_test_multinpu_ci.py
...and_deep/python_file_for_ci/train_and_test_multinpu_ci.py
+1
-1
tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py
...o_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py
+2
-2
tests/st/model_zoo_tests/wide_and_deep/train_and_test_multinpu_ci_data_parallel.py
...wide_and_deep/train_and_test_multinpu_ci_data_parallel.py
+1
-1
tests/st/model_zoo_tests/yolov3/src/yolov3.py
tests/st/model_zoo_tests/yolov3/src/yolov3.py
+1
-1
tests/st/nccl/test_nccl_lenet.py
tests/st/nccl/test_nccl_lenet.py
+1
-1
tests/st/networks/models/bert/src/bert_for_pre_training.py
tests/st/networks/models/bert/src/bert_for_pre_training.py
+1
-1
tests/st/networks/models/bert/src/utils.py
tests/st/networks/models/bert/src/utils.py
+1
-1
tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py
...st/networks/models/resnet50/src_thor/grad_reducer_thor.py
+1
-1
tests/st/networks/models/resnet50/src_thor/thor.py
tests/st/networks/models/resnet50/src_thor/thor.py
+2
-2
tests/st/networks/models/resnet50/test_resnet50_imagenet.py
tests/st/networks/models/resnet50/test_resnet50_imagenet.py
+2
-2
tests/st/ps/multi_full_ps/test_multi_full_ps.py
tests/st/ps/multi_full_ps/test_multi_full_ps.py
+2
-1
tests/ut/python/communication/test_data_parallel_dense.py
tests/ut/python/communication/test_data_parallel_dense.py
+1
-1
tests/ut/python/communication/test_data_parallel_lenet.py
tests/ut/python/communication/test_data_parallel_lenet.py
+1
-1
tests/ut/python/model/test_mix_precision.py
tests/ut/python/model/test_mix_precision.py
+1
-1
tests/ut/python/parallel/test_optimizer.py
tests/ut/python/parallel/test_optimizer.py
+1
-1
tests/ut/python/parallel/test_set_auto_parallel_context.py
tests/ut/python/parallel/test_set_auto_parallel_context.py
+8
-8
tests/ut/python/parallel/test_two_matmul.py
tests/ut/python/parallel/test_two_matmul.py
+2
-2
tests/ut/python/train/test_amp.py
tests/ut/python/train/test_amp.py
+1
-1
未找到文件。
mindspore/ccsrc/frontend/parallel/context.cc
浏览文件 @
d4cfe55c
...
...
@@ -45,7 +45,7 @@ std::shared_ptr<ParallelContext> ParallelContext::GetInstance() {
ParallelContext
::
ParallelContext
()
{
Reset
();
}
void
ParallelContext
::
Reset
()
{
mirror
_mean_
=
false
;
gradients
_mean_
=
false
;
full_batch_
=
false
;
gradient_fp32_sync_
=
true
;
loss_repeated_mean_
=
true
;
...
...
@@ -74,7 +74,7 @@ void ParallelContext::set_global_rank(int32_t global_rank) {
global_rank_is_set_
=
true
;
}
void
ParallelContext
::
set_
mirror_mean
(
bool
mirror_mean
)
{
mirror_mean_
=
mirror
_mean
;
}
void
ParallelContext
::
set_
gradients_mean
(
bool
gradients_mean
)
{
gradients_mean_
=
gradients
_mean
;
}
void
ParallelContext
::
set_full_batch
(
bool
full_batch
)
{
full_batch_
=
full_batch
;
}
...
...
mindspore/ccsrc/frontend/parallel/context.h
浏览文件 @
d4cfe55c
...
...
@@ -52,8 +52,8 @@ class ParallelContext {
static
std
::
shared_ptr
<
ParallelContext
>
GetInstance
();
void
set_
mirror_mean
(
bool
mirror
_mean
);
bool
mirror_mean
()
const
{
return
mirror
_mean_
;
}
void
set_
gradients_mean
(
bool
gradients
_mean
);
bool
gradients_mean
()
const
{
return
gradients
_mean_
;
}
void
set_full_batch
(
bool
full_batch
);
bool
full_batch
()
const
{
return
full_batch_
;
}
...
...
@@ -107,7 +107,7 @@ class ParallelContext {
private:
ParallelContext
();
static
std
::
shared_ptr
<
ParallelContext
>
inst_context_
;
bool
mirror
_mean_
;
bool
gradients
_mean_
;
bool
full_batch_
;
bool
gradient_fp32_sync_
;
bool
loss_repeated_mean_
;
...
...
mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc
浏览文件 @
d4cfe55c
...
...
@@ -251,7 +251,7 @@ OperatorVector CreateMirrorOps(const std::string &group_name, size_t dev_num) {
MS_LOG
(
EXCEPTION
)
<<
"Invalid dev num: "
<<
dev_num
;
}
OperatorVector
op_for_weight
;
bool
mean_flag
=
ParallelContext
::
GetInstance
()
->
mirror
_mean
();
bool
mean_flag
=
ParallelContext
::
GetInstance
()
->
gradients
_mean
();
OperatorName
operator_name
=
MIRROR_OPERATOR
;
ValuePtr
attr0_value
=
MakeValue
(
group_name
);
...
...
mindspore/ccsrc/frontend/parallel/step_parallel.cc
浏览文件 @
d4cfe55c
...
...
@@ -2488,7 +2488,7 @@ Status ParallelInit() {
}
MS_LOG
(
INFO
)
<<
"The parallel context: dev num: "
<<
device_num
<<
", global rank: "
<<
global_rank
<<
", backend: "
<<
backend
<<
",
mirror_mean: "
<<
ParallelContext
::
GetInstance
()
->
mirror
_mean
()
<<
", backend: "
<<
backend
<<
",
gradients_mean: "
<<
ParallelContext
::
GetInstance
()
->
gradients
_mean
()
<<
", gradient_fp32_sync: "
<<
ParallelContext
::
GetInstance
()
->
gradient_fp32_sync
();
return
SUCCESS
;
}
...
...
mindspore/ccsrc/pipeline/jit/init.cc
浏览文件 @
d4cfe55c
...
...
@@ -113,8 +113,8 @@ PYBIND11_MODULE(_c_expression, m) {
.
def
(
"get_global_rank"
,
&
ParallelContext
::
global_rank
,
"Get global rank."
)
.
def
(
"set_global_rank"
,
&
ParallelContext
::
set_global_rank
,
"Set global rank."
)
.
def
(
"get_global_rank_is_set"
,
&
ParallelContext
::
global_rank_is_set
,
"Get global rank is set."
)
.
def
(
"get_
mirror_mean"
,
&
ParallelContext
::
mirror
_mean
,
"Get mirror mean."
)
.
def
(
"set_
mirror_mean"
,
&
ParallelContext
::
set_mirror
_mean
,
"Set mirror mean."
)
.
def
(
"get_
gradients_mean"
,
&
ParallelContext
::
gradients
_mean
,
"Get mirror mean."
)
.
def
(
"set_
gradients_mean"
,
&
ParallelContext
::
set_gradients
_mean
,
"Set mirror mean."
)
.
def
(
"get_gradient_fp32_sync"
,
&
ParallelContext
::
gradient_fp32_sync
,
"Get cast before mirror."
)
.
def
(
"set_gradient_fp32_sync"
,
&
ParallelContext
::
set_gradient_fp32_sync
,
"Set cast before mirror."
)
.
def
(
"get_loss_repeated_mean"
,
&
ParallelContext
::
loss_repeated_mean
,
"Get loss repeated mean."
)
...
...
mindspore/context.py
浏览文件 @
d4cfe55c
...
...
@@ -323,7 +323,7 @@ def _context():
return
_k_context
@
args_type_check
(
device_num
=
int
,
global_rank
=
int
,
mirror
_mean
=
bool
,
gradient_fp32_sync
=
bool
,
parallel_mode
=
str
,
@
args_type_check
(
device_num
=
int
,
global_rank
=
int
,
gradients
_mean
=
bool
,
gradient_fp32_sync
=
bool
,
parallel_mode
=
str
,
auto_parallel_search_mode
=
str
,
parameter_broadcast
=
bool
,
strategy_ckpt_load_file
=
str
,
strategy_ckpt_save_file
=
str
,
full_batch
=
bool
,
enable_parallel_optimizer
=
bool
)
def
set_auto_parallel_context
(
**
kwargs
):
...
...
@@ -341,8 +341,8 @@ def set_auto_parallel_context(**kwargs):
Args:
device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0.
mirror
_mean (bool): Whether to perform mean operator after all-reduce of mirror.
"stand_alone" do not support
mirror
_mean. Default: False.
gradients
_mean (bool): Whether to perform mean operator after all-reduce of mirror.
"stand_alone" do not support
gradients
_mean. Default: False.
gradient_fp32_sync (bool): Gradients allreduce by fp32 even though gradients is fp16 if this flag is True..
"stand_alone", "data_parallel" and "hybrid_parallel" do not support
gradient_fp32_sync. Default: True.
...
...
@@ -380,7 +380,7 @@ def set_auto_parallel_context(**kwargs):
Examples:
>>> context.set_auto_parallel_context(device_num=8)
>>> context.set_auto_parallel_context(global_rank=0)
>>> context.set_auto_parallel_context(
mirror
_mean=True)
>>> context.set_auto_parallel_context(
gradients
_mean=True)
>>> context.set_auto_parallel_context(gradient_fp32_sync=False)
>>> context.set_auto_parallel_context(parallel_mode="auto_parallel")
>>> context.set_auto_parallel_context(parameter_broadcast=False)
...
...
@@ -412,7 +412,7 @@ def reset_auto_parallel_context():
- device_num: 1.
- global_rank: 0.
-
mirror
_mean: False.
-
gradients
_mean: False.
- gradient_fp32_sync: True.
- parallel_mode: "stand_alone".
- parameter_broadcast: False.
...
...
mindspore/nn/wrap/cell_wrapper.py
浏览文件 @
d4cfe55c
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
# ============================================================================
"""Cell_wrapper."""
from
mindspore.parallel._utils
import
(
_get_device_num
,
_get_
mirror
_mean
,
from
mindspore.parallel._utils
import
(
_get_device_num
,
_get_
gradients
_mean
,
_get_parallel_mode
)
from
mindspore.context
import
ParallelMode
from
...common
import
dtype
as
mstype
...
...
@@ -190,7 +190,7 @@ class TrainOneStepCell(Cell):
if
parallel_mode
in
(
ParallelMode
.
DATA_PARALLEL
,
ParallelMode
.
HYBRID_PARALLEL
):
self
.
reducer_flag
=
True
if
self
.
reducer_flag
:
mean
=
_get_
mirror
_mean
()
mean
=
_get_
gradients
_mean
()
degree
=
_get_device_num
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
...
...
mindspore/nn/wrap/grad_reducer.py
浏览文件 @
d4cfe55c
...
...
@@ -279,7 +279,7 @@ class DistributedGradReducer(Cell):
>>> ParallelMode.HYBRID_PARALLEL]:
>>> self.reducer_flag = True
>>> if self.reducer_flag:
>>> mean = context.get_auto_parallel_context("
mirror
_mean")
>>> mean = context.get_auto_parallel_context("
gradients
_mean")
>>> if mean.get_device_num_is_set():
>>> degree = context.get_auto_parallel_context("device_num")
>>> else:
...
...
mindspore/nn/wrap/loss_scale.py
浏览文件 @
d4cfe55c
...
...
@@ -16,7 +16,7 @@
import
mindspore.context
as
context
from
mindspore.nn.wrap.grad_reducer
import
DistributedGradReducer
from
mindspore.context
import
ParallelMode
from
mindspore.parallel._utils
import
_get_device_num
,
_get_parallel_mode
,
_get_
mirror
_mean
from
mindspore.parallel._utils
import
_get_device_num
,
_get_parallel_mode
,
_get_
gradients
_mean
from
..cell
import
Cell
from
...common
import
Tensor
,
RowTensor
from
...common.parameter
import
Parameter
...
...
@@ -231,7 +231,7 @@ class TrainOneStepWithLossScaleCell(Cell):
self
.
grad_reducer
=
F
.
identity
self
.
reducer_flag
=
self
.
parallel_mode
in
[
ParallelMode
.
DATA_PARALLEL
,
ParallelMode
.
HYBRID_PARALLEL
]
if
self
.
reducer_flag
:
mean
=
_get_
mirror
_mean
()
mean
=
_get_
gradients
_mean
()
degree
=
_get_device_num
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
self
.
is_distributed
=
self
.
parallel_mode
!=
ParallelMode
.
STAND_ALONE
...
...
mindspore/parallel/_auto_parallel_context.py
浏览文件 @
d4cfe55c
...
...
@@ -95,23 +95,23 @@ class _AutoParallelContext:
self
.
check_context_handle
()
return
self
.
_context_handle
.
get_global_rank
()
def
set_
mirror_mean
(
self
,
mirror
_mean
):
def
set_
gradients_mean
(
self
,
gradients
_mean
):
"""
Set
mirror
_mean flag.
Set
gradients
_mean flag.
Note:
If
mirror
_mean is true, it will insert a div operator after parameter gradients allreduce.
If
gradients
_mean is true, it will insert a div operator after parameter gradients allreduce.
Args:
mirror_mean (bool): The mirror
_mean flag.
gradients_mean (bool): The gradients
_mean flag.
"""
self
.
check_context_handle
()
self
.
_context_handle
.
set_
mirror_mean
(
mirror
_mean
)
self
.
_context_handle
.
set_
gradients_mean
(
gradients
_mean
)
def
get_
mirror
_mean
(
self
):
"""Get
mirror
_mean flag."""
def
get_
gradients
_mean
(
self
):
"""Get
gradients
_mean flag."""
self
.
check_context_handle
()
return
self
.
_context_handle
.
get_
mirror
_mean
()
return
self
.
_context_handle
.
get_
gradients
_mean
()
def
set_gradient_fp32_sync
(
self
,
gradient_fp32_sync
):
"""
...
...
@@ -453,7 +453,7 @@ def auto_parallel_context():
_set_auto_parallel_context_func_map
=
{
"device_num"
:
auto_parallel_context
().
set_device_num
,
"global_rank"
:
auto_parallel_context
().
set_global_rank
,
"
mirror_mean"
:
auto_parallel_context
().
set_mirror
_mean
,
"
gradients_mean"
:
auto_parallel_context
().
set_gradients
_mean
,
"gradient_fp32_sync"
:
auto_parallel_context
().
set_gradient_fp32_sync
,
"loss_repeated_mean"
:
auto_parallel_context
().
set_loss_repeated_mean
,
"parallel_mode"
:
auto_parallel_context
().
set_parallel_mode
,
...
...
@@ -468,7 +468,7 @@ _set_auto_parallel_context_func_map = {
_get_auto_parallel_context_func_map
=
{
"device_num"
:
auto_parallel_context
().
get_device_num
,
"global_rank"
:
auto_parallel_context
().
get_global_rank
,
"
mirror_mean"
:
auto_parallel_context
().
get_mirror
_mean
,
"
gradients_mean"
:
auto_parallel_context
().
get_gradients
_mean
,
"gradient_fp32_sync"
:
auto_parallel_context
().
get_gradient_fp32_sync
,
"loss_repeated_mean"
:
auto_parallel_context
().
get_loss_repeated_mean
,
"parallel_mode"
:
auto_parallel_context
().
get_parallel_mode
,
...
...
@@ -480,7 +480,7 @@ _get_auto_parallel_context_func_map = {
"enable_parallel_optimizer"
:
auto_parallel_context
().
get_enable_parallel_optimizer
}
@
args_type_check
(
device_num
=
int
,
global_rank
=
int
,
mirror
_mean
=
bool
,
gradient_fp32_sync
=
bool
,
@
args_type_check
(
device_num
=
int
,
global_rank
=
int
,
gradients
_mean
=
bool
,
gradient_fp32_sync
=
bool
,
loss_repeated_mean
=
bool
,
parallel_mode
=
str
,
auto_parallel_search_mode
=
str
,
parameter_broadcast
=
bool
,
strategy_ckpt_load_file
=
str
,
strategy_ckpt_save_file
=
str
,
full_batch
=
bool
,
enable_parallel_optimizer
=
bool
)
...
...
@@ -495,7 +495,7 @@ def _set_auto_parallel_context(**kwargs):
Args:
device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0.
mirror
_mean (bool): Whether to perform mean operator after all-reduce of mirror. Default: False.
gradients
_mean (bool): Whether to perform mean operator after all-reduce of mirror. Default: False.
loss_repeated_mean (bool): Whether to perform mean operator in backward in the case of repeated
calculations. Default: True.
gradient_fp32_sync (bool): Gradients allreduce by fp32 even though gradients is fp16 if this flag is True.
...
...
@@ -562,7 +562,7 @@ def _reset_auto_parallel_context():
- device_num: 1.
- global_rank: 0.
-
mirror
_mean: False.
-
gradients
_mean: False.
- gradient_fp32_sync: True.
- parallel_mode: "stand_alone".
- parameter_broadcast: False.
...
...
mindspore/parallel/_utils.py
浏览文件 @
d4cfe55c
...
...
@@ -88,9 +88,9 @@ def _to_full_tensor(elem, device_num, global_rank, scaling_sens=None):
lst
.
append
(
Tensor
(
scaling_sens
,
mstype
.
float32
))
return
tuple
(
lst
)
def
_get_
mirror
_mean
():
"""Get if using
mirror
_mean."""
return
auto_parallel_context
().
get_
mirror
_mean
()
def
_get_
gradients
_mean
():
"""Get if using
gradients
_mean."""
return
auto_parallel_context
().
get_
gradients
_mean
()
def
_get_device_num
():
...
...
model_zoo/official/cv/deeplabv3/train.py
浏览文件 @
d4cfe55c
...
...
@@ -66,7 +66,7 @@ def model_fine_tune(flags, train_net, fix_weight_layer):
para
.
requires_grad
=
False
if
__name__
==
"__main__"
:
if
args_opt
.
distribute
==
"true"
:
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
)
init
()
args_opt
.
base_size
=
config
.
crop_size
args_opt
.
crop_size
=
config
.
crop_size
...
...
model_zoo/official/cv/faster_rcnn/train.py
浏览文件 @
d4cfe55c
...
...
@@ -54,7 +54,7 @@ if __name__ == '__main__':
rank
=
args_opt
.
rank_id
device_num
=
args_opt
.
device_num
context
.
set_auto_parallel_context
(
device_num
=
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
parameter_broadcast
=
True
)
gradients
_mean
=
True
,
parameter_broadcast
=
True
)
init
()
else
:
rank
=
0
...
...
model_zoo/official/cv/googlenet/train.py
浏览文件 @
d4cfe55c
...
...
@@ -78,7 +78,7 @@ if __name__ == '__main__':
if
device_num
>
1
:
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
device_num
=
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
init
()
elif
device_target
==
"GPU"
:
init
()
...
...
@@ -86,7 +86,7 @@ if __name__ == '__main__':
if
device_num
>
1
:
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
device_num
=
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
else
:
raise
ValueError
(
"Unsupported platform."
)
...
...
model_zoo/official/cv/inceptionv3/train.py
浏览文件 @
d4cfe55c
...
...
@@ -58,7 +58,7 @@ if __name__ == '__main__':
cfg
.
group_size
=
get_group_size
()
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
device_num
=
cfg
.
group_size
,
parameter_broadcast
=
True
,
mirror
_mean
=
True
)
parameter_broadcast
=
True
,
gradients
_mean
=
True
)
else
:
cfg
.
rank
=
0
cfg
.
group_size
=
1
...
...
model_zoo/official/cv/maskrcnn/train.py
浏览文件 @
d4cfe55c
...
...
@@ -58,7 +58,7 @@ if __name__ == '__main__':
rank
=
args_opt
.
rank_id
device_num
=
args_opt
.
device_num
context
.
set_auto_parallel_context
(
device_num
=
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
parameter_broadcast
=
True
)
gradients
_mean
=
True
,
parameter_broadcast
=
True
)
init
()
else
:
rank
=
0
...
...
model_zoo/official/cv/mobilenetv2/src/utils.py
浏览文件 @
d4cfe55c
...
...
@@ -39,7 +39,7 @@ def context_device_init(config):
init
(
"nccl"
)
context
.
set_auto_parallel_context
(
device_num
=
get_group_size
(),
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
elif
config
.
platform
==
"Ascend"
:
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
config
.
platform
,
device_id
=
config
.
device_id
,
...
...
@@ -47,7 +47,7 @@ def context_device_init(config):
if
config
.
run_distribute
:
context
.
set_auto_parallel_context
(
device_num
=
config
.
rank_size
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
parameter_broadcast
=
True
,
mirror
_mean
=
True
)
parameter_broadcast
=
True
,
gradients
_mean
=
True
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
140
])
init
()
else
:
...
...
model_zoo/official/cv/mobilenetv2_quant/train.py
浏览文件 @
d4cfe55c
...
...
@@ -57,7 +57,7 @@ elif args_opt.device_target == "GPU":
init
()
context
.
set_auto_parallel_context
(
device_num
=
get_group_size
(),
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
"GPU"
,
save_graphs
=
False
)
...
...
@@ -77,7 +77,7 @@ def train_on_ascend():
context
.
set_auto_parallel_context
(
device_num
=
rank_size
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
parameter_broadcast
=
True
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
init
()
# define network
...
...
model_zoo/official/cv/mobilenetv3/train.py
浏览文件 @
d4cfe55c
...
...
@@ -55,7 +55,7 @@ if args_opt.device_target == "GPU":
init
()
context
.
set_auto_parallel_context
(
device_num
=
get_group_size
(),
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
else
:
raise
ValueError
(
"Unsupported device_target."
)
...
...
model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
浏览文件 @
d4cfe55c
...
...
@@ -24,7 +24,7 @@ import mindspore.ops.composite as C
import
mindspore.common.dtype
as
mstype
from
mindspore.nn.wrap.grad_reducer
import
DistributedGradReducer
from
mindspore.train.parallel_utils
import
ParallelMode
from
mindspore.parallel._utils
import
_get_device_num
,
_get_parallel_mode
,
_get_
mirror
_mean
from
mindspore.parallel._utils
import
_get_device_num
,
_get_parallel_mode
,
_get_
gradients
_mean
GRADIENT_CLIP_TYPE
=
1
...
...
@@ -921,7 +921,7 @@ class NASNetAMobileTrainOneStepWithClipGradient(nn.Cell):
if
parallel_mode
in
(
ParallelMode
.
DATA_PARALLEL
,
ParallelMode
.
HYBRID_PARALLEL
):
self
.
reducer_flag
=
True
if
self
.
reducer_flag
:
mean
=
_get_
mirror
_mean
()
mean
=
_get_
gradients
_mean
()
degree
=
_get_device_num
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
...
...
model_zoo/official/cv/nasnet/train.py
浏览文件 @
d4cfe55c
...
...
@@ -58,7 +58,7 @@ if __name__ == '__main__':
cfg
.
group_size
=
get_group_size
()
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
device_num
=
cfg
.
group_size
,
parameter_broadcast
=
True
,
mirror
_mean
=
True
)
parameter_broadcast
=
True
,
gradients
_mean
=
True
)
else
:
cfg
.
rank
=
0
cfg
.
group_size
=
1
...
...
model_zoo/official/cv/resnet/train.py
浏览文件 @
d4cfe55c
...
...
@@ -76,7 +76,7 @@ if __name__ == '__main__':
device_id
=
int
(
os
.
getenv
(
'DEVICE_ID'
))
context
.
set_context
(
device_id
=
device_id
,
enable_auto_mixed_precision
=
True
)
context
.
set_auto_parallel_context
(
device_num
=
args_opt
.
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
if
args_opt
.
net
==
"resnet50"
or
args_opt
.
net
==
"se-resnet50"
:
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
85
,
160
])
else
:
...
...
@@ -86,7 +86,7 @@ if __name__ == '__main__':
else
:
init
()
context
.
set_auto_parallel_context
(
device_num
=
get_group_size
(),
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
if
args_opt
.
net
==
"resnet50"
:
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
85
,
160
])
ckpt_save_dir
=
config
.
save_checkpoint_path
+
"ckpt_"
+
str
(
get_rank
())
+
"/"
...
...
model_zoo/official/cv/resnet50_quant/train.py
浏览文件 @
d4cfe55c
...
...
@@ -76,11 +76,11 @@ if __name__ == '__main__':
context
.
set_auto_parallel_context
(
device_num
=
rank_size
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
parameter_broadcast
=
True
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
init
()
context
.
set_auto_parallel_context
(
device_num
=
args_opt
.
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
107
,
160
])
# define network
...
...
model_zoo/official/cv/resnet_thor/src/grad_reducer_thor.py
浏览文件 @
d4cfe55c
...
...
@@ -129,7 +129,7 @@ class DistributedGradReducerThor(Cell):
>>> ParallelMode.HYBRID_PARALLEL]:
>>> self.reducer_flag = True
>>> if self.reducer_flag:
>>> mean = context.get_auto_parallel_context("
mirror
_mean")
>>> mean = context.get_auto_parallel_context("
gradients
_mean")
>>> if mean.get_device_num_is_set():
>>> degree = context.get_auto_parallel_context("device_num")
>>> else:
...
...
model_zoo/official/cv/resnet_thor/src/thor.py
浏览文件 @
d4cfe55c
...
...
@@ -22,7 +22,7 @@ import mindspore.common.dtype as mstype
from
mindspore._checkparam
import
check_bool
from
mindspore._checkparam
import
Validator
as
validator
from
mindspore.nn.optim.optimizer
import
Optimizer
from
mindspore.parallel._utils
import
_get_device_num
,
_get_
mirror
_mean
from
mindspore.parallel._utils
import
_get_device_num
,
_get_
gradients
_mean
from
src.grad_reducer_thor
import
DistributedGradReducerThor
_momentum_opt
=
C
.
MultitypeFuncGraph
(
"momentum_opt"
)
...
...
@@ -85,7 +85,7 @@ class THOR_GPU(Optimizer):
self
.
assign
=
P
.
Assign
()
self
.
mul
=
P
.
Mul
()
mean
=
_get_
mirror
_mean
()
mean
=
_get_
gradients
_mean
()
degree
=
_get_device_num
()
self
.
grad_reducer_thorA
=
DistributedGradReducerThor
(
self
.
parameters
,
0
,
mean
,
degree
)
self
.
grad_reducer_thorG
=
DistributedGradReducerThor
(
self
.
parameters
,
0
,
mean
,
degree
)
...
...
@@ -191,7 +191,7 @@ class THOR(Optimizer):
1.0
/
196
,
1.0
/
196
,
1.0
/
196
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
]
mean
=
_get_
mirror
_mean
()
mean
=
_get_
gradients
_mean
()
degree
=
_get_device_num
()
self
.
grad_reducer_Amax
=
DistributedGradReducerThor
(
self
.
parameters
,
2
,
mean
,
degree
)
self
.
grad_reducer_Gmax
=
DistributedGradReducerThor
(
self
.
parameters
,
5
,
mean
,
degree
)
...
...
model_zoo/official/cv/resnet_thor/train.py
浏览文件 @
d4cfe55c
...
...
@@ -94,7 +94,7 @@ if __name__ == '__main__':
device_id
=
int
(
os
.
getenv
(
'DEVICE_ID'
))
context
.
set_context
(
device_id
=
device_id
,
enable_auto_mixed_precision
=
True
)
context
.
set_auto_parallel_context
(
device_num
=
args_opt
.
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
107
],
"hccl_world_groupsum1"
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
27
],
"hccl_world_groupsum2"
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
27
],
"hccl_world_groupsum3"
)
...
...
@@ -105,7 +105,7 @@ if __name__ == '__main__':
else
:
init
()
context
.
set_auto_parallel_context
(
device_num
=
get_group_size
(),
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
107
])
ckpt_save_dir
=
config
.
save_checkpoint_path
+
"ckpt_"
+
str
(
get_rank
())
+
"/"
...
...
model_zoo/official/cv/resnext50/eval.py
浏览文件 @
d4cfe55c
...
...
@@ -117,7 +117,7 @@ def test(cloud_args=None):
args
.
group_size
=
get_group_size
()
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
device_num
=
args
.
group_size
,
parameter_broadcast
=
True
,
mirror
_mean
=
True
)
parameter_broadcast
=
True
,
gradients
_mean
=
True
)
else
:
args
.
rank
=
0
args
.
group_size
=
1
...
...
model_zoo/official/cv/resnext50/train.py
浏览文件 @
d4cfe55c
...
...
@@ -179,7 +179,7 @@ def train(cloud_args=None):
args
.
group_size
=
get_group_size
()
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
device_num
=
args
.
group_size
,
parameter_broadcast
=
True
,
mirror
_mean
=
True
)
parameter_broadcast
=
True
,
gradients
_mean
=
True
)
else
:
args
.
rank
=
0
args
.
group_size
=
1
...
...
model_zoo/official/cv/shufflenetv2/train.py
浏览文件 @
d4cfe55c
...
...
@@ -60,7 +60,7 @@ if __name__ == '__main__':
cfg
.
group_size
=
get_group_size
()
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
device_num
=
cfg
.
group_size
,
parameter_broadcast
=
True
,
mirror
_mean
=
True
)
parameter_broadcast
=
True
,
gradients
_mean
=
True
)
else
:
cfg
.
rank
=
0
cfg
.
group_size
=
1
...
...
model_zoo/official/cv/ssd/src/ssd.py
浏览文件 @
d4cfe55c
...
...
@@ -392,7 +392,7 @@ class TrainingWrapper(nn.Cell):
if
self
.
parallel_mode
in
[
ParallelMode
.
DATA_PARALLEL
,
ParallelMode
.
HYBRID_PARALLEL
]:
self
.
reducer_flag
=
True
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
if
auto_parallel_context
().
get_device_num_is_set
():
degree
=
context
.
get_auto_parallel_context
(
"device_num"
)
else
:
...
...
model_zoo/official/cv/ssd/train.py
浏览文件 @
d4cfe55c
...
...
@@ -60,7 +60,7 @@ def main():
if
args_opt
.
distribute
:
device_num
=
args_opt
.
device_num
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
,
device_num
=
device_num
)
init
()
rank
=
args_opt
.
device_id
%
device_num
...
...
model_zoo/official/cv/vgg16/train.py
浏览文件 @
d4cfe55c
...
...
@@ -140,7 +140,7 @@ if __name__ == '__main__':
device_num
=
args
.
group_size
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
device_num
=
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
parameter_broadcast
=
True
,
mirror
_mean
=
True
)
parameter_broadcast
=
True
,
gradients
_mean
=
True
)
else
:
context
.
set_context
(
device_id
=
args
.
device_id
)
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
args
.
device_target
)
...
...
model_zoo/official/cv/warpctc/src/warpctc_for_train.py
浏览文件 @
d4cfe55c
...
...
@@ -14,7 +14,7 @@
# ============================================================================
"""Automatic differentiation with grad clip."""
import
numpy
as
np
from
mindspore.parallel._utils
import
(
_get_device_num
,
_get_
mirror
_mean
,
from
mindspore.parallel._utils
import
(
_get_device_num
,
_get_
gradients
_mean
,
_get_parallel_mode
)
from
mindspore.context
import
ParallelMode
from
mindspore.common
import
dtype
as
mstype
...
...
@@ -93,7 +93,7 @@ class TrainOneStepCellWithGradClip(Cell):
if
parallel_mode
in
(
ParallelMode
.
DATA_PARALLEL
,
ParallelMode
.
HYBRID_PARALLEL
):
self
.
reducer_flag
=
True
if
self
.
reducer_flag
:
mean
=
_get_
mirror
_mean
()
mean
=
_get_
gradients
_mean
()
degree
=
_get_device_num
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
...
...
model_zoo/official/cv/warpctc/train.py
浏览文件 @
d4cfe55c
...
...
@@ -64,7 +64,7 @@ if __name__ == '__main__':
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
device_num
=
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
else
:
device_num
=
1
rank
=
0
...
...
model_zoo/official/cv/yolov3_darknet53/eval.py
浏览文件 @
d4cfe55c
...
...
@@ -255,7 +255,7 @@ def test():
context
.
reset_auto_parallel_context
()
parallel_mode
=
ParallelMode
.
STAND_ALONE
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
mirror
_mean
=
True
,
device_num
=
1
)
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
gradients
_mean
=
True
,
device_num
=
1
)
args
.
logger
.
info
(
'Creating Network....'
)
network
=
YOLOV3DarkNet53
(
is_training
=
False
)
...
...
model_zoo/official/cv/yolov3_darknet53/src/yolo.py
浏览文件 @
d4cfe55c
...
...
@@ -421,7 +421,7 @@ class TrainingWrapper(nn.Cell):
if
self
.
parallel_mode
in
[
ParallelMode
.
DATA_PARALLEL
,
ParallelMode
.
HYBRID_PARALLEL
]:
self
.
reducer_flag
=
True
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
if
auto_parallel_context
().
get_device_num_is_set
():
degree
=
context
.
get_auto_parallel_context
(
"device_num"
)
else
:
...
...
model_zoo/official/cv/yolov3_darknet53/train.py
浏览文件 @
d4cfe55c
...
...
@@ -178,7 +178,7 @@ def train():
else
:
parallel_mode
=
ParallelMode
.
STAND_ALONE
degree
=
1
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
mirror
_mean
=
True
,
device_num
=
degree
)
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
gradients
_mean
=
True
,
device_num
=
degree
)
network
=
YOLOV3DarkNet53
(
is_training
=
True
)
# default is kaiming-normal
...
...
model_zoo/official/cv/yolov3_darknet53_quant/eval.py
浏览文件 @
d4cfe55c
...
...
@@ -254,7 +254,7 @@ def test():
context
.
reset_auto_parallel_context
()
parallel_mode
=
ParallelMode
.
STAND_ALONE
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
mirror
_mean
=
True
,
device_num
=
1
)
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
gradients
_mean
=
True
,
device_num
=
1
)
args
.
logger
.
info
(
'Creating Network....'
)
network
=
YOLOV3DarkNet53
(
is_training
=
False
)
...
...
model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
浏览文件 @
d4cfe55c
...
...
@@ -421,7 +421,7 @@ class TrainingWrapper(nn.Cell):
if
self
.
parallel_mode
in
[
ParallelMode
.
DATA_PARALLEL
,
ParallelMode
.
HYBRID_PARALLEL
]:
self
.
reducer_flag
=
True
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
if
auto_parallel_context
().
get_device_num_is_set
():
degree
=
context
.
get_auto_parallel_context
(
"device_num"
)
else
:
...
...
model_zoo/official/cv/yolov3_darknet53_quant/train.py
浏览文件 @
d4cfe55c
...
...
@@ -162,7 +162,7 @@ def train():
else
:
parallel_mode
=
ParallelMode
.
STAND_ALONE
degree
=
1
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
mirror
_mean
=
True
,
device_num
=
degree
)
context
.
set_auto_parallel_context
(
parallel_mode
=
parallel_mode
,
gradients
_mean
=
True
,
device_num
=
degree
)
network
=
YOLOV3DarkNet53
(
is_training
=
True
)
# default is kaiming-normal
...
...
model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
浏览文件 @
d4cfe55c
...
...
@@ -656,7 +656,7 @@ class TrainingWrapper(nn.Cell):
if
self
.
parallel_mode
in
[
ParallelMode
.
DATA_PARALLEL
,
ParallelMode
.
HYBRID_PARALLEL
]:
self
.
reducer_flag
=
True
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
if
auto_parallel_context
().
get_device_num_is_set
():
degree
=
context
.
get_auto_parallel_context
(
"device_num"
)
else
:
...
...
model_zoo/official/cv/yolov3_resnet18/train.py
浏览文件 @
d4cfe55c
...
...
@@ -92,7 +92,7 @@ def main():
if
args_opt
.
distribute
:
device_num
=
args_opt
.
device_num
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
,
device_num
=
device_num
)
init
()
rank
=
args_opt
.
device_id
%
device_num
...
...
model_zoo/official/nlp/bert/run_pretrain.py
浏览文件 @
d4cfe55c
...
...
@@ -85,7 +85,7 @@ def run_pretrain():
ckpt_save_dir
=
args_opt
.
save_checkpoint_path
+
'ckpt_'
+
str
(
rank
)
+
'/'
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
,
device_num
=
device_num
)
from
mindspore.parallel._auto_parallel_context
import
auto_parallel_context
if
bert_net_cfg
.
num_hidden_layers
==
12
:
...
...
model_zoo/official/nlp/bert/src/bert_for_finetune.py
浏览文件 @
d4cfe55c
...
...
@@ -66,7 +66,7 @@ class BertFinetuneCell(nn.Cell):
self
.
reducer_flag
=
True
self
.
grad_reducer
=
None
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
degree
=
get_group_size
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
self
.
is_distributed
=
(
self
.
parallel_mode
!=
ParallelMode
.
STAND_ALONE
)
...
...
@@ -167,7 +167,7 @@ class BertSquadCell(nn.Cell):
self
.
reducer_flag
=
True
self
.
grad_reducer
=
None
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
degree
=
get_group_size
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
self
.
is_distributed
=
(
self
.
parallel_mode
!=
ParallelMode
.
STAND_ALONE
)
...
...
model_zoo/official/nlp/bert/src/bert_for_pre_training.py
浏览文件 @
d4cfe55c
...
...
@@ -283,7 +283,7 @@ class BertTrainOneStepCell(nn.Cell):
self
.
reducer_flag
=
True
self
.
grad_reducer
=
None
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
degree
=
get_group_size
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
...
...
model_zoo/official/nlp/bert_thor/run_pretrain.py
浏览文件 @
d4cfe55c
...
...
@@ -87,7 +87,7 @@ def run_pretrain():
ckpt_save_dir
=
args_opt
.
save_checkpoint_path
+
'ckpt_'
+
str
(
rank
)
+
'/'
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
,
device_num
=
device_num
)
from
mindspore.parallel._auto_parallel_context
import
auto_parallel_context
if
bert_net_cfg
.
num_hidden_layers
==
12
:
...
...
model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
浏览文件 @
d4cfe55c
...
...
@@ -301,7 +301,7 @@ class BertTrainOneStepCell(nn.Cell):
self
.
reducer_flag
=
True
self
.
grad_reducer
=
None
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
degree
=
get_group_size
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
...
...
model_zoo/official/nlp/bert_thor/src/grad_reducer_thor.py
浏览文件 @
d4cfe55c
...
...
@@ -129,7 +129,7 @@ class DistributedGradReducerThor(Cell):
>>> ParallelMode.HYBRID_PARALLEL]:
>>> self.reducer_flag = True
>>> if self.reducer_flag:
>>> mean = context.get_auto_parallel_context("
mirror
_mean")
>>> mean = context.get_auto_parallel_context("
gradients
_mean")
>>> if mean.get_device_num_is_set():
>>> degree = context.get_auto_parallel_context("device_num")
>>> else:
...
...
model_zoo/official/nlp/bert_thor/src/thor_for_bert_arg.py
浏览文件 @
d4cfe55c
...
...
@@ -20,7 +20,7 @@ from mindspore.common.parameter import ParameterTuple
from
mindspore.common.tensor
import
Tensor
from
mindspore.nn.optim.optimizer
import
Optimizer
from
mindspore.ops
import
functional
as
F
,
composite
as
C
,
operations
as
P
from
mindspore.parallel._utils
import
_get_device_num
,
_get_
mirror
_mean
from
mindspore.parallel._utils
import
_get_device_num
,
_get_
gradients
_mean
from
.grad_reducer_thor
import
DistributedGradReducerThor
momentum_opt
=
C
.
MultitypeFuncGraph
(
"momentum_opt"
)
...
...
@@ -83,7 +83,7 @@ class THOR(Optimizer):
self
.
damping
=
damping
self
.
one
=
Tensor
(
1
,
mstype
.
int32
)
self
.
cov_step
=
Parameter
(
initializer
(
0
,
[
1
],
mstype
.
int32
),
name
=
"cov_step"
,
requires_grad
=
False
)
mean
=
_get_
mirror
_mean
()
mean
=
_get_
gradients
_mean
()
degree
=
_get_device_num
()
self
.
grad_reducer_g
=
DistributedGradReducerThor
(
self
.
parameters
,
3
,
mean
,
degree
)
...
...
model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
浏览文件 @
d4cfe55c
...
...
@@ -23,7 +23,7 @@ from mindspore.common.parameter import Parameter
from
mindspore.common
import
dtype
as
mstype
from
mindspore.nn.wrap.grad_reducer
import
DistributedGradReducer
from
mindspore.context
import
ParallelMode
from
mindspore.parallel._utils
import
_get_device_num
,
_get_parallel_mode
,
_get_
mirror
_mean
from
mindspore.parallel._utils
import
_get_device_num
,
_get_parallel_mode
,
_get_
gradients
_mean
from
.transformer
import
Transformer
from
.grad_clip
import
GRADIENT_CLIP_TYPE
,
GRADIENT_CLIP_VALUE
,
ClipGradients
...
...
@@ -251,7 +251,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
self
.
reducer_flag
=
True
self
.
grad_reducer
=
None
if
self
.
reducer_flag
:
mean
=
_get_
mirror
_mean
()
mean
=
_get_
gradients
_mean
()
degree
=
_get_device_num
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
self
.
is_distributed
=
(
self
.
parallel_mode
!=
ParallelMode
.
STAND_ALONE
)
...
...
model_zoo/official/nlp/mass/train.py
浏览文件 @
d4cfe55c
...
...
@@ -234,7 +234,7 @@ def _setup_parallel_env(platform):
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
device_num
=
MultiAscend
.
get_group_size
(),
parameter_broadcast
=
True
,
mirror
_mean
=
True
gradients
_mean
=
True
)
...
...
model_zoo/official/nlp/tinybert/run_general_distill.py
浏览文件 @
d4cfe55c
...
...
@@ -81,7 +81,7 @@ def run_general_distill():
rank
=
D
.
get_rank
()
save_ckpt_dir
=
save_ckpt_dir
+
'_ckpt_'
+
str
(
rank
)
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
,
device_num
=
device_num
)
else
:
rank
=
0
...
...
model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
浏览文件 @
d4cfe55c
...
...
@@ -318,7 +318,7 @@ class BertTrainCell(nn.Cell):
self
.
grad_reducer
=
F
.
identity
self
.
degree
=
1
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
self
.
degree
=
get_group_size
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
self
.
degree
)
self
.
cast
=
P
.
Cast
()
...
...
@@ -568,7 +568,7 @@ class BertEvaluationCell(nn.Cell):
self
.
grad_reducer
=
F
.
identity
self
.
degree
=
1
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
self
.
degree
=
get_group_size
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
self
.
degree
)
self
.
is_distributed
=
(
self
.
parallel_mode
!=
ParallelMode
.
STAND_ALONE
)
...
...
model_zoo/official/nlp/transformer/src/transformer_for_train.py
浏览文件 @
d4cfe55c
...
...
@@ -23,7 +23,7 @@ from mindspore.common.parameter import Parameter, ParameterTuple
from
mindspore.common
import
dtype
as
mstype
from
mindspore.nn.wrap.grad_reducer
import
DistributedGradReducer
from
mindspore.context
import
ParallelMode
from
mindspore.parallel._utils
import
_get_device_num
,
_get_parallel_mode
,
_get_
mirror
_mean
from
mindspore.parallel._utils
import
_get_device_num
,
_get_parallel_mode
,
_get_
gradients
_mean
from
mindspore.communication.management
import
get_group_size
from
mindspore
import
context
from
.transformer_model
import
TransformerModel
...
...
@@ -168,7 +168,7 @@ class TransformerTrainOneStepCell(nn.Cell):
self
.
reducer_flag
=
True
self
.
grad_reducer
=
None
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
degree
=
get_group_size
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
...
...
@@ -256,7 +256,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
self
.
reducer_flag
=
True
self
.
grad_reducer
=
None
if
self
.
reducer_flag
:
mean
=
_get_
mirror
_mean
()
mean
=
_get_
gradients
_mean
()
degree
=
_get_device_num
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
self
.
is_distributed
=
(
self
.
parallel_mode
!=
ParallelMode
.
STAND_ALONE
)
...
...
model_zoo/official/nlp/transformer/train.py
浏览文件 @
d4cfe55c
...
...
@@ -118,7 +118,7 @@ def run_transformer_train():
if
args
.
distribute
==
"true"
:
device_num
=
args
.
device_num
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
,
parameter_broadcast
=
True
,
device_num
=
device_num
)
D
.
init
()
rank_id
=
args
.
device_id
%
device_num
...
...
model_zoo/official/recommend/deepfm/train.py
浏览文件 @
d4cfe55c
...
...
@@ -56,7 +56,7 @@ if __name__ == '__main__':
device_id
=
int
(
os
.
getenv
(
'DEVICE_ID'
))
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
args_opt
.
device_target
,
device_id
=
device_id
)
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
)
init
()
rank_id
=
int
(
os
.
environ
.
get
(
'RANK_ID'
))
elif
args_opt
.
device_target
==
"GPU"
:
...
...
@@ -65,7 +65,7 @@ if __name__ == '__main__':
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
device_num
=
get_group_size
(),
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
gradients
_mean
=
True
)
rank_id
=
get_rank
()
else
:
print
(
"Unsupported device_target "
,
args_opt
.
device_target
)
...
...
model_zoo/official/recommend/wide_and_deep/src/wide_and_deep.py
浏览文件 @
d4cfe55c
...
...
@@ -367,7 +367,7 @@ class TrainStepWrap(nn.Cell):
self
.
reducer_flag
=
parallel_mode
in
(
ParallelMode
.
DATA_PARALLEL
,
ParallelMode
.
HYBRID_PARALLEL
)
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
degree
=
context
.
get_auto_parallel_context
(
"device_num"
)
self
.
grad_reducer_w
=
DistributedGradReducer
(
self
.
optimizer_w
.
parameters
,
mean
,
degree
)
self
.
grad_reducer_d
=
DistributedGradReducer
(
self
.
optimizer_d
.
parameters
,
mean
,
degree
)
...
...
model_zoo/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py
浏览文件 @
d4cfe55c
...
...
@@ -147,8 +147,8 @@ if __name__ == "__main__":
init
()
if
wide_deep_config
.
host_device_mix
==
1
:
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
SEMI_AUTO_PARALLEL
,
mirror
_mean
=
True
)
parallel_mode
=
ParallelMode
.
SEMI_AUTO_PARALLEL
,
gradients
_mean
=
True
)
else
:
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
AUTO_PARALLEL
,
mirror
_mean
=
True
)
parallel_mode
=
ParallelMode
.
AUTO_PARALLEL
,
gradients
_mean
=
True
)
train_and_eval
(
wide_deep_config
)
model_zoo/official/recommend/wide_and_deep/train_and_eval_distribute.py
浏览文件 @
d4cfe55c
...
...
@@ -119,7 +119,7 @@ if __name__ == "__main__":
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
wide_deep_config
.
device_target
,
save_graphs
=
True
)
init
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
,
device_num
=
get_group_size
())
train_and_eval
(
wide_deep_config
)
model_zoo/official/recommend/wide_and_deep/train_and_eval_parameter_server.py
浏览文件 @
d4cfe55c
...
...
@@ -119,7 +119,7 @@ if __name__ == "__main__":
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
wide_deep_config
.
device_target
)
init
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
,
device_num
=
get_group_size
())
train_and_eval
(
wide_deep_config
)
model_zoo/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py
浏览文件 @
d4cfe55c
...
...
@@ -554,7 +554,7 @@ class TrainStepWrap(nn.Cell):
ParallelMode
.
HYBRID_PARALLEL
):
self
.
reducer_flag
=
True
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
degree
=
context
.
get_auto_parallel_context
(
"device_num"
)
self
.
grad_reducer_w
=
DistributedGradReducer
(
self
.
optimizer_w
.
parameters
,
mean
,
degree
)
...
...
model_zoo/official/recommend/wide_and_deep_multitable/train_and_eval_distribute.py
浏览文件 @
d4cfe55c
...
...
@@ -113,6 +113,6 @@ if __name__ == "__main__":
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
"Davinci"
,
save_graphs
=
True
)
init
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
,
device_num
=
get_group_size
())
train_and_eval
(
wide_and_deep_config
)
tests/st/auto_parallel/resnet50_expand_loss.py
浏览文件 @
d4cfe55c
...
...
@@ -34,7 +34,7 @@ from mindspore.context import ParallelMode
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
"Ascend"
)
context
.
set_context
(
device_id
=
int
(
os
.
getenv
(
'DEVICE_ID'
)))
init
()
context
.
set_auto_parallel_context
(
mirror
_mean
=
True
,
parallel_mode
=
ParallelMode
.
AUTO_PARALLEL
)
context
.
set_auto_parallel_context
(
gradients
_mean
=
True
,
parallel_mode
=
ParallelMode
.
AUTO_PARALLEL
)
np
.
random
.
seed
(
10
)
...
...
tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/train_and_test_multinpu_ci.py
浏览文件 @
d4cfe55c
...
...
@@ -31,7 +31,7 @@ from src.config import WideDeepConfig
sys
.
path
.
append
(
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))))
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
"Ascend"
,
save_graphs
=
True
)
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
SEMI_AUTO_PARALLEL
,
mirror
_mean
=
True
)
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
SEMI_AUTO_PARALLEL
,
gradients
_mean
=
True
)
init
()
...
...
tests/st/model_zoo_tests/wide_and_deep/python_file_for_ci/wide_and_deep.py
浏览文件 @
d4cfe55c
...
...
@@ -24,7 +24,7 @@ from mindspore.nn.optim import Adam, FTRL
# from mindspore.nn.metrics import Metric
from
mindspore.common.initializer
import
Uniform
,
initializer
# from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
from
mindspore.parallel._utils
import
_get_device_num
,
_get_parallel_mode
,
_get_
mirror
_mean
from
mindspore.parallel._utils
import
_get_device_num
,
_get_parallel_mode
,
_get_
gradients
_mean
from
mindspore.context
import
ParallelMode
from
mindspore.nn.wrap.grad_reducer
import
DistributedGradReducer
from
mindspore.communication.management
import
get_group_size
...
...
@@ -299,7 +299,7 @@ class TrainStepWrap(nn.Cell):
self
.
reducer_flag
=
parallel_mode
in
(
ParallelMode
.
DATA_PARALLEL
,
ParallelMode
.
HYBRID_PARALLEL
)
if
self
.
reducer_flag
:
mean
=
_get_
mirror
_mean
()
mean
=
_get_
gradients
_mean
()
degree
=
_get_device_num
()
self
.
grad_reducer_w
=
DistributedGradReducer
(
self
.
optimizer_w
.
parameters
,
mean
,
degree
)
self
.
grad_reducer_d
=
DistributedGradReducer
(
self
.
optimizer_d
.
parameters
,
mean
,
degree
)
...
...
tests/st/model_zoo_tests/wide_and_deep/train_and_test_multinpu_ci_data_parallel.py
浏览文件 @
d4cfe55c
...
...
@@ -30,7 +30,7 @@ from src.config import WideDeepConfig
sys
.
path
.
append
(
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))))
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
"Ascend"
,
save_graphs
=
True
)
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
)
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
)
init
()
...
...
tests/st/model_zoo_tests/yolov3/src/yolov3.py
浏览文件 @
d4cfe55c
...
...
@@ -656,7 +656,7 @@ class TrainingWrapper(nn.Cell):
if
self
.
parallel_mode
in
[
ParallelMode
.
DATA_PARALLEL
,
ParallelMode
.
HYBRID_PARALLEL
]:
self
.
reducer_flag
=
True
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
if
auto_parallel_context
().
get_device_num_is_set
():
degree
=
context
.
get_auto_parallel_context
(
"device_num"
)
else
:
...
...
tests/st/nccl/test_nccl_lenet.py
浏览文件 @
d4cfe55c
...
...
@@ -78,7 +78,7 @@ def multisteplr(total_steps, gap, base_lr=0.9, gamma=0.1, dtype=mstype.float32):
def
test_lenet_nccl
():
context
.
set_auto_parallel_context
(
parallel_mode
=
"data_parallel"
,
mirror
_mean
=
True
,
device_num
=
get_group_size
())
context
.
set_auto_parallel_context
(
parallel_mode
=
"data_parallel"
,
gradients
_mean
=
True
,
device_num
=
get_group_size
())
net
=
LeNet
()
net
.
set_train
()
...
...
tests/st/networks/models/bert/src/bert_for_pre_training.py
浏览文件 @
d4cfe55c
...
...
@@ -279,7 +279,7 @@ class BertTrainOneStepCell(nn.Cell):
self
.
reducer_flag
=
True
self
.
grad_reducer
=
None
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
degree
=
get_group_size
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
...
...
tests/st/networks/models/bert/src/utils.py
浏览文件 @
d4cfe55c
...
...
@@ -61,7 +61,7 @@ class BertFinetuneCell(nn.Cell):
self
.
reducer_flag
=
True
self
.
grad_reducer
=
None
if
self
.
reducer_flag
:
mean
=
context
.
get_auto_parallel_context
(
"
mirror
_mean"
)
mean
=
context
.
get_auto_parallel_context
(
"
gradients
_mean"
)
degree
=
get_group_size
()
self
.
grad_reducer
=
DistributedGradReducer
(
optimizer
.
parameters
,
mean
,
degree
)
self
.
is_distributed
=
(
self
.
parallel_mode
!=
ParallelMode
.
STAND_ALONE
)
...
...
tests/st/networks/models/resnet50/src_thor/grad_reducer_thor.py
浏览文件 @
d4cfe55c
...
...
@@ -130,7 +130,7 @@ class DistributedGradReducerThor(Cell):
>>> ParallelMode.HYBRID_PARALLEL]:
>>> self.reducer_flag = True
>>> if self.reducer_flag:
>>> mean = context.get_auto_parallel_context("
mirror
_mean")
>>> mean = context.get_auto_parallel_context("
gradients
_mean")
>>> if mean.get_device_num_is_set():
>>> degree = context.get_auto_parallel_context("device_num")
>>> else:
...
...
tests/st/networks/models/resnet50/src_thor/thor.py
浏览文件 @
d4cfe55c
...
...
@@ -20,7 +20,7 @@ from mindspore.common.parameter import ParameterTuple
from
mindspore.common.tensor
import
Tensor
from
mindspore.nn.optim.optimizer
import
Optimizer
from
mindspore.ops
import
functional
as
F
,
composite
as
C
,
operations
as
P
from
mindspore.parallel._utils
import
_get_device_num
,
_get_
mirror
_mean
from
mindspore.parallel._utils
import
_get_device_num
,
_get_
gradients
_mean
from
.grad_reducer_thor
import
DistributedGradReducerThor
...
...
@@ -87,7 +87,7 @@ class THOR(Optimizer):
1.0
/
196
,
1.0
/
196
,
1.0
/
196
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
/
49
,
1.0
]
mean
=
_get_
mirror
_mean
()
mean
=
_get_
gradients
_mean
()
degree
=
_get_device_num
()
self
.
grad_reducer_Amax
=
DistributedGradReducerThor
(
self
.
parameters
,
2
,
mean
,
degree
)
self
.
grad_reducer_Gmax
=
DistributedGradReducerThor
(
self
.
parameters
,
5
,
mean
,
degree
)
...
...
tests/st/networks/models/resnet50/test_resnet50_imagenet.py
浏览文件 @
d4cfe55c
...
...
@@ -137,7 +137,7 @@ def train_process(q, device_id, epoch_size, device_num, enable_hccl):
os
.
environ
[
'RANK_SIZE'
]
=
str
(
device_num
)
if
enable_hccl
:
context
.
set_auto_parallel_context
(
device_num
=
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
parameter_broadcast
=
True
)
gradients
_mean
=
True
,
parameter_broadcast
=
True
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
107
,
160
])
init
()
...
...
@@ -240,7 +240,7 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
os
.
environ
[
'RANK_SIZE'
]
=
str
(
device_num
)
if
enable_hccl
:
context
.
set_auto_parallel_context
(
device_num
=
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
parameter_broadcast
=
True
)
gradients
_mean
=
True
,
parameter_broadcast
=
True
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
107
],
"hccl_world_groupsum1"
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
27
],
"hccl_world_groupsum2"
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
27
],
"hccl_world_groupsum3"
)
...
...
tests/st/ps/multi_full_ps/test_multi_full_ps.py
浏览文件 @
d4cfe55c
...
...
@@ -97,7 +97,8 @@ if __name__ == "__main__":
criterion
=
nn
.
SoftmaxCrossEntropyWithLogits
(
sparse
=
True
,
reduction
=
"mean"
)
net_opt
=
nn
.
Momentum
(
network
.
trainable_params
(),
0.01
,
0.9
)
if
device_target
==
"GPU"
:
context
.
set_auto_parallel_context
(
parallel_mode
=
"data_parallel"
,
mirror_mean
=
True
,
device_num
=
get_group_size
())
context
.
set_auto_parallel_context
(
parallel_mode
=
"data_parallel"
,
gradients_mean
=
True
,
device_num
=
get_group_size
())
net_with_criterion
=
WithLossCell
(
network
,
criterion
)
train_network
=
TrainOneStepCell
(
net_with_criterion
,
net_opt
)
train_network
.
set_train
()
...
...
tests/ut/python/communication/test_data_parallel_dense.py
浏览文件 @
d4cfe55c
...
...
@@ -58,7 +58,7 @@ def test_data_parallel_dense():
"""test_data_parallel_dense"""
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
)
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
device_num
=
8
)
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
,
device_num
=
8
)
inp
=
Tensor
(
np
.
ones
([
32
,
128
]).
astype
(
np
.
float32
)
*
0.01
)
label
=
Tensor
(
np
.
zeros
([
32
,
768
]).
astype
(
np
.
float32
))
net
=
DenseMMNet
()
...
...
tests/ut/python/communication/test_data_parallel_lenet.py
浏览文件 @
d4cfe55c
...
...
@@ -80,7 +80,7 @@ def test_lenet5_train_step_training_pynative():
context
.
set_context
(
mode
=
context
.
PYNATIVE_MODE
)
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
device_num
=
8
,
mirror
_mean
=
True
)
device_num
=
8
,
gradients
_mean
=
True
)
predict
=
Tensor
(
np
.
ones
([
1
,
1
,
32
,
32
]).
astype
(
np
.
float32
)
*
0.01
)
label
=
Tensor
(
np
.
zeros
([
1
,
10
]).
astype
(
np
.
float32
))
DatasetLenet
(
predict
,
label
,
2
)
...
...
tests/ut/python/model/test_mix_precision.py
浏览文件 @
d4cfe55c
...
...
@@ -97,7 +97,7 @@ def test_on_momentum():
def
test_data_parallel_with_cast
():
"""test_data_parallel_with_cast"""
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror
_mean
=
True
,
device_num
=
8
)
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
gradients
_mean
=
True
,
device_num
=
8
)
predict
=
Tensor
(
np
.
ones
([
1
,
1
,
32
,
32
]).
astype
(
np
.
float32
)
*
0.01
)
label
=
Tensor
(
np
.
zeros
([
1
,
10
]).
astype
(
np
.
float32
))
net
=
LeNet5
()
...
...
tests/ut/python/parallel/test_optimizer.py
浏览文件 @
d4cfe55c
...
...
@@ -46,7 +46,7 @@ class Net(nn.Cell):
def
test_dense_gen_graph
():
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
)
context
.
reset_auto_parallel_context
()
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
HYBRID_PARALLEL
,
mirror
_mean
=
True
,
device_num
=
8
)
context
.
set_auto_parallel_context
(
parallel_mode
=
ParallelMode
.
HYBRID_PARALLEL
,
gradients
_mean
=
True
,
device_num
=
8
)
init
()
network
=
Net
(
512
,
128
)
...
...
tests/ut/python/parallel/test_set_auto_parallel_context.py
浏览文件 @
d4cfe55c
...
...
@@ -20,17 +20,17 @@ from mindspore.parallel._auto_parallel_context import auto_parallel_context
def
test_set_auto_parallel_context
():
context
.
set_auto_parallel_context
(
device_num
=
4
,
global_rank
=
3
,
mirror
_mean
=
True
,
gradient_fp32_sync
=
False
,
context
.
set_auto_parallel_context
(
device_num
=
4
,
global_rank
=
3
,
gradients
_mean
=
True
,
gradient_fp32_sync
=
False
,
parallel_mode
=
"auto_parallel"
,
parameter_broadcast
=
False
)
device_num
=
context
.
get_auto_parallel_context
(
"device_num"
)
global_rank
=
context
.
get_auto_parallel_context
(
"global_rank"
)
mirror_mean
=
context
.
get_auto_parallel_context
(
"mirror
_mean"
)
gradients_mean
=
context
.
get_auto_parallel_context
(
"gradients
_mean"
)
gradient_fp32_sync
=
context
.
get_auto_parallel_context
(
"gradient_fp32_sync"
)
parallel_mode
=
context
.
get_auto_parallel_context
(
"parallel_mode"
)
parameter_broadcast
=
context
.
get_auto_parallel_context
(
"parameter_broadcast"
)
assert
device_num
==
4
assert
global_rank
==
3
assert
mirror
_mean
assert
gradients
_mean
assert
not
gradient_fp32_sync
assert
parallel_mode
==
"auto_parallel"
assert
not
parameter_broadcast
...
...
@@ -45,9 +45,9 @@ def test_set_auto_parallel_context():
global_rank
=
auto_parallel_context
().
get_global_rank
()
assert
global_rank
==
4
auto_parallel_context
().
set_
mirror
_mean
(
True
)
mirror_mean
=
auto_parallel_context
().
get_mirror
_mean
()
assert
mirror
_mean
auto_parallel_context
().
set_
gradients
_mean
(
True
)
gradients_mean
=
auto_parallel_context
().
get_gradients
_mean
()
assert
gradients
_mean
auto_parallel_context
().
set_gradient_fp32_sync
(
False
)
gradient_fp32_sync
=
auto_parallel_context
().
get_gradient_fp32_sync
()
...
...
@@ -86,7 +86,7 @@ def test_reset_auto_parallel_context():
context
.
reset_auto_parallel_context
()
device_num
=
context
.
get_auto_parallel_context
(
"device_num"
)
global_rank
=
context
.
get_auto_parallel_context
(
"global_rank"
)
mirror_mean
=
context
.
get_auto_parallel_context
(
"mirror
_mean"
)
gradients_mean
=
context
.
get_auto_parallel_context
(
"gradients
_mean"
)
gradient_fp32_sync
=
context
.
get_auto_parallel_context
(
"gradient_fp32_sync"
)
parallel_mode
=
context
.
get_auto_parallel_context
(
"parallel_mode"
)
parameter_broadcast
=
context
.
get_auto_parallel_context
(
"parameter_broadcast"
)
...
...
@@ -94,7 +94,7 @@ def test_reset_auto_parallel_context():
parameter_broadcast_is_set
=
auto_parallel_context
().
get_parameter_broadcast_is_set
()
assert
device_num
==
1
assert
global_rank
==
0
assert
not
mirror
_mean
assert
not
gradients
_mean
assert
gradient_fp32_sync
assert
parallel_mode
==
"stand_alone"
assert
not
parameter_broadcast
...
...
tests/ut/python/parallel/test_two_matmul.py
浏览文件 @
d4cfe55c
...
...
@@ -65,7 +65,7 @@ def test_two_matmul():
out
=
self
.
matmul2
(
out
,
b
)
return
out
context
.
set_auto_parallel_context
(
device_num
=
8
,
global_rank
=
0
,
mirror
_mean
=
True
)
context
.
set_auto_parallel_context
(
device_num
=
8
,
global_rank
=
0
,
gradients
_mean
=
True
)
strategy1
=
((
4
,
2
),
(
2
,
1
))
strategy2
=
((
2
,
4
),
(
4
,
1
))
net
=
GradWrap
(
NetWithLoss
(
Net
(
strategy1
,
strategy2
)))
...
...
@@ -90,7 +90,7 @@ def test_two_matmul_repeated_calculation1():
out
=
self
.
matmul2
(
out
,
b
)
return
out
context
.
set_auto_parallel_context
(
device_num
=
64
,
global_rank
=
5
,
mirror
_mean
=
True
)
context
.
set_auto_parallel_context
(
device_num
=
64
,
global_rank
=
5
,
gradients
_mean
=
True
)
strategy1
=
((
2
,
4
),
(
4
,
8
))
strategy2
=
((
1
,
1
),
(
1
,
1
))
net
=
GradWrap
(
NetWithLoss
(
Net
(
strategy1
,
strategy2
)))
...
...
tests/ut/python/train/test_amp.py
浏览文件 @
d4cfe55c
...
...
@@ -148,7 +148,7 @@ def test_compile_model_train_O2_parallel():
dataset_shapes
=
((
16
,
16
),
(
16
,
16
))
context
.
set_auto_parallel_context
(
global_rank
=
0
,
device_num
=
8
,
mirror
_mean
=
True
,
parameter_broadcast
=
True
,
gradients
_mean
=
True
,
parameter_broadcast
=
True
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
)
dataset
=
MindDataSet
(
dataset_types
,
dataset_shapes
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录