Commit 26a65bee: fix auto parallel st

Repository: magicwindyyd / mindspore, forked from MindSpore / mindspore
Authored on Mar 30, 2020 by lichenever; committed on Apr 08, 2020 by 高东海
Parent: d3400cde

Showing 7 changed files with 22 additions and 41 deletions:

tests/st/auto_parallel/onehot_model_parallel.py                   +2  -7
tests/st/auto_parallel/run_auto_parallel_loss_expand.sh           +1  -2
tests/st/auto_parallel/run_auto_parallel_resnet50_expand_loss.sh  +1  -1
tests/st/auto_parallel/run_onehot_model_parallel.sh               +1  -1
tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py       +14 -26
tests/st/auto_parallel/test_expand_loss.py                        +3  -1
tests/st/auto_parallel/test_model_parallel_onehot.py              +0  -3
tests/st/auto_parallel/onehot_model_parallel.py  (+2 -7)

@@ -130,9 +130,7 @@ class OneHotFactory:
        context.reset_auto_parallel_context()
        assert np.allclose(out_mindspore_single, out_mindspore_parallel, 0.0001, 0.0001)

@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_single
def test_reid_onehot_forward_int32_128_depth1024_model_parallel():
    fact = OneHotFactory(batch_size=128,
                         classes=1024,

@@ -142,9 +140,7 @@ def test_reid_onehot_forward_int32_128_depth1024_model_parallel():
                         strategy=((1, device_num), (), ()))
    fact.forward_cmp()

@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_single
def test_reid_onehot_forward_int32_1024_depth128_model_parallel():
    fact = OneHotFactory(batch_size=1024,
                         classes=128,

@@ -153,4 +149,3 @@ def test_reid_onehot_forward_int32_1024_depth128_model_parallel():
                         axis=-1,
                         strategy=((1, device_num), (), ()))
    fact.forward_cmp()
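The assertion above compares the single-device and model-parallel OneHot outputs with NumPy's allclose, passing the relative and absolute tolerances positionally (rtol=0.0001, atol=0.0001). As a reminder of the semantics the test relies on, a minimal self-contained sketch; the sample arrays are illustrative, not from the test:

import numpy as np

# np.allclose(a, b, rtol, atol) passes when, elementwise,
#     |a - b| <= atol + rtol * |b|
out_single = np.array([1.0000, 0.5000])
out_parallel = np.array([1.00005, 0.50004])

# Same positional tolerances as the test: rtol=0.0001, atol=0.0001.
assert np.allclose(out_single, out_parallel, 0.0001, 0.0001)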
tests/st/auto_parallel/run_auto_parallel_loss_expand.sh  (+1 -2)

@@ -18,7 +18,6 @@ BASE_PATH=$(cd "$(dirname $0)"; pwd)
CONFIG_PATH=/home/workspace/mindspore_config
export DEVICE_NUM=8
export RANK_SIZE=$DEVICE_NUM
ulimit -n 65535
source ${BASE_PATH}/env.sh
unset SLOG_PRINT_TO_STDOUT
export MINDSPORE_HCCL_CONFIG_PATH=$CONFIG_PATH/hccl/rank_table_${DEVICE_NUM}p.json

@@ -27,7 +26,7 @@ process_pid=()
for((i=0; i<$DEVICE_NUM; i++)); do
    rm -rf ${BASE_PATH}/loss_expand${i}
    mkdir ${BASE_PATH}/loss_expand${i}
-   cp -r soft_entropy_loss_expand_parallel.py ${BASE_PATH}/loss_expand${i}/
+   cp -r ${BASE_PATH}/soft_entropy_loss_expand_parallel.py ${BASE_PATH}/loss_expand${i}/
    cd ${BASE_PATH}/loss_expand${i}
    export RANK_ID=${i}
    export DEVICE_ID=${i}
tests/st/auto_parallel/run_auto_parallel_resnet50_expand_loss.sh  (+1 -1)

@@ -27,7 +27,7 @@ process_pid=()
for((i=0; i<$DEVICE_NUM; i++)); do
    rm -rf ${BASE_PATH}/resnet50_expand_loss${i}
    mkdir ${BASE_PATH}/resnet50_expand_loss${i}
-   cp -r resnet50_expand_loss.py ${BASE_PATH}/resnet50_expand_loss${i}/
+   cp -r ${BASE_PATH}/resnet50_expand_loss.py ${BASE_PATH}/resnet50_expand_loss${i}/
    cd ${BASE_PATH}/resnet50_expand_loss${i}
    export RANK_ID=${i}
    export DEVICE_ID=${i}
tests/st/auto_parallel/run_onehot_model_parallel.sh  (+1 -1)

@@ -27,7 +27,7 @@ process_pid=()
for((i=0; i<$DEVICE_NUM; i++)); do
    rm -rf ${BASE_PATH}/onehot_model_parallel${i}
    mkdir ${BASE_PATH}/onehot_model_parallel${i}
-   cp -r onehot_model_parallel.py ${BASE_PATH}/onehot_model_parallel${i}/
+   cp -r ${BASE_PATH}/onehot_model_parallel.py ${BASE_PATH}/onehot_model_parallel${i}/
    cd ${BASE_PATH}/onehot_model_parallel${i}
    export RANK_ID=${i}
    export DEVICE_ID=${i}
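The fix is identical in all three runner scripts: the worker script is now copied via its absolute ${BASE_PATH} path rather than a path relative to the caller's working directory, so the runners keep working when invoked from elsewhere (as test_expand_loss.py below now does). For illustration, a rough Python equivalent of the per-device setup loop; the commented launch line is hypothetical and not part of the scripts:

import os
import shutil

BASE_PATH = os.path.dirname(os.path.realpath(__file__))
DEVICE_NUM = 8

for i in range(DEVICE_NUM):
    workdir = os.path.join(BASE_PATH, f"loss_expand{i}")
    shutil.rmtree(workdir, ignore_errors=True)   # rm -rf
    os.makedirs(workdir)                         # mkdir
    # Copy by absolute path so this works from any cwd.
    shutil.copy(os.path.join(BASE_PATH, "soft_entropy_loss_expand_parallel.py"), workdir)
    os.environ["RANK_ID"] = str(i)               # export RANK_ID=${i}
    os.environ["DEVICE_ID"] = str(i)             # export DEVICE_ID=${i}
    # subprocess.Popen([...], cwd=workdir) would launch the per-device process here.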
tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py  (+14 -26)

@@ -118,6 +118,9 @@ class Dataset():
    def get_dataset_size(self):
        return self.length

+   def get_repeat_count(self):
+       return self.length
+
class ModelCallback(Callback):
    def __init__(self):
        super(ModelCallback, self).__init__()

@@ -177,7 +180,6 @@ class LossFactory():
        dataGen = DataGenerator()
        self.input_full, self.input_part = dataGen.input_data((batch_size, embed))
        self.label_full, self.label_part = dataGen.label_data((batch_size,), embed)
-       self.expect_out = np.array([0.9205861, 0.9205861, 0.9205861, 0.9201946, 0.91951686, 0.919343])

    def single_matmul_trains(self):
        single_callback = ModelCallback()

@@ -187,7 +189,8 @@ class LossFactory():
        epoch_size = 6
        dataset = Dataset(self.input_full, self.label_full)
        model.train(epoch_size, dataset, callbacks=single_callback, dataset_sink_mode=False)
-       print("---loss---", single_callback.loss_list)
+       loss_value = np.array(single_callback.loss_list)
+       return loss_value

    def data_parallel_matmul_trains(self):
        parallel_callback = ModelCallback()

@@ -199,7 +202,7 @@ class LossFactory():
        dataset = Dataset(self.input_part, self.label_part)
        model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False)
        loss_value = np.array(parallel_callback.loss_list)
-       assert allclose(loss_value, self.expect_out, 0.00001, 0.00001)
+       return loss_value

    def model_parallel_matmul_trains(self):
        parallel_callback = ModelCallback()

@@ -224,7 +227,7 @@ class LossFactory():
        dataset = Dataset(self.input_part, self.label_part)
        model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False)
        loss_value = np.array(parallel_callback.loss_list)
-       assert allclose(loss_value, self.expect_out, 0.00001, 0.00001)
+       return loss_value

    def mix_parallel_matmul_trains(self):
        parallel_callback = ModelCallback()

@@ -249,28 +252,13 @@ class LossFactory():
        dataset = Dataset(self.input_part, self.label_part)
        model.train(epoch_size, dataset, callbacks=parallel_callback, dataset_sink_mode=False)
        loss_value = np.array(parallel_callback.loss_list)
-       assert allclose(loss_value, self.expect_out, 0.00001, 0.00001)
+       return loss_value

-@pytest.mark.level0
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_single
-def test_matmul_loss_data_parallel_trains():
-   loss_factory = LossFactory()
-   context.reset_auto_parallel_context()
-   loss_factory.data_parallel_matmul_trains()
-
-@pytest.mark.level0
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_single
-def test_matmul_loss_model_parallel_trains():
-   loss_factory = LossFactory()
-   context.reset_auto_parallel_context()
-   loss_factory.model_parallel_matmul_trains()
-
-@pytest.mark.level0
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_single
-def test_matmul_loss_mix_parallel_trains():
+def test_all_trains():
    loss_factory = LossFactory()
    context.reset_auto_parallel_context()
-   loss_factory.mix_parallel_matmul_trains()
+   single_loss = loss_factory.single_matmul_trains()
+   model_parallel_loss = loss_factory.model_parallel_matmul_trains()
+   mix_parallel_loss = loss_factory.mix_parallel_matmul_trains()
+   assert allclose(single_loss, model_parallel_loss)
+   assert allclose(single_loss, mix_parallel_loss)
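With the hard-coded expect_out values gone, each train method now returns its per-step loss array, and the consolidated test_all_trains compares the parallel modes against the single-device run. The loss values come from the ModelCallback shown in the first hunk; a minimal sketch of such a callback, assuming the step_end and run_context.original_args() hooks of the MindSpore Callback API of that era (the cb_params field names are an assumption based on the conventional layout):

import numpy as np
from mindspore.train.callback import Callback

class ModelCallback(Callback):
    """Collect the loss reported at the end of every training step."""
    def __init__(self):
        super(ModelCallback, self).__init__()
        self.loss_list = []

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        # net_outputs holds the loss tensor when the trained net is wrapped with a loss cell.
        self.loss_list.append(cb_params.net_outputs.asnumpy().mean())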
tests/st/auto_parallel/test_expand_loss.py  (+3 -1)

@@ -18,7 +18,9 @@ import pytest
@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_single
def test_expand_loss():
-   ret = os.system("sh run_auto_parallel_loss_expand.sh")
+   sh_path = os.path.split(os.path.realpath(__file__))[0]
+   ret = os.system(f"sh {sh_path}/run_auto_parallel_loss_expand.sh")
    assert(ret == 0)
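This change makes the entry test independent of pytest's working directory: os.path.realpath(__file__) resolves the test file's own location, and os.path.split(...)[0] takes its directory, so the shell runner is found next to the test no matter where pytest was launched from. A small self-contained check of the path idiom; the sample path is illustrative:

import os

p = "/home/user/tests/st/auto_parallel/test_expand_loss.py"
# os.path.split(p)[0] is equivalent to os.path.dirname(p).
assert os.path.split(p)[0] == os.path.dirname(p)
assert os.path.split(p)[0] == "/home/user/tests/st/auto_parallel"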
tests/st/auto_parallel/test_model_parallel_onehot.py  (+0 -3)

@@ -16,9 +16,6 @@
import os
import pytest

-@pytest.mark.level0
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_single
def test_expand_loss():
    ret = os.system("sh run_onehot_model_parallel.sh")
    assert(ret == 0)
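Both entry tests gate on the runner's exit status: os.system returns the command's exit status (on Unix, the raw wait status), so assert(ret == 0) passes only when the script exits cleanly. A stricter alternative, sketched here for illustration only and not part of this commit, raises with the exit code instead of a bare assertion:

import os
import subprocess

sh_path = os.path.split(os.path.realpath(__file__))[0]
# check=True raises CalledProcessError on a nonzero exit code.
subprocess.run(["sh", os.path.join(sh_path, "run_onehot_model_parallel.sh")], check=True)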