Commit d402b944
Authored May 22, 2020 by mindspore-ci-bot; committed May 22, 2020 via Gitee
!1370 delete parallel end-to-end test cases
Merge pull request !1370 from yihuaijie/master
Parents: f967700e, 1e6ee838

Showing 32 changed files with 0 additions and 3614 deletions (+0, -3614)
+0 -178   tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/add_relu/add_relu_parallel_4p.sh
+0 -356   tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/batch_parallel/conv2d_parallel_4p.sh
+0 -36    tests/ut/python/parallel/parallel_end_to_end/dist_env_4p.sh
+0 -120   tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/dropout/dropout_parallel_4p.sh
+0 -154   tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py
+0 -175   tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/hcom/allgather_4p.sh
+0 -27    tests/ut/python/parallel/parallel_end_to_end/hcom/allreduce_4p.sh
+0 -206   tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/l2normalize/l2normalize_parallel_4p.sh
+0 -1     tests/ut/python/parallel/parallel_end_to_end/log/README.MD
+0 -195   tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/loss/loss_parallel_4p.sh
+0 -329   tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/matmul/matmul_parallel_4p.sh
+0 -213   tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/max/max_parallel_4p.sh
+0 -27    tests/ut/python/parallel/parallel_end_to_end/mul_softmax/mul_activation_parallel_4p.sh
+0 -200   tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py
+0 -147   tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/onehot/onehot_parallel_4p.sh
+0 -206   tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/prelu/prelu_parallel_4p.sh
+0 -252   tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/reducemean/reducemean_parallel_4p.sh
+0 -206   tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/reshape/reshape_parallel_4p.sh
+0 -235   tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py
+0 -27    tests/ut/python/parallel/parallel_end_to_end/transpose/transpose_parallel_4p.sh
tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py
Deleted, mode 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np
import pytest

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class AddRelu(Cell):
    def __init__(self, strategy0=None, strategy1=None):
        super(AddRelu, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.relu = P.ReLU(strategy=strategy1)

    def construct(self, x, z):
        out = self.add(x, z)
        return self.relu(out)


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, output_grad):
        return grad_all_with_sens(self.network)(x, y, output_grad)


class AddReluFactory:
    def __init__(self, input_shape, strategy0, strategy1):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2,
                                    input_shape).astype(np.float32)
        self.input_np2 = 1.0
        self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
                                         input_shape).astype(np.float32)
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        need_dev_num = 1
        need_dev_num_ = 1
        for s in strategy0[1]:
            need_dev_num = need_dev_num * s
        for s in strategy1[1]:
            need_dev_num_ = need_dev_num_ * s
        self.x_id = device_id % need_dev_num
        self.y_id = device_id % need_dev_num
        self.out_id = device_id % need_dev_num_

    def forward_mindspore_impl(self):
        net = AddRelu()
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        out = net(x, y)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(self.input_np2, ms.float32)
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_mindspore_impl(self):
        output_grad = Tensor(self.output_grad_np)
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        net = AddRelu()
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad)
        return input_grad

    def grad_mindspore_parallel_impl(self):
        output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
        output_grad = Tensor(output_grads[self.out_id])
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(self.input_np2, ms.float32)
        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
                              parallel_inputs_run=[x1, y1, output_grad])
        return input_grad

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

    def grad_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        _ = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        _ = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)


@pytest.mark.reid_forward
def test_reid_add_relu_input_256_64():
    stra0 = (0, (2, 2), ())
    stra1 = (0, (2, 2))
    fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
    fact.forward_cmp()


@pytest.mark.reid_grad
def test_reid_grad_add_relu_input_256_64():
    stra0 = (0, (2, 2), ())
    stra1 = (0, (2, 2))
    fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
    fact.grad_cmp()
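For reference, the get_parallel_blocks helper that every one of these deleted tests repeats simply slices a full tensor into the per-device blocks implied by a strategy tuple. The following standalone NumPy sketch is not part of the diff; the helper body is copied from the test above, and the (256, 64) shape with a (2, 2) strategy mirrors the AddRelu cases. It shows which block a rank picks with device_id % 4.

import numpy as np

def get_parallel_blocks(input_, strategy):
    # Split along axis 0 by strategy[0], then each piece along axis 1 by strategy[1], and so on.
    blocks = [input_]
    for axis, stra in enumerate(strategy):
        temp = []
        while blocks:
            temp.extend(np.split(blocks.pop(0), stra, axis=axis))
        blocks.extend(temp)
    return blocks

full = np.arange(256 * 64, dtype=np.float32).reshape(256, 64)
shards = get_parallel_blocks(full, (2, 2))  # the strategy used by the AddRelu tests
assert len(shards) == 4                     # one block per device
assert shards[0].shape == (128, 32)         # each rank holds a quarter of the tensor
# A rank with RANK_ID r feeds shards[r % 4] as its parallel_inputs_run slice.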
tests/ut/python/parallel/parallel_end_to_end/add_relu/add_relu_parallel_4p.sh
Deleted, mode 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_add_relu_parallel_4p.py>../../log/test_add_relu_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py
Deleted, mode 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np
from numpy import allclose

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore._checkparam import check_bool, twice
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class _Conv(Cell):
    r"""Applies a N-D convolution over an input signal composed of several input
    planes.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, pad_mode, padding,
                 dilation, group, has_bias, weight_init, bias_init):
        super(_Conv, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad_mode = pad_mode
        self.padding = padding
        self.dilation = dilation
        self.group = group
        self.has_bias = has_bias
        if not (isinstance(in_channels, int) and in_channels > 0):
            raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed '
                             + str(in_channels) + ', should be a int and greater than 0.')
        if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \
                (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
                kernel_size[0] < 1 or kernel_size[1] < 1:
            raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed '
                             + str(self.kernel_size) + ', should be a int or tuple and equal to or greater than 1.')
        if in_channels % group != 0:
            raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by '
                             'attr \'group\' of \'Conv2D\' Op.')
        if out_channels % group != 0:
            raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by '
                             'attr \'group\' of \'Conv2D\' Op.')
        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels // group, *kernel_size]),
                                name='weight')
        if check_bool(has_bias):
            self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')
        else:
            if bias_init != 'zeros':
                print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
            self.bias = None

    def construct(self, *inputs):
        raise NotImplementedError


class Conv2d(_Conv):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same', padding=0,
                 dilation=1, group=1, has_bias=False, weight_init='normal', bias_init='zeros', strategy=None):
        kernel_size = twice(kernel_size)
        super(Conv2d, self).__init__(in_channels, out_channels, kernel_size, stride, pad_mode, padding,
                                     dilation, group, has_bias, weight_init, bias_init)
        self.add = P.TensorAdd(strategy)
        self.conv2d = P.Conv2D(out_channel=self.out_channels, kernel_size=self.kernel_size, mode=1,
                               pad_mode=self.pad_mode, pad=self.padding, stride=self.stride,
                               dilation=self.dilation, group=self.group, strategy=None)
        self.bias_add = P.BiasAdd()

    def construct(self, input1, input2):
        x = self.add(input1, input2)
        if self.has_bias:
            return self.bias_add(self.conv2d(x, self.weight), self.bias)
        return self.conv2d(x, self.weight)


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, input1, input2, output_grad):
        return grad_all_with_sens(self.network)(input1, input2, output_grad)


class Conv2dFactory:
    def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias):
        self.in_n, self.in_c, self.in_h, self.in_w = input_shape
        self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape
        self.stride = stride
        self.pad_mode = pad_mode
        self.padding = padding
        self.dilation = dilation
        self.group = group
        self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1))
        prefix = ""
        input_size = 1
        filter_size = 1
        for s in input_shape:
            prefix = prefix + str(s) + "_"
            input_size = input_size * s
        self.prefix = prefix
        for s in filter_shape:
            filter_size = filter_size * s
        number_range1 = min(10, input_size)
        number_range2 = min(10, filter_size)
        self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2,
                                    input_shape).astype(np.float16)
        self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4,
                                    input_shape).astype(np.float16)
        self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2,
                                    filter_shape).astype(np.float16)
        self.has_bias = has_bias
        if self.has_bias is True:
            self.bias_np = np.arange(0, self.out_c).astype(np.float16)
        self.out_shape = (128, 64, 56, 56)
        out_size = 1
        for s in self.out_shape:
            out_size = out_size * s
        number_range3 = min(10, out_size)
        self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2,
                                         self.out_shape).astype(np.float16)
        self.x_id = device_id % 4
        self.y_id = device_id % 4
        self.out_strategy = self.strategy0[1]
        self.out_id = device_id % 4

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_conv2d_mindspore_impl(self):
        input1 = Tensor(self.input_np1)
        input2 = Tensor(self.input_np2)
        weight = Tensor(self.weight_np)
        if self.has_bias:
            bias = Tensor(self.bias_np)
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=True, weight_init=weight,
                         bias_init=bias)
        else:
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=False, weight_init=weight)
        out = net(input1, input2)
        return out.asnumpy()

    def forward_conv2d_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        weight = Tensor(self.weight_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        if self.has_bias:
            bias = Tensor(self.bias_np)
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=True, weight_init=weight,
                         bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
        else:
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=False, weight_init=weight,
                         strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_conv2d_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        weight = Tensor(self.weight_np)
        output_grad = Tensor(self.output_grad_np)
        if self.has_bias:
            bias = Tensor(self.bias_np)
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=True, weight_init=weight,
                         bias_init=bias)
        else:
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=False, weight_init=weight)
        grad_net = Grad(net)
        grad_net.set_train()
        out_grad = grad_net(x, y, output_grad)
        return out_grad

    def grad_conv2d_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        weight = Tensor(self.weight_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        output_grad = Tensor(self.output_grad_np)
        output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        output_grad1 = Tensor(output_grads[self.out_id])
        if self.has_bias:
            bias = Tensor(self.bias_np)
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=True, weight_init=weight,
                         bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
        else:
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=False, weight_init=weight,
                         strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_train()
        grad_net.set_auto_parallel()
        out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                            parallel_inputs_run=[x1, y1, output_grad1])
        return out_grad

    def forward_conv2d_cmp(self):
        out_mindspore = self.forward_conv2d_mindspore_impl()
        out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        assert allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)

    def grad_conv2d_cmp(self):
        input_grad_mindspore = self.grad_conv2d_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1])
        assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001)
        assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001)


def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true():
    fact = Conv2dFactory(input_shape=(128, 64, 112, 112), filter_shape=(64, 64, 1, 1),
                         stride=2, pad_mode='valid', padding=0, dilation=1, group=1, has_bias=False)
    fact.forward_conv2d_cmp()


def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true():
    fact = Conv2dFactory(input_shape=(128, 64, 112, 112), filter_shape=(64, 64, 1, 1),
                         stride=2, pad_mode='valid', padding=0, dilation=1, group=1, has_bias=False)
    fact.grad_conv2d_cmp()
tests/ut/python/parallel/parallel_end_to_end/batch_parallel/conv2d_parallel_4p.sh
Deleted, mode 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_conv2d_parallel_4p.py>../../log/test_conv2d_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/dist_env_4p.sh
Deleted, mode 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
export SLOG_PRINT_TO_STDOUT=1
source /root/miniconda3/bin/activate ci3.6
export RANK_SIZE=4
export RANK_TABLE_FILE=../../rank_table_4p.json
export RANK_ID=$1
export DEVICE_ID=$1
export HCCL_FLAG=1
export DEPLOY_MODE=0
export AICPU_FLAG=1
export DUMP_OP=1
export PYTHONPATH=../../../../../../../../mindspore:/usr/local/HiAI/runtime/python3.6/site-packages/topi.egg/:/usr/local/HiAI/runtime/python3.6/site-packages/te.egg/:/usr/local/HiAI/runtime/ops/op_impl/built-in/ai_core/tbe/
export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/HiAI/runtime/lib64/libhccl.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so
export LD_LIBRARY_PATH=/usr/local/HiAI/runtime/lib64
export FE_FLAG=1
export PATH=/usr/local/HiAI/runtime/ccec_compiler/bin:$PATH
if [ $1 -eq 0 ]; then
    export DUMP_GE_GRAPH=true
    export ME_DRAW_GRAPH=1
fi
tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py
Deleted, mode 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.nn import Dropout

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Net(Cell):
    def __init__(self, keep_prob, seed0, seed1, strategy=None):
        super(Net, self).__init__()
        self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy)

    def construct(self, input_):
        x = self.drop(input_)
        return x


# pylint: disable=comparison-with-itself
class DropoutFactory:
    def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None):
        size = 1
        prefix = ""
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(10, size)
        self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32)
        self.keep_prob = keep_prob
        self.seed0 = seed0
        self.seed1 = seed1
        self.strategy0 = strategy0
        need_dev_num = 1
        for s in strategy0[1]:
            need_dev_num = need_dev_num * s
        self.x_id = device_id % need_dev_num
        self.out_id = device_id % need_dev_num

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def d4_tensor_compare(self, input_, out_me):
        [a, b, c, d] = input_.shape
        for i in range(a):
            for j in range(b):
                for k in range(c):
                    for e in range(d):
                        if out_me[i, j, k, e] == 0:
                            assert True
                        else:
                            assert np.allclose(out_me[i, j, k, e], input_[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001)

    def forward_mindspore_parallel_impl(self):
        x = Tensor(self.input_np)
        inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        net = Net(0.4, 0, 0, strategy=self.strategy0)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
        return out.asnumpy()

    def forward_cmp(self):
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1])
        self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel)


def test_reid_dropout_forward_seed_F32_64_512_8_8():
    fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1)))
    fact.forward_cmp()


def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat():
    fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1)))
    fact.forward_cmp()
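The quadruple loop in d4_tensor_compare above encodes the inverted-dropout contract: an output element is either exactly zero (dropped) or the input scaled by 1/keep_prob. A vectorized NumPy sketch of the same check follows; it is not part of the diff, and the random mask is only a stand-in so the assertion can be exercised offline, whereas the real mask comes from the Dropout op on the device.

import numpy as np

def check_inverted_dropout(block, out, keep_prob=0.4):
    # Surviving positions must carry the 1/keep_prob scale; dropped positions are exactly zero.
    kept = out != 0
    assert np.allclose(out[kept], block[kept] * (1.0 / keep_prob), 0.0001, 0.0001)

block = (np.arange(64 * 512 * 8 * 8, dtype=np.float32) % 10).reshape(64, 512, 8, 8)
mask = np.random.default_rng(0).random(block.shape) < 0.4  # stand-in dropout mask
out = np.where(mask, block / 0.4, 0.0)
check_inverted_dropout(block, out)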
tests/ut/python/parallel/parallel_end_to_end/dropout/dropout_parallel_4p.sh
Deleted, mode 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_dropout_parallel_4p.py>../../log/test_dropout_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py
Deleted, mode 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class MatmulSingle(Cell):
    def __init__(self, transpose_a=False, transpose_b=False):
        super(MatmulSingle, self).__init__()
        self.matmul = P.MatMul(transpose_a, transpose_b)
        self.pow = P.Pow()
        self.reduce_sum = P.ReduceSum()

    def construct(self, x, y):
        out = self.matmul(x, y)
        out = self.pow(out, 2.0)
        out = self.reduce_sum(out, None)
        return out


class MatmulAllgather(Cell):
    def __init__(self, group, transpose_a=False, transpose_b=False):
        super(MatmulAllgather, self).__init__()
        self.allgather = P.AllGather(group=group)
        self.matmul = P.MatMul(transpose_a, transpose_b)
        self.pow = P.Pow()
        self.reduce_sum = P.ReduceSum()
        self.allreduce = P.AllReduce(group=group)

    def construct(self, x, y):
        x = self.allgather(x)
        out = self.matmul(x, y)
        out = self.pow(out, 2.0)
        out = self.reduce_sum(out, None)
        out = self.allreduce(out)
        return out


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, sens):
        return grad_all_with_sens(self.network)(x, y, sens)


class MatmulAllgatherFactory:
    def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra):
        self.inputx = self.gen_value(inputx_shape, 10)
        self.inputy = self.gen_value(inputy_shape, 20)
        self.x_stra = x_stra
        self.y_stra = y_stra
        stra_size = 1
        for s in x_stra:
            stra_size = stra_size * s
        self.stra_size = stra_size

    def gen_value(self, input_shape, delta):
        size = 1
        for s in input_shape:
            size = size * s
        number_range = min(100, size)
        input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
        return input_np

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def grad_mindspore_impl_single(self):
        x = Tensor(self.inputx)
        y = Tensor(self.inputy)
        sens = Tensor(1.0, dtype=ms.float32)
        net = MatmulSingle()
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, sens)
        return input_grad

    def grad_mindspore_impl_reduce(self):
        inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
        inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
        x = Tensor(inputxs[device_id % self.stra_size])
        y = Tensor(inputys[device_id % self.stra_size])
        repeat_num = device_num / self.stra_size
        v = self.stra_size * repeat_num * repeat_num * repeat_num
        sens = Tensor(1.0 / v, dtype=ms.float32)
        net = MatmulAllgather("hccl_world_group")
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, sens)
        return input_grad

    def grad_cmp(self):
        single_results = self.grad_mindspore_impl_single()
        reduce_results = self.grad_mindspore_impl_reduce()
        single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
        reduce_result0 = reduce_results[0].asnumpy()
        single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
        reduce_result1 = reduce_results[1].asnumpy()
        assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
        assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)


def test_reduce_grad():
    inputx_shape = (64, 32)
    inputy_shape = (32, 64)
    fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4))
    fact.grad_cmp()
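In MatmulAllgather.construct above, the AllGather rebuilds the full x from the per-rank row shards before the MatMul; offline that is just a concatenation of the (4, 1)-strategy blocks along axis 0. A small NumPy sketch of that round trip, using the (64, 32) shape from test_reduce_grad (illustrative only, not part of the diff):

import numpy as np

x = np.arange(64 * 32, dtype=np.float32).reshape(64, 32)

# (4, 1) strategy: the 64 rows are split across 4 ranks, columns stay whole.
shards = np.split(x, 4, axis=0)            # what each RANK_ID holds locally
gathered = np.concatenate(shards, axis=0)  # what AllGather reconstructs on every rank
assert gathered.shape == (64, 32)
assert np.array_equal(gathered, x)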
tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py
Deleted, mode 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class MatmulSingle(Cell):
    def __init__(self, transpose_a=False, transpose_b=False):
        super(MatmulSingle, self).__init__()
        self.matmul1 = P.MatMul(transpose_a, transpose_b)
        self.matmul2 = P.MatMul(transpose_a, transpose_b)
        self.pow = P.Pow()
        self.reduce_sum = P.ReduceSum()

    def construct(self, x, y, z):
        out = self.matmul1(x, y)
        out = self.matmul2(out, z)
        out = self.pow(out, 2.0)
        out = self.reduce_sum(out, None)
        return out


class MatmulReduce(Cell):
    def __init__(self, group, transpose_a=False, transpose_b=False):
        super(MatmulReduce, self).__init__()
        self.matmul1 = P.MatMul(transpose_a, transpose_b)
        self.allreduce1 = P.AllReduce(group=group)
        self.matmul2 = P.MatMul(transpose_a, transpose_b)
        self.pow = P.Pow()
        self.reduce_sum = P.ReduceSum()
        self.allreduce2 = P.AllReduce(group=group)

    def construct(self, x, y, z):
        out = self.matmul1(x, y)
        out = self.allreduce1(out)
        out = self.matmul2(out, z)
        out = self.pow(out, 2.0)
        out = self.reduce_sum(out, None)
        out = self.allreduce2(out)
        return out


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, z, sens):
        return grad_all_with_sens(self.network)(x, y, z, sens)


class MatmulReduceFactory:
    def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra):
        self.inputx = self.gen_value(inputx_shape, 10)
        self.inputy = self.gen_value(inputy_shape, 20)
        self.inputz = self.gen_value(inputz_shape, 30)
        self.x_stra = x_stra
        self.y_stra = y_stra
        self.z_stra = z_stra
        stra_size = 1
        for s in x_stra:
            stra_size = stra_size * s
        self.stra_size = stra_size

    def gen_value(self, input_shape, delta):
        size = 1
        for s in input_shape:
            size = size * s
        number_range = min(100, size)
        input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
        return input_np

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def grad_mindspore_impl_single(self):
        x = Tensor(self.inputx)
        y = Tensor(self.inputy)
        z = Tensor(self.inputz)
        sens = Tensor(1.0, dtype=ms.float32)
        net = MatmulSingle()
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, z, sens)
        return input_grad

    def grad_mindspore_impl_reduce(self):
        inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
        inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
        inputzs = self.get_parallel_blocks(self.inputz, self.z_stra)
        x = Tensor(inputxs[device_id % self.stra_size])
        y = Tensor(inputys[device_id % self.stra_size])
        z = Tensor(inputzs[device_id % self.stra_size])
        repeat_num = device_num / self.stra_size
        v = self.stra_size * repeat_num * repeat_num * repeat_num
        sens = Tensor(1.0 / v, dtype=ms.float32)
        net = MatmulReduce("hccl_world_group")
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, z, sens)
        return input_grad

    def grad_cmp(self):
        single_results = self.grad_mindspore_impl_single()
        reduce_results = self.grad_mindspore_impl_reduce()
        single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
        reduce_result0 = reduce_results[0].asnumpy()
        single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
        reduce_result1 = reduce_results[1].asnumpy()
        single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size]
        reduce_result2 = reduce_results[2].asnumpy()
        assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
        assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
        assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001)


def test_reduce_grad():
    inputx_shape = (32, 64)
    inputy_shape = (64, 64)
    inputz_shape = (64, 32)
    fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4))
    fact.grad_cmp()


def test_reduce_grad_repeat():
    inputx_shape = (32, 64)
    inputy_shape = (64, 64)
    inputz_shape = (64, 32)
    fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2))
    fact.grad_cmp()
tests/ut/python/parallel/parallel_end_to_end/hcom/allgather_4p.sh
Deleted, mode 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_allgather_4p.py>../../log/test_allgather_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/hcom/allreduce_4p.sh
Deleted, mode 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_allreduce_4p.py>../../log/test_allreduce_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py
Deleted, mode 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class L2normalize(Cell):
    def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None):
        super(L2normalize, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.l2norm = P.L2Normalize(axis, epsilon, strategy1)

    def construct(self, x, y):
        out = self.add(x, y)
        out = self.l2norm(out)
        return out


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, output_grad):
        return grad_all_with_sens(self.network)(x, y, output_grad)


class L2normalizeFactory:
    def __init__(self, input_shape, axis, strategy0, strategy1):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2,
                                    input_shape).astype(np.float32)
        self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4,
                                    input_shape).astype(np.float32)
        target_shape = input_shape
        self.target_shape = target_shape
        target_size = 1
        for s in target_shape:
            target_size = target_size * s
        number_range = min(1000, target_size)
        self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
                                         target_shape).astype(np.float32)
        self.axis = axis
        self.epsilon = 1e-4
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        out_strategy = strategy1[1]
        self.out_strategy = out_strategy
        need_dev_num0 = 1
        need_dev_num1 = 1
        for s in strategy0[1]:
            need_dev_num0 = need_dev_num0 * s
        for s in out_strategy:
            need_dev_num1 = need_dev_num1 * s
        self.x_id = device_id % need_dev_num0
        self.y_id = device_id % need_dev_num0
        self.out_id = device_id % need_dev_num1

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        net = L2normalize(self.axis, self.epsilon)
        out = net(x, y)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        net = L2normalize(self.axis, self.epsilon)
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad)
        return input_grad

    def grad_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        output_grad1 = Tensor(outgrads[self.out_id])
        net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                              parallel_inputs_run=[x1, y1, output_grad1])
        return input_grad

    def forward_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)

    def grad_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
        assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
        assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_l2normalize_input_128_512():
    input_shape = (128, 512)
    axis = 0
    fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
    fact.forward_cmp()


def test_reid_l2normalize_grad_input_128_512():
    input_shape = (128, 512)
    axis = 0
    fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
    fact.grad_cmp()


def test_reid_l2normalize_input_128_512_repeat():
    input_shape = (128, 512)
    axis = 0
    fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
    fact.forward_cmp()


def test_reid_l2normalize_grad_input_128_512_repeat():
    input_shape = (128, 512)
    axis = 0
    fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
    fact.grad_cmp()
tests/ut/python/parallel/parallel_end_to_end/l2normalize/l2normalize_parallel_4p.sh
Deleted, mode 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_l2normalize_parallel_4p.py>../../log/test_l2normalize_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/log/README.MD
deleted 100644 → 0
Log files for auto parallel end-to-end test cases.
tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py
deleted 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class AddRelu(Cell):
    def __init__(self, strategy0=None, strategy1=None):
        super(AddRelu, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.relu = P.ReLU(strategy=strategy1)

    def construct(self, x, y):
        out = self.add(x, y)
        out = self.relu(out)
        return out


class NetWithLoss(Cell):
    def __init__(self, network, strategy2=None):
        super(NetWithLoss, self).__init__()
        self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2)
        self.network = network

    def construct(self, x, y, b):
        predict = self.network(x, y)
        return self.loss(predict, b)[0]


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, b):
        return grad_all(self.network)(x, y, b)


class AddReluFactory:
    def __init__(self, input_shape, strategy0, strategy1, strategy2):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2,
                                    input_shape).astype(np.float32)
        self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4,
                                    input_shape).astype(np.float32)
        target_shape = input_shape
        self.target_shape = target_shape
        target_size = 1
        for s in target_shape:
            target_size = target_size * s
        number_range = min(10, target_size)
        self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1,
                                         target_shape).astype(np.float32)
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        self.strategy2 = strategy2
        out_strategy = strategy1[1]
        self.out_strategy = out_strategy
        need_dev_num0 = 1
        need_dev_num1 = 1
        for s in strategy0[1]:
            need_dev_num0 = need_dev_num0 * s
        for s in out_strategy:
            need_dev_num1 = need_dev_num1 * s
        self.x_id = device_id % need_dev_num0
        self.y_id = device_id % need_dev_num0
        self.out_id = device_id % need_dev_num1

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def grad_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        net = AddRelu()
        net_with_loss = NetWithLoss(net)
        grad_net = Grad(net_with_loss)
        grad_net.set_train()
        input_grads = []
        for i in range(0, 3):
            input_grad = grad_net(x, y, output_grad)
            input_grads.append(input_grad)
        return input_grads

    def grad_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        output_grad1 = Tensor(outgrads[self.out_id])
        net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
        net_with_loss = NetWithLoss(net, strategy2=self.strategy2)
        grad_net = Grad(net_with_loss)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        input_grads = []
        for i in range(0, 3):
            input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
                                  parallel_inputs_run=[x1, y1, output_grad1])
            input_grads.append(input_grad)
        return input_grads

    def grad_cmp(self):
        input_grad_mindspores = self.grad_mindspore_impl()
        input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl()
        for i in range(0, len(input_grad_mindspores)):
            input_grad_mindspore = input_grad_mindspores[i]
            input_grad_mindspore_parallel = input_grad_mindspore_parallels[i]
            input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
            input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
            input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
            input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
            input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
            input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
            np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy",
                    input_grad_blocks_0[self.x_id])
            np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy",
                    input_grad_blocks_1[self.y_id])
            np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy",
                    input_grad_mindspore_parallel0)
            np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy",
                    input_grad_mindspore_parallel1)
            assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
            assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_l2normalize_grad_input_128_512():
    input_shape = (128, 512)
    fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)),
                          strategy2=(0, (4, 1), (4, 1)))
    fact.grad_cmp()


def test_reid_l2normalize_grad_input_128_512_stridesplit():
    input_shape = (128, 512)
    fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)),
                          strategy2=(0, (4, 1), (4, 1)))
    fact.grad_cmp()
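Note on the shard indexing used by AddReluFactory above: the product of the per-dimension cuts in a strategy gives the number of distinct shards, and device_id modulo that product selects the local one (so a (1, 1) strategy maps every device to block 0). The snippet below is a small illustrative sketch of that arithmetic with hypothetical values, not part of the deleted file.

# Hypothetical strategies in the same (id, cuts_x, cuts_y) layout used above.
strategy0 = (0, (4, 1), (4, 1))   # inputs cut into 4 row shards
strategy1 = (0, (4, 1))           # ReLU output keeps the same cuts

def shard_count(cuts):
    n = 1
    for s in cuts:
        n *= s
    return n

for device_id in range(4):
    x_id = device_id % shard_count(strategy0[1])
    out_id = device_id % shard_count(strategy1[1])
    print(device_id, x_id, out_id)   # each of the 4 devices owns a distinct shard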
tests/ut/python/parallel/parallel_end_to_end/loss/loss_parallel_4p.sh
deleted 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for ((i=0; i<4; i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env > log$i.log
    pytest -s ../test_loss_parallel_4p.py>../../log/test_loss_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py
deleted 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np
from numpy import allclose

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Matmul(Cell):
    def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None):
        super(Matmul, self).__init__()
        self.add = P.TensorAdd(strategy=strategy1)
        self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0)

    def construct(self, x, w, z):
        out = self.add(x, z)
        return self.matmul(out, w)


class BatchMatMul(Cell):
    def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None):
        super(BatchMatMul, self).__init__()
        self.add = P.TensorAdd(strategy=strategy1)
        self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0)

    def construct(self, x, w, z):
        out = self.add(x, z)
        return self.batchmatmul(out, w)


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, inputa, inputb, inputz, output_grad):
        gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad)
        return gout


class BatchmatmulFactory:
    def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_):
        self.strategy = strategy
        self.strategy_ = strategy_
        inputa_size = 1
        inputb_size = 1
        prefix = ""
        for s in inputa_shape:
            prefix = prefix + str(s) + "_"
            inputa_size = inputa_size * s
        prefix = prefix + "and"
        for s in inputb_shape:
            prefix = prefix + str(s) + "_"
            inputb_size = inputb_size * s
        number_rangea = min(1000, inputa_size)
        number_rangeb = min(1000, inputb_size)
        self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea / 2,
                                 inputa_shape).astype(np.float32)
        self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2,
                                 inputb_shape).astype(np.float32)
        self.inputz = np.zeros(self.inputa.shape).astype(np.float32)
        self.transpose_a = transpose_a
        self.transpose_b = transpose_b

        out_shape = []
        device_matrix = []
        out_strategy = []
        if transpose_a:
            temp = inputa_shape[-1]
            inputa_shape[-1] = inputa_shape[-2]
            inputa_shape[-2] = temp
        if transpose_b:
            temp = inputb_shape[-1]
            inputb_shape[-1] = inputb_shape[-2]
            inputb_shape[-2] = temp

        if len(inputa_shape) >= len(inputb_shape):
            out_shape = list(inputa_shape)
            out_shape[-1] = inputb_shape[-1]
        else:
            out_shape = list(inputb_shape)
            out_shape[-2] = inputa_shape[-2]

        strategy1 = list(self.strategy[1])
        strategy2 = list(self.strategy[2])
        if transpose_a:
            temp = strategy1[-1]
            strategy1[-1] = strategy1[-2]
            strategy1[-2] = temp
        if transpose_b:
            temp = strategy2[-1]
            strategy2[-1] = strategy2[-2]
            strategy2[-2] = temp

        if len(strategy1) >= len(strategy2):
            out_strategy = strategy1.copy()
            out_strategy[-1] = strategy2[-1]
        else:
            out_strategy = strategy2.copy()
            out_strategy[-2] = strategy1[-2]
        device_matrix = out_strategy.copy()
        device_matrix.insert(-1, strategy1[-1])
        self.out_strategy = out_strategy

        need_dev_num = 1
        for s in device_matrix:
            need_dev_num = need_dev_num * s
        self.need_dev_num = need_dev_num
        self.device_matrix = device_matrix

        out_size = 1
        for s in out_shape:
            out_size = out_size * s
        number_range = min(1000, out_size)
        self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2,
                                         out_shape).astype(np.float32)

        device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix)
        x_index = device_index[:-1].copy()
        if transpose_a:
            temp = x_index[-1]
            x_index[-1] = x_index[-2]
            x_index[-2] = temp
        y_index = device_index[:-3].copy()
        y_index.append(device_index[-2])
        y_index.append(device_index[-1])
        if transpose_b:
            temp = y_index[-1]
            y_index[-1] = y_index[-2]
            y_index[-2] = temp

        out_index = device_index[:-2].copy()
        out_index.append(device_index[-1])

        print(device_matrix)
        print(device_index)

        need_dev_num_ = 1
        for s in strategy_[1]:
            need_dev_num_ = need_dev_num_ * s
        self.x_id = device_id % need_dev_num_
        self.y_id = self.list_to_id(y_index, self.strategy[2])
        self.out_id = self.list_to_id(out_index, self.out_strategy)

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def id_to_list(self, id_, shape):
        """shape: the upper bound of each dimension, e.g. (2, 4, 8)"""
        result = []
        r = id_
        for i in range(0, len(shape)):
            v = 1
            for j in range(i + 1, len(shape)):
                v = v * shape[j]
            result.append(r // v)
            r = r % v
        return result

    def list_to_id(self, id_list, shape):
        result = 0
        for i in range(0, len(id_list)):
            v = 1
            for j in range(i + 1, len(id_list)):
                v = v * shape[j]
            result = result + id_list[i] * v
        return result

    def forward_mindspore_impl(self):
        if len(self.inputa.shape) > 2:
            matmul = BatchMatMul(self.transpose_a, self.transpose_b)
        else:
            matmul = Matmul(self.transpose_a, self.transpose_b)
        matmul.set_train()
        out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz))
        return out_me.asnumpy()

    def forward_mindspore_parallel_impl(self):
        if len(self.inputa.shape) > 2:
            matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
        else:
            matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        x = Tensor(self.inputa)
        y = Tensor(self.inputb)
        z = Tensor(self.inputz)
        xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
        ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
        zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
        x1 = Tensor(xs[self.x_id])
        y1 = Tensor(ys[self.y_id])  # needs to be derived from the device matrix
        z1 = Tensor(zs[self.x_id])
        matmul.set_train()
        matmul.set_auto_parallel()
        out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1])
        return out_me.asnumpy()

    def grad_mindspore_impl(self):
        x = Tensor(self.inputa)
        y = Tensor(self.inputb)
        z = Tensor(self.inputz)
        if len(self.inputa.shape) > 2:
            matmul = BatchMatMul(self.transpose_a, self.transpose_b)
        else:
            matmul = Matmul(self.transpose_a, self.transpose_b)
        net_me = Grad(matmul)
        net_me.set_train()
        out_grad_me = Tensor(self.output_grad_np)
        out_grad = net_me(x, y, z, out_grad_me)
        return out_grad

    def grad_mindspore_parallel_impl(self):
        if len(self.inputa.shape) > 2:
            matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
        else:
            matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
        x = Tensor(self.inputa)
        y = Tensor(self.inputb)
        z = Tensor(self.inputz)
        out_grad_me = Tensor(self.output_grad_np)
        xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
        ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
        zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
        out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        x1 = Tensor(xs[self.x_id])  # needs to be derived from the device matrix
        y1 = Tensor(ys[self.y_id])
        z1 = Tensor(zs[self.x_id])
        out_grad1 = Tensor(out_grads[self.out_id])
        net_me = Grad(matmul)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net_me.set_auto_parallel()
        net_me.set_train()
        out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1],
                          parallel_inputs_run=[x1, y1, z1, out_grad1])
        return out_grad

    def forward_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)

    def grad_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1])
        input_grad_mindspores1 = self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2])
        input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1])
        assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001)
        assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001)
        assert allclose(input_grad_mindspores2[self.x_id], input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001)


def test_reid_batchmatmul_inputa_128_512_inputb_2000_512():
    inputa = [128, 512]
    inputb = [2000, 512]
    fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2)))
    fact.forward_cmp()


def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512():
    inputa = [128, 512]
    inputb = [2000, 512]
    fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2)))
    fact.grad_cmp()


def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution():
    inputa = [128, 512]
    inputb = [2000, 512]
    fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2)))
    fact.forward_cmp()


def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution():
    inputa = [128, 512]
    inputb = [2000, 512]
    fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2)))
    fact.grad_cmp()
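Note on id_to_list and list_to_id above: they are mutually inverse mixed-radix conversions between a flat device rank and its coordinates in the device matrix. The following quick self-contained check, using a hypothetical 2x2x2 device matrix, is illustration only and not part of the deleted file.

def id_to_list(id_, shape):
    # Flat rank -> coordinates, most-significant dimension first.
    result = []
    r = id_
    for i in range(len(shape)):
        v = 1
        for j in range(i + 1, len(shape)):
            v *= shape[j]
        result.append(r // v)
        r %= v
    return result

def list_to_id(id_list, shape):
    # Coordinates -> flat rank.
    result = 0
    for i in range(len(id_list)):
        v = 1
        for j in range(i + 1, len(id_list)):
            v *= shape[j]
        result += id_list[i] * v
    return result

shape = (2, 2, 2)
for rank in range(8):
    assert list_to_id(id_to_list(rank, shape), shape) == rank
print(id_to_list(6, shape))   # [1, 1, 0]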
tests/ut/python/parallel/parallel_end_to_end/matmul/matmul_parallel_4p.sh
deleted 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for ((i=0; i<4; i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env > log$i.log
    pytest -s ../test_matmul_parallel_4p.py > ../../log/test_matmul_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py
deleted 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, input1, input2, output_grad):
        return grad_all_with_sens(self.network)(input1, input2, output_grad)


class Max(Cell):
    def __init__(self, axis, keep_dims, strategy0=None, strategy1=None):
        super(Max, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1)
        self.axis = axis

    def construct(self, input1, input2):
        out = self.add(input1, input2)
        return self.reduce_max(out, self.axis)


class MaxFactory:
    def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1):
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        self.axis = axis
        self.keep_dims = keep_dims
        input_size = 1
        prefix = ""
        for s in input_shape:
            prefix = prefix + str(s) + "_"
            input_size = input_size * s
        number_range = min(1000, input_size)
        self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2,
                                    input_shape).astype(np.float32)
        self.input_np2 = self.input_np1.copy()
        self.out_grad_np = None
        out_shape = list(input_shape)
        out_shape.pop(axis)
        out_size = input_size / input_shape[axis]
        number_range_ = min(1000, out_size)
        self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2,
                                      out_shape).astype(np.float32)
        out_strategy = list(strategy1[1])
        out_strategy.pop(axis)
        self.out_strategy = out_strategy
        need_dev_num = 1
        need_dev_num_ = 1
        for s in strategy0[1]:
            need_dev_num = need_dev_num * s
        for s in out_strategy:
            need_dev_num_ = need_dev_num_ * s
        self.x_id = device_id % need_dev_num
        self.y_id = device_id % need_dev_num
        self.out_id = device_id % need_dev_num_

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_mindspore_impl(self):
        input1 = Tensor(self.input_np1)
        input2 = Tensor(self.input_np2)
        net = Max(axis=self.axis, keep_dims=self.keep_dims)
        out = net(input1, input2)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(xs[self.x_id])
        y1 = Tensor(ys[self.y_id])
        net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_mindspore_impl(self):
        input1 = Tensor(self.input_np1)
        input2 = Tensor(self.input_np2)
        out_grad = Tensor(self.out_grad_np)
        net = Max(axis=self.axis, keep_dims=self.keep_dims)
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(input1, input2, out_grad)
        return input_grad

    def grad_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy)
        out_grad = Tensor(output_grads[self.out_id])
        xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(xs[self.x_id])
        y1 = Tensor(ys[self.y_id])
        net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad],
                              parallel_inputs_run=[x1, y1, out_grad])
        return input_grad

    def forward_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        print(out_mindspore)
        print(out_mindspore_parallel)
        out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)

    def grad_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
        assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
        assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_max_forward_input_256_64():
    fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
                      strategy1=(0, (4, 1)))
    fact.forward_cmp()


def test_reid_max_grad_input_256_64():
    fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
                      strategy1=(0, (4, 1)))
    fact.grad_cmp()


def test_reid_max_forward_input_128_64_32_32():
    fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False,
                      strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1)))
    fact.forward_cmp()


def test_reid_max_grad_input_128_64_32_32():
    fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False,
                      strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1)))
    fact.grad_cmp()


def test_reid_max_forward_input_256_64_repeat():
    fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
                      strategy1=(0, (2, 1)))
    fact.forward_cmp()


def test_reid_max_grad_input_256_64_repeat():
    fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
                      strategy1=(0, (2, 1)))
    fact.grad_cmp()
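Note on MaxFactory above: out_strategy is built by dropping the reduced axis from strategy1, mirroring how ReduceMax with keep_dims=False drops that axis from the output shape, and out_id is then taken modulo the remaining shard count. The snippet below is a short illustrative sketch of that derivation with hypothetical values, not part of the deleted file.

input_shape = (256, 64)
axis = 1
strategy1_cuts = [4, 1]            # cuts of the ReduceMax input

out_strategy = list(strategy1_cuts)
out_strategy.pop(axis)             # reduced axis disappears -> [4]

shards = 1
for s in out_strategy:
    shards *= s

for device_id in range(4):
    # each device checks a distinct slice of the (256,)-shaped output
    print(device_id, device_id % shards)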
tests/ut/python/parallel/parallel_end_to_end/max/max_parallel_4p.sh
deleted 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for ((i=0; i<4; i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env > log$i.log
    pytest -s ../test_max_parallel_4p.py>../../log/test_max_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/mul_softmax/mul_activation_parallel_4p.sh
deleted 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for ((i=0; i<4; i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env > log$i.log
    pytest -s ../test_mul_softmax_parallel_4p.py>../../log/test_mul_softmax_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py
deleted 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np
import pytest

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class MulSoftmax(Cell):
    def __init__(self, strategy0=None, strategy1=None, axis=0):
        super(MulSoftmax, self).__init__()
        self.mul = P.Mul(strategy=strategy0)
        self.softmax = P.Softmax(axis=axis, strategy=strategy1)

    def construct(self, x, z):
        out = self.mul(x, z)
        return self.softmax(out)


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, output_grad):
        return grad_all_with_sens(self.network)(x, y, output_grad)


class MulSoftmaxFactory:
    def __init__(self, input_shape, strategy0, strategy1):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2,
                                    input_shape).astype(np.float32)
        self.input_np2 = 1.0
        self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
                                         input_shape).astype(np.float32)
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        need_dev_num = 1
        need_dev_num_ = 1
        for s in strategy0[1]:
            need_dev_num = need_dev_num * s
        for s in strategy1[1]:
            need_dev_num_ = need_dev_num_ * s
        self.x_id = device_id % need_dev_num
        self.y_id = device_id % need_dev_num
        self.out_id = device_id % need_dev_num_

    def forward_mindspore_impl(self):
        net = MulSoftmax()
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        out = net(x, y)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(self.input_np2, ms.float32)
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_mindspore_impl(self):
        output_grad = Tensor(self.output_grad_np)
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        net = MulSoftmax()
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad)
        return input_grad

    def grad_mindspore_parallel_impl(self):
        output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
        output_grad = Tensor(output_grads[self.out_id])
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_train()
        grad_net.set_auto_parallel()
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(self.input_np2, ms.float32)
        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
                              parallel_inputs_run=[x1, y1, output_grad])
        return input_grad

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel)
        out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

    def grad_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0)
        np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1)
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        # Here, for TensorMul, the first input X1 is not broadcast while X2 is broadcast.
        assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
        assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)


@pytest.mark.reid_forward
def test_reid_mul_softmax_input_128x64():
    stra0 = (0, (1, 4), ())
    stra1 = (0, (1, 4))
    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
    fact.forward_cmp()


@pytest.mark.reid_grad
def test_reid_grad_mul_softmax_input_128x64():
    stra0 = (0, (1, 4), ())
    stra1 = (0, (1, 4))
    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
    fact.grad_cmp()


@pytest.mark.reid_forward
def test_reid_mul_softmax_input_128x64_all_to_all():
    stra0 = (0, (4, 1), ())
    stra1 = (0, (1, 4))
    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
    fact.forward_cmp()


@pytest.mark.reid_grad
def test_reid_grad_mul_softmax_input_128x64_all_to_all():
    stra0 = (0, (4, 1), ())
    stra1 = (0, (1, 4))
    fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
    fact.grad_cmp()
tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
deleted 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Onehot(Cell):
    def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None):
        super(Onehot, self).__init__()
        self.onehot = P.OneHot(axis, strategy=strategy)
        self.depth = depth
        self.on_value = Tensor(on_value, ms.float32)
        self.off_value = Tensor(off_value, ms.float32)

    def construct(self, indices):
        return self.onehot(indices, self.depth, self.on_value, self.off_value)


class OneHotFactory:
    def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None):
        size = 1
        prefix = ""
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(10, size)
        self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32)
        self.depth = depth
        self.on_value = on_value
        self.off_value = off_value
        self.axis = axis
        self.dtype = dtype
        self.strategy0 = strategy0
        need_dev_num = 1
        for s in strategy0[1]:
            need_dev_num = need_dev_num * s
        self.x_id = device_id % need_dev_num
        self.out_id = device_id % need_dev_num

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def grad_mindspore_impl(self):
        output_grad = Tensor(self.output_grad_np)
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        net = AddRelu()
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad)
        return input_grad

    def forward_mindspore_impl(self):
        indices = Tensor(self.input_np)
        net = Onehot(axis=self.axis, depth=self.depth, on_value=self.on_value, off_value=self.off_value)
        out = net(indices)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        x = Tensor(self.input_np)
        inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        net = Onehot(axis=self.axis, depth=self.depth, on_value=self.on_value,
                     off_value=self.off_value, strategy=self.strategy0)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
        return out.asnumpy()

    def forward_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1])
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)


def test_reid_onehot_forward_int32_128_depth13000():
    fact = OneHotFactory(input_shape=(128,),
                         depth=131072,
                         on_value=1.000000,
                         off_value=0.000000,
                         axis=-1,
                         dtype="float32",
                         strategy0=(0, (2,)))
    fact.forward_cmp()


def test_reid_onehot_forward_int32_131072_depth127():
    fact = OneHotFactory(input_shape=(131072,),
                         depth=127,
                         on_value=1.000000,
                         off_value=0.000000,
                         axis=-1,
                         dtype="float32",
                         strategy0=(0, (4,)))
    fact.forward_cmp()
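Note on the OneHot checks above: only the indices are sharded; each device still produces the full depth dimension. The snippet below is a NumPy-only sketch of the expected per-shard result, using small hypothetical sizes and a hypothetical one_hot helper, not part of the deleted file.

import numpy as np

indices = np.arange(8) % 3                 # hypothetical stand-in for self.input_np
depth, on_value, off_value = 4, 1.0, 0.0

def one_hot(idx, depth, on, off):
    # Depth is placed last, matching axis=-1 in the tests above.
    out = np.full((idx.size, depth), off, dtype=np.float32)
    out[np.arange(idx.size), idx] = on
    return out

full = one_hot(indices, depth, on_value, off_value)
shards = np.split(indices, 2)              # strategy (2,): cut the index vector in two
local = one_hot(shards[0], depth, on_value, off_value)
assert np.allclose(np.split(full, 2)[0], local)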
tests/ut/python/parallel/parallel_end_to_end/onehot/onehot_parallel_4p.sh
deleted 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for ((i=0; i<4; i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env > log$i.log
    pytest -s ../test_onehot_parallel_4p.py>../../log/test_onehot_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py
deleted 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np
import pytest

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class PReLU(Cell):
    def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None):
        super(PReLU, self).__init__()
        self.add = P.TensorAdd(strategy=strategy1_)
        self.prelu = P.PReLU(strategy=strategy_)
        self.channel = channel

    def construct(self, x, z, w):
        out = self.add(x, z)
        return self.prelu(out, w)


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, input_, z, w, output_grad):
        return grad_all_with_sens(self.network)(input_, z, w, output_grad)


class PReLUFactory:
    def __init__(self, input_shape, strategy):
        n, c = input_shape[:2]
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2,
                                   input_shape).astype(np.float32)
        self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
                                         input_shape).astype(np.float32)
        self.channel = c
        self.weight = np.array([np.float32(0.25)] * c)
        self.strategy = strategy

    def forward_mindspore_impl(self):
        net = PReLU(channel=self.channel, w=self.weight)
        x = Tensor(self.input_np)
        z = Tensor(np.zeros(self.input_np.shape), ms.float32)
        w = Tensor(self.weight)
        out = net(x, z, w)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
                    strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        x = Tensor(self.input_np)
        z = Tensor(np.zeros(self.input_np.shape), ms.float32)
        w = Tensor(self.weight)
        inputs = self.get_parallel_blocks(self.input_np, self.strategy[1])
        block_id = device_id % len(inputs)
        x1 = Tensor(inputs[block_id])
        z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32)
        w1 = Tensor(self.weight)
        out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1])
        return out.asnumpy()

    def grad_mindspore_impl(self):
        output_grad = Tensor(self.output_grad_np)
        x = Tensor(self.input_np)
        z = Tensor(np.zeros(self.input_np.shape), ms.float32)
        w = Tensor(self.weight)
        net = PReLU(channel=self.channel, w=self.weight)
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, z, w, output_grad)
        return input_grad

    def grad_mindspore_parallel_impl(self):
        output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1])
        block_id = device_id % len(output_grads)
        output_grad = Tensor(output_grads[block_id])
        x = Tensor(self.input_np)
        z = Tensor(np.zeros(self.input_np.shape), ms.float32)
        w = Tensor(self.weight)
        net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
                    strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        inputs = self.get_parallel_blocks(self.input_np, self.strategy[1])
        x1 = Tensor(inputs[block_id])
        z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32)
        w1 = Tensor(self.weight)
        input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad],
                              parallel_inputs_run=[x1, z1, w1, output_grad])
        return input_grad

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1])
        block_id = device_id % len(out_blocks)
        assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001)

    def grad_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore2 = input_grad_mindspore[2].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy()
        input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1])
        input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1])
        block_id = device_id % len(input_grad_blocks)
        assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
        assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001)
        assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


@pytest.mark.reid_grad
def test_reid_prelu_input_128x64x112x112_repeat():
    stra = (0, (1, 1, 2, 1), (1))
    fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
    fact.forward_cmp()


@pytest.mark.reid_grad
def test_reid_grad_prelu_input_128x64x112x112_repeat():
    stra = (0, (1, 1, 2, 1), (1))
    fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
    fact.grad_cmp()


@pytest.mark.reid_grad
def test_reid_prelu_input_128x64x112x112_mix():
    stra = (0, (2, 1, 1, 2), (1))
    fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
    fact.forward_cmp()


@pytest.mark.reid_grad
def test_reid_grad_prelu_input_128x64x112x112_mix():
    stra = (0, (2, 1, 1, 2), (1))
    fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
    fact.grad_cmp()
tests/ut/python/parallel/parallel_end_to_end/prelu/prelu_parallel_4p.sh
deleted 100644 → 0
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for ((i=0; i<4; i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env > log$i.log
    pytest -s ../test_prelu_parallel_4p.py > ../../log/test_prelu_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py
deleted 100644 → 0
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np
from numpy import allclose as allclose_nparray

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, output_grad):
        return grad_all_with_sens(self.network)(x, y, output_grad)


class GradScalar(Cell):
    def __init__(self, network):
        super(GradScalar, self).__init__()
        self.network = network
        self.sens = Tensor([1.0], dtype=ms.float32)

    def construct(self, x, y):
        return grad_all_with_sens(self.network)(x, y, self.sens)


class ReduceMean(Cell):
    def __init__(self, keep_dims, axis, strategy0=None, strategy1=None):
        super(ReduceMean, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1)
        self.axis = axis

    def construct(self, x, y):
        out = self.add(x, y)
        return self.reduce_mean(out, self.axis)


class ReduceMeanFactory:
    def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2,
                                    input_shape).astype(np.float32)
        self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4,
                                    input_shape).astype(np.float32)
        self.keep_dims = keep_dims
        self.axis = axis
        target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape
        target_size = 1
        for s in target_shape:
            target_size = target_size * s
        number_range = min(1000, target_size)
        self.output_grad_np = np.array([1.0], dtype=np.float32)
        if len(target_shape) > 0:
            self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range,
                                             target_shape).astype(np.float32) + 1.0
        self.shape = target_shape
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        out_strategy = []
        axis_ = list(axis)
        if axis_[0] == -1:
            axis_[0] = len(input_shape) - 1
        for i in range(0, len(input_shape)):
            if i in axis_:
                if keep_dims:
                    out_strategy.append(1)
            else:
                out_strategy.append(strategy1[1][i])
        self.out_strategy = out_strategy
        need_dev_num0 = 1
        need_dev_num1 = 1
        for s in strategy0[1]:
            need_dev_num0 = need_dev_num0 * s
        for s in out_strategy:
            need_dev_num1 = need_dev_num1 * s
        self.x_id = device_id % need_dev_num0
        self.y_id = device_id % need_dev_num0
        block_id = device_id % need_dev_num0
        device_index = self.id_to_list(block_id, self.strategy1[1])
        print(device_index)
        for i in axis:
            device_index[i] = 0
        print(device_index)
        self.out_id = self.list_to_id(device_index, self.out_strategy)
        print(self.out_id)

    def id_to_list(self, id_, shape):
        result = []
        r = id_
        for i in range(0, len(shape)):
            v = 1
            for j in range(i + 1, len(shape)):
                v = v * shape[j]
            result.append(r // v)
            r = r % v
        return result

    def list_to_id(self, id_list, shape):
        result = 0
        for i in range(0, len(id_list)):
            v = 1
            for j in range(i + 1, len(id_list)):
                v = v * shape[j]
            result = result + id_list[i] * v
        return result

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis)
        out = net(x, y)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis,
                         strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        out_grad = Tensor(self.output_grad_np)
        net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis)
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, out_grad)
        return input_grad

    def grad_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        output_grad1 = Tensor(outgrads[self.out_id])
        net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis,
                         strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad,
                              parallel_inputs_compile=[x, y, output_grad1],
                              parallel_inputs_run=[x1, y1, output_grad1])
        return input_grad

    def forward_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

    def grad_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
        assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
        assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_reducemean_input_64x16():
    fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,),
                             strategy0=(0, (4,), (4,)), strategy1=(0, (4,)))
    fact.forward_cmp()


def test_grad_reid_reducemean_input_64x16():
    fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,),
                             strategy0=(0, (4,), (4,)), strategy1=(0, (4,)))
    fact.grad_cmp()


def test_reid_reducemean_input_64x128x28x28():
    fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3),
                             strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
                             strategy1=(0, (2, 1, 2, 1)))
    fact.forward_cmp()


def test_grad_reid_reducemean_input_64x128x28x28():
    fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3),
                             strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
                             strategy1=(0, (2, 1, 2, 1)))
    fact.grad_cmp()
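The factory above compares a single-process run against the per-device run by slicing the reference data itself: get_parallel_blocks cuts the full array into one block per logical device, id_to_list / list_to_id are plain mixed-radix conversions between a flat device id and its per-axis block index, and for ReduceMean the positions of the reduced axes are zeroed before re-encoding against the output layout. The following NumPy-only sketch (illustrative shapes, no MindSpore) walks through that bookkeeping for the (2, 1, 2, 1) strategy used in the keep_dims test.

# NumPy-only sketch of the block bookkeeping in ReduceMeanFactory; the shapes
# below are illustrative and smaller than the ones used in the tests.
import numpy as np

def get_parallel_blocks(input_, strategy):
    # Cut the full array into one block per logical device, axis by axis.
    blocks = [input_]
    for axis, parts in enumerate(strategy):
        blocks = [piece for b in blocks for piece in np.split(b, parts, axis=axis)]
    return blocks

def id_to_list(id_, shape):
    # Mixed-radix decode: flat device id -> per-axis block index.
    result = []
    for i in range(len(shape)):
        radix = int(np.prod(shape[i + 1:], dtype=int))
        result.append(id_ // radix)
        id_ %= radix
    return result

def list_to_id(id_list, shape):
    # Mixed-radix encode: per-axis block index -> flat device id.
    return sum(idx * int(np.prod(shape[i + 1:], dtype=int)) for i, idx in enumerate(id_list))

data = np.arange(2 * 1 * 4 * 4, dtype=np.float32).reshape(2, 1, 4, 4)
strategy = (2, 1, 2, 1)                  # 4 logical devices: dims 0 and 2 are split
blocks = get_parallel_blocks(data, strategy)
assert len(blocks) == 4 and blocks[0].shape == (1, 1, 2, 4)

device_id = 3
index = id_to_list(device_id, strategy)  # -> [1, 0, 1, 0]
assert list_to_id(index, strategy) == device_id
assert np.array_equal(blocks[device_id], data[1:2, :, 2:4, :])

# Reducing over axes (2, 3) with keep_dims=True collapses the split along axis 2,
# so those positions are zeroed before re-encoding against the output layout.
index[2] = index[3] = 0
out_id = list_to_id(index, [2, 1, 1, 1])  # -> 1: this device checks output block 1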
tests/ut/python/parallel/parallel_end_to_end/reducemean/reducemean_parallel_4p.sh
Deleted
100644 → 0
View file @ f967700e
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for ((i=0; i<4; i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env > log$i.log
    pytest -s ../test_reducemean_parallel_4p.py > ../../log/test_reducemean_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py
Deleted
100644 → 0
View file @ f967700e
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np
import pytest
from numpy import allclose as allclose_nparray

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, output_grad):
        return grad_all_with_sens(self.network)(x, y, output_grad)


class Reshape(Cell):
    def __init__(self, target_shape, strategy0=None, strategy1=None):
        super(Reshape, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.reshape = P.Reshape(strategy=strategy1)
        self.shape = tuple(target_shape)

    def construct(self, input1, input2):
        x = self.add(input1, input2)
        return self.reshape(x, self.shape)


class ReshapeFactory:
    def __init__(self, input_shape, target_shape, strategy0, strategy1):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2,
                                    input_shape).astype(np.float32)
        self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4,
                                    input_shape).astype(np.float32)
        target_size = 1
        for s in target_shape:
            target_size = target_size * s
        number_range = min(1000, target_size)
        self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
                                         target_shape).astype(np.float32)
        self.target_shape = target_shape
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        out_strategy = [1] * len(target_shape)
        out_strategy[0] = strategy1[1][0]
        self.out_strategy = out_strategy
        need_dev_num0 = 1
        need_dev_num1 = 1
        for s in strategy0[1]:
            need_dev_num0 = need_dev_num0 * s
        for s in out_strategy:
            need_dev_num1 = need_dev_num1 * s
        self.x_id = device_id % need_dev_num0
        self.y_id = device_id % need_dev_num0
        self.out_id = device_id % need_dev_num1

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_reshape_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        net = Reshape(self.target_shape)
        out = net(x, y)
        return out.asnumpy()

    def forward_reshape_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_reshape_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        net = Reshape(self.target_shape)
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad)
        return input_grad

    def grad_reshape_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        output_grad1 = Tensor(outgrads[self.out_id])
        net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad,
                              parallel_inputs_compile=[x, y, output_grad1],
                              parallel_inputs_run=[x1, y1, output_grad1])
        return input_grad

    def forward_reshape_cmp(self):
        out_mindspore = self.forward_reshape_mindspore_impl()
        out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

    def grad_reshape_cmp(self):
        input_grad_mindspore = self.grad_reshape_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
        assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
        assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


@pytest.mark.reid_forward
def test_reid_reshape_input_128x512x7x7_target_128x25088():
    fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088),
                          strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)),
                          strategy1=(0, (4, 1, 1, 1)))
    fact.forward_reshape_cmp()


def test_reid_reshape_grad_input_128x512x7x7_target_128x25088():
    fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088),
                          strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)),
                          strategy1=(0, (4, 1, 1, 1)))
    fact.grad_reshape_cmp()


@pytest.mark.reid_forward
def test_reid_reshape_input_128x64_target_128x64x1x1():
    fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1),
                          strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
    fact.forward_reshape_cmp()


@pytest.mark.reid_grad
def test_reid_reshape_grad_input_128x64_target_128x64x1x1():
    fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1),
                          strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
    fact.grad_reshape_cmp()
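ReshapeFactory keeps only the first-dimension split in out_strategy (out_strategy[0] = strategy1[1][0]) because, with the (4, 1, 1, 1)-style layouts used above, every device owns whole leading rows and the reshape is purely local: reshaping a device's slice gives exactly its slice of the reshaped full tensor. A small NumPy-only check of that claim, with scaled-down stand-ins for the (128, 512, 7, 7) → (128, 25088) case:

# NumPy-only check: with only dim 0 sharded, reshape needs no data exchange.
import numpy as np

full = np.arange(8 * 3 * 2 * 2, dtype=np.float32).reshape(8, 3, 2, 2)
target = (8, 12)                      # stand-in for (128, 25088)
splits = 4                            # dim 0 sharded across 4 devices

full_out_blocks = np.split(full.reshape(target), splits, axis=0)
for rank, local_in in enumerate(np.split(full, splits, axis=0)):
    local_out = local_in.reshape(-1, target[1])   # each device reshapes its own slice
    assert np.array_equal(local_out, full_out_blocks[rank])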
tests/ut/python/parallel/parallel_end_to_end/reshape/reshape_parallel_4p.sh
Deleted
100644 → 0
View file @ f967700e
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for ((i=0; i<4; i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env > log$i.log
    pytest -s ../test_reshape_parallel_4p.py > ../../log/test_reshape_parallel_4p_log$i.log 2>&1 &
    cd ..
done
tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py
Deleted
100644 → 0
View file @ f967700e
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np
from numpy import allclose as allclose_nparray

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Net(Cell):
    def __init__(self, perm_in, strategy0=None, strategy1=None):
        super(Net, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.transpose = P.Transpose(strategy=strategy1)
        self.perm_in = perm_in

    def construct(self, x, y):
        out = self.add(x, y)
        return self.transpose(out, self.perm_in)


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, output_grad):
        return grad_all_with_sens(self.network)(x, y, output_grad)


class TransposeFactory:
    def __init__(self, input_shape, perm_in, strategy0, strategy1):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2,
                                    input_shape).astype(np.float32)
        self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4,
                                    input_shape).astype(np.float32)
        target_shape = self.input_np1.transpose(perm_in).shape
        target_size = 1
        for s in target_shape:
            target_size = target_size * s
        number_range = min(1000, target_size)
        self.target_shape = target_shape
        self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
                                         target_shape).astype(np.float32)
        self.perm_in = perm_in
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        out_strategy = []
        for i in perm_in:
            out_strategy.append(strategy1[1][i])
        self.out_strategy = out_strategy
        need_dev_num0 = 1
        need_dev_num1 = 1
        for s in strategy0[1]:
            need_dev_num0 = need_dev_num0 * s
        for s in out_strategy:
            need_dev_num1 = need_dev_num1 * s
        self.x_id = device_id % need_dev_num0
        self.y_id = device_id % need_dev_num0
        device_index = self.id_to_list(device_id % need_dev_num1,
                                       self.strategy1[1])  # encoding to get the index before transpose
        device_index_transpose = []
        for i in perm_in:
            device_index_transpose.append(device_index[i])
        self.out_id = self.list_to_id(device_index_transpose, self.out_strategy)

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def id_to_list(self, id_, shape):
        result = []
        r = id_
        for i in range(0, len(shape)):
            v = 1
            for j in range(i + 1, len(shape)):
                v = v * shape[j]
            result.append(r // v)
            r = r % v
        return result

    def list_to_id(self, id_list, shape):
        result = 0
        for i in range(0, len(id_list)):
            v = 1
            for j in range(i + 1, len(id_list)):
                v = v * shape[j]
            result = result + id_list[i] * v
        return result

    def forward_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        net = Net(self.perm_in)
        out = net(x, y)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        net = Net(self.perm_in)
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad)
        return input_grad

    def grad_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        output_grad1 = Tensor(outgrads[self.out_id])
        net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad,
                              parallel_inputs_compile=[x, y, output_grad1],
                              parallel_inputs_run=[x1, y1, output_grad1])
        return input_grad

    def forward_transpose_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

    def grad_transpose_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
        assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
        assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_transpose_input_256x512_output_512x256_perm_1x0():
    fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2)))
    fact.forward_transpose_cmp()


def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0():
    fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2)))
    fact.grad_transpose_cmp()


def test_reid_transpose_input_512x256_output_256x512_perm_1x0():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
    fact.forward_transpose_cmp()


def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
    fact.grad_transpose_cmp()


def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
    fact.forward_transpose_cmp()


def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
    fact.grad_transpose_cmp()
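TransposeFactory permutes the input strategy to obtain the output layout and re-encodes each device's block index through the same permutation (device_index_transpose above); for perm (1, 0) the device that holds input block [r0, r1] checks output block [r1, r0]. Below is a NumPy-only sketch of that remapping, with a small matrix standing in for the (256, 512) input and the (2, 2) strategy from the first test:

# NumPy-only sketch of the output-block remapping used by TransposeFactory.
import numpy as np

def blocks_of(arr, strategy):
    # Same splitting order as get_parallel_blocks: axis by axis, left to right.
    blocks = [arr]
    for axis, parts in enumerate(strategy):
        blocks = [piece for b in blocks for piece in np.split(b, parts, axis=axis)]
    return blocks

perm = (1, 0)
in_strategy = (2, 2)
out_strategy = tuple(in_strategy[p] for p in perm)     # input strategy read through the permutation

x = np.arange(4 * 6, dtype=np.float32).reshape(4, 6)   # stand-in for (256, 512)
in_blocks = blocks_of(x, in_strategy)
out_blocks = blocks_of(x.transpose(perm), out_strategy)

for device_id in range(4):
    r0, r1 = divmod(device_id, in_strategy[1])         # block index before transpose
    out_id = [r0, r1][perm[0]] * out_strategy[1] + [r0, r1][perm[1]]
    assert np.array_equal(in_blocks[device_id].transpose(perm), out_blocks[out_id])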
tests/ut/python/parallel/parallel_end_to_end/transpose/transpose_parallel_4p.sh
Deleted
100644 → 0
View file @ f967700e
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for ((i=0; i<4; i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env > log$i.log
    pytest -s ../test_transpose_parallel_4p.py > ../../log/test_transpose_parallel_4p_log$i.log 2>&1 &
    cd ..
done