Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
兔爷不爱我
mindspore
提交
b10d4d6e
M
mindspore
项目概览
兔爷不爱我
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b10d4d6e
编写于
7月 30, 2020
作者:
Z
ZPaC
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add ps ci test cases.
上级
22927dc4
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
480 addition
and
24 deletion
+480
-24
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc
.../kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc
+3
-3
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc
...end/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc
+5
-2
mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc
...pore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc
+10
-10
model_zoo/official/recommend/wide_and_deep/script/run_parameter_server_train_cluster.sh
...ide_and_deep/script/run_parameter_server_train_cluster.sh
+75
-0
tests/st/ps/cmp_sparse_embedding/shell_run_test.sh
tests/st/ps/cmp_sparse_embedding/shell_run_test.sh
+55
-0
tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py
...s/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py
+106
-0
tests/st/ps/cmp_sparse_embedding/test_entry_cmp_sparse_embedding.py
...s/cmp_sparse_embedding/test_entry_cmp_sparse_embedding.py
+25
-0
tests/st/ps/full_ps/shell_run_test.sh
tests/st/ps/full_ps/shell_run_test.sh
+56
-0
tests/st/ps/full_ps/test_entry_full_ps_lenet.py
tests/st/ps/full_ps/test_entry_full_ps_lenet.py
+27
-0
tests/st/ps/multi_worker_full_ps/entry.py
tests/st/ps/multi_worker_full_ps/entry.py
+8
-6
tests/st/ps/multi_worker_full_ps/shell_run_test.sh
tests/st/ps/multi_worker_full_ps/shell_run_test.sh
+3
-3
tests/st/ps/multi_worker_full_ps/test_multi_worker_full_ps_lenet.py
...s/multi_worker_full_ps/test_multi_worker_full_ps_lenet.py
+107
-0
未找到文件。
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc
浏览文件 @
b10d4d6e
...
...
@@ -25,9 +25,7 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
auto
input_shape
=
AnfAlgo
::
GetPrevNodeOutputInferShape
(
kernel_node
,
0
);
auto
indices_shape
=
AnfAlgo
::
GetPrevNodeOutputInferShape
(
kernel_node
,
1
);
auto
output_shape
=
AnfAlgo
::
GetOutputInferShape
(
kernel_node
,
0
);
size_t
axis
=
kShape4dDims
-
input_shape
.
size
();
CPUKernelUtils
::
ExpandDimsTo4
(
&
input_shape
);
CPUKernelUtils
::
ExpandDimsTo4
(
&
output_shape
);
size_t
axis
=
kShape2dDims
-
input_shape
.
size
();
for
(
auto
dim
:
input_shape
)
{
input_dims_
*=
dim
;
}
...
...
@@ -40,6 +38,8 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
values
.
insert
(
values
.
end
(),
input_shape
.
begin
(),
input_shape
.
end
());
values
.
insert
(
values
.
end
(),
indices_shape
.
begin
(),
indices_shape
.
end
());
values
.
insert
(
values
.
end
(),
output_shape
.
begin
(),
output_shape
.
end
());
MS_LOG
(
INFO
)
<<
"Init embedding lookup proxy kernel, input shape:"
<<
input_shape
<<
", indices_shape:"
<<
indices_shape
<<
", output_shape:"
<<
output_shape
;
std
::
vector
<
int
>
lens
{
SizeToInt
(
input_shape
.
size
()),
SizeToInt
(
indices_shape
.
size
()),
SizeToInt
(
output_shape
.
size
())};
const
char
*
env_role
=
getenv
(
mindspore
::
parallel
::
ps
::
kEnvRole
);
if
(
env_role
!=
nullptr
&&
strcmp
(
env_role
,
mindspore
::
parallel
::
ps
::
kEnvRoleOfWorker
)
==
0
)
{
...
...
mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc
浏览文件 @
b10d4d6e
...
...
@@ -25,11 +25,15 @@ namespace mindspore {
namespace
kernel
{
namespace
ps
{
using
mindspore
::
parallel
::
ps
::
Util
;
constexpr
int
kAxis
=
2
;
constexpr
int
kAxis
=
0
;
void
EmbeddingLookUpPSKernel
::
InitKernel
(
const
std
::
shared_ptr
<
std
::
vector
<
std
::
shared_ptr
<
std
::
vector
<
size_t
>>>>
&
shapes
)
{
const
std
::
vector
<
std
::
shared_ptr
<
std
::
vector
<
size_t
>>>
&
shape_vec
=
*
shapes
;
input_shape_
=
*
(
shape_vec
[
0
]);
first_dim_size_
=
input_shape_
[
0
];
for
(
size_t
i
=
1
;
i
<
input_shape_
.
size
();
++
i
)
{
outer_dim_size_
*=
input_shape_
[
i
];
}
auto
indices_shape
=
*
(
shape_vec
[
1
]);
indices_lens_
=
1
;
for
(
auto
shape
:
indices_shape
)
{
...
...
@@ -49,7 +53,6 @@ void EmbeddingLookUpPSKernel::InitKernel(
size_t
output_size
=
std
::
accumulate
(
output_shape
.
begin
(),
output_shape
.
end
(),
sizeof
(
float
),
std
::
multiplies
<
size_t
>
());
output_size_list_
.
emplace_back
(
output_size
);
CPUKernelUtils
::
ExpandDimsTo4
(
&
input_shape_
);
}
void
EmbeddingLookUpPSKernel
::
ReInit
(
const
std
::
shared_ptr
<
std
::
vector
<
std
::
shared_ptr
<
std
::
vector
<
size_t
>>>>
&
shapes
)
{
...
...
mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc
浏览文件 @
b10d4d6e
...
...
@@ -77,7 +77,7 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
size_t
worker_num
)
{
AddressPtr
weight_addr
=
std
::
make_shared
<
kernel
::
Address
>
();
weight_addr
->
addr
=
weight
->
data
();
weight_addr
->
size
=
weight
->
size
();
weight_addr
->
size
=
weight
->
size
()
*
sizeof
(
float
)
;
AddressPtr
m
=
std
::
make_shared
<
kernel
::
Address
>
();
m
->
addr
=
new
float
[
weight
->
size
()];
m
->
size
=
weight
->
size
()
*
sizeof
(
float
);
...
...
@@ -156,7 +156,7 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
size_t
worker_num
)
{
AddressPtr
weight_addr
=
std
::
make_shared
<
kernel
::
Address
>
();
weight_addr
->
addr
=
weight
->
data
();
weight_addr
->
size
=
weight
->
size
();
weight_addr
->
size
=
weight
->
size
()
*
sizeof
(
float
)
;
AddressPtr
accum
=
std
::
make_shared
<
kernel
::
Address
>
();
accum
->
addr
=
new
float
[
weight
->
size
()];
accum
->
size
=
weight
->
size
()
*
sizeof
(
float
);
...
...
@@ -166,7 +166,7 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
}
AddressPtr
linear
=
std
::
make_shared
<
kernel
::
Address
>
();
linear
->
addr
=
new
float
[
weight
->
size
()];
auto
ret
=
memset_s
(
linear
->
addr
,
weight
->
size
()
*
sizeof
(
float
),
0x00
,
weight
->
size
()
*
sizeof
(
float
));
int
ret
=
memset_s
(
linear
->
addr
,
weight
->
size
()
*
sizeof
(
float
),
0x00
,
weight
->
size
()
*
sizeof
(
float
));
if
(
ret
!=
0
)
{
MS_LOG
(
EXCEPTION
)
<<
"memset_s error, errorno("
<<
ret
<<
")"
;
}
...
...
@@ -176,9 +176,9 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
size_t
total_grad_size
=
std
::
accumulate
((
*
grad_shape
).
begin
(),
(
*
grad_shape
).
end
(),
1
,
std
::
multiplies
<
size_t
>
());
AddressPtr
grad
=
std
::
make_shared
<
kernel
::
Address
>
();
grad
->
addr
=
new
float
[
total_grad_size
*
worker_num
];
auto
ret1
=
memcpy_s
(
grad
->
addr
,
lens
[
0
]
*
sizeof
(
float
),
values
.
data
(),
lens
[
0
]
*
sizeof
(
float
));
if
(
ret
1
!=
0
)
{
MS_LOG
(
EXCEPTION
)
<<
"memcpy_s error, errorno("
<<
ret
1
<<
")"
;
ret
=
memcpy_s
(
grad
->
addr
,
lens
[
0
]
*
sizeof
(
float
),
values
.
data
(),
lens
[
0
]
*
sizeof
(
float
));
if
(
ret
!=
0
)
{
MS_LOG
(
EXCEPTION
)
<<
"memcpy_s error, errorno("
<<
ret
<<
")"
;
}
grad
->
size
=
lens
[
0
]
*
sizeof
(
float
);
...
...
@@ -187,10 +187,10 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
std
::
accumulate
((
*
indices_shape
).
begin
(),
(
*
indices_shape
).
end
(),
1
,
std
::
multiplies
<
size_t
>
());
AddressPtr
indices
=
std
::
make_shared
<
kernel
::
Address
>
();
indices
->
addr
=
new
float
[
total_indice_size
*
worker_num
];
auto
ret2
=
memcpy_s
(
indices
->
addr
,
lens
[
1
]
*
sizeof
(
float
),
reinterpret_cast
<
float
*>
(
values
.
data
())
+
lens
[
0
],
lens
[
1
]
*
sizeof
(
float
));
if
(
ret
2
!=
0
)
{
MS_LOG
(
EXCEPTION
)
<<
"memcpy_s error, errorno("
<<
ret
2
<<
")"
;
ret
=
memcpy_s
(
indices
->
addr
,
lens
[
1
]
*
sizeof
(
float
),
reinterpret_cast
<
float
*>
(
values
.
data
())
+
lens
[
0
],
lens
[
1
]
*
sizeof
(
float
));
if
(
ret
!=
0
)
{
MS_LOG
(
EXCEPTION
)
<<
"memcpy_s error, errorno("
<<
ret
<<
")"
;
}
indices
->
size
=
lens
[
1
]
*
sizeof
(
int
);
...
...
model_zoo/official/recommend/wide_and_deep/script/run_parameter_server_train_cluster.sh
0 → 100644
浏览文件 @
b10d4d6e
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
execute_path
=
$(
pwd
)
script_self
=
$(
readlink
-f
"
$0
"
)
self_path
=
$(
dirname
"
${
script_self
}
"
)
#bash run_parameter_server_train_cluster.sh RANK_SIZE EPOCHS DATASET RANK_TABLE_FILE
# LOCAL_WORKER_NUM LOCAL_SERVER_NUM SERVER_NUM
# SCHED_HOST SCHED_PORT ROLE
export
RANK_SIZE
=
$1
export
EPOCH_SIZE
=
$2
export
DATASET
=
$3
export
RANK_TABLE_FILE
=
$4
export
MS_COMM_TYPE
=
zmq
export
MS_SCHED_NUM
=
1
export
MS_WORKER_NUM
=
$RANK_SIZE
export
LOCAL_WORKER_NUM
=
$5
export
LOCAL_SERVER_NUM
=
$6
export
MS_SERVER_NUM
=
$7
export
MS_SCHED_HOST
=
$8
export
MS_SCHED_PORT
=
$9
export
MS_ROLE
=
${
10
}
echo
"=====Role is
$MS_ROLE
======"
if
[
"
$MS_ROLE
"
==
"MS_SCHED"
]
;
then
for
((
i
=
0
;
i<1
;
i++
))
;
do
rm
-rf
${
execute_path
}
/sched_
$i
/
mkdir
${
execute_path
}
/sched_
$i
/
cd
${
execute_path
}
/sched_
$i
/
||
exit
export
RANK_ID
=
$i
export
DEVICE_ID
=
$i
python
-s
${
self_path
}
/../train_and_eval_parameter_server.py
--data_path
=
$DATASET
--epochs
=
$EPOCH_SIZE
--parameter_server
=
1
>
sched_
$i
.log 2>&1 &
done
fi
if
[
"
$MS_ROLE
"
==
"MS_PSERVER"
]
;
then
for
((
i
=
0
;
i<
$LOCAL_SERVER_NUM
;
i++
))
;
do
rm
-rf
${
execute_path
}
/server_
$i
/
mkdir
${
execute_path
}
/server_
$i
/
cd
${
execute_path
}
/server_
$i
/
||
exit
export
RANK_ID
=
$i
export
DEVICE_ID
=
$i
python
-s
${
self_path
}
/../train_and_eval_parameter_server.py
--data_path
=
$DATASET
--epochs
=
$EPOCH_SIZE
--parameter_server
=
1
>
server_
$i
.log 2>&1 &
done
fi
if
[
"
$MS_ROLE
"
==
"MS_WORKER"
]
;
then
for
((
i
=
0
;
i<
$LOCAL_WORKER_NUM
;
i++
))
;
do
rm
-rf
${
execute_path
}
/worker_
$i
/
mkdir
${
execute_path
}
/worker_
$i
/
cd
${
execute_path
}
/worker_
$i
/
||
exit
export
RANK_ID
=
$i
export
DEVICE_ID
=
$i
python
-s
${
self_path
}
/../train_and_eval_parameter_server.py
--data_path
=
$DATASET
--epochs
=
$EPOCH_SIZE
--parameter_server
=
1
>
worker_
$i
.log 2>&1 &
done
fi
tests/st/ps/cmp_sparse_embedding/shell_run_test.sh
0 → 100644
浏览文件 @
b10d4d6e
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
execute_path
=
$(
pwd
)
self_path
=
$(
dirname
"
${
script_self
}
"
)
export
MS_COMM_TYPE
=
zmq
export
MS_SCHED_NUM
=
1
DEVICE_TARGET
=
$1
export
MS_WORKER_NUM
=
$2
export
MS_SERVER_NUM
=
$3
export
MS_SCHED_HOST
=
$4
export
MS_SCHED_PORT
=
$5
export
MS_ROLE
=
MS_SCHED
for
((
i
=
0
;
i<1
;
i++
))
;
do
rm
-rf
${
execute_path
}
/sched_
$i
/
mkdir
${
execute_path
}
/sched_
$i
/
cd
${
execute_path
}
/sched_
$i
/
||
exit
python
${
self_path
}
/../test_cmp_sparse_embedding.py &
done
export
MS_ROLE
=
MS_PSERVER
for
((
i
=
0
;
i<
$MS_SERVER_NUM
;
i++
))
;
do
rm
-rf
${
execute_path
}
/server_
$i
/
mkdir
${
execute_path
}
/server_
$i
/
cd
${
execute_path
}
/server_
$i
/
||
exit
python
${
self_path
}
/../test_cmp_sparse_embedding.py &
done
export
MS_ROLE
=
MS_WORKER
for
((
i
=
0
;
i<
$MS_WORKER_NUM
;
i++
))
;
do
rm
-rf
${
execute_path
}
/worker_
$i
/
mkdir
${
execute_path
}
/worker_
$i
/
cd
${
execute_path
}
/worker_
$i
/
||
exit
python
${
self_path
}
/../test_cmp_sparse_embedding.py &
done
wait
$!
exit
$?
tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py
0 → 100644
浏览文件 @
b10d4d6e
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import
os
import
argparse
import
numpy
as
np
import
mindspore.context
as
context
import
mindspore.nn
as
nn
from
mindspore
import
Tensor
from
mindspore.common
import
dtype
as
mstype
from
mindspore.nn
import
TrainOneStepCell
,
WithLossCell
from
mindspore.nn.optim
import
Adam
from
mindspore.ops
import
operations
as
P
from
mindspore.common.initializer
import
TruncatedNormal
,
initializer
from
mindspore
import
Parameter
parser
=
argparse
.
ArgumentParser
(
description
=
"test_sparse_embedding"
)
parser
.
add_argument
(
"--device_target"
,
type
=
str
,
default
=
"Ascend"
)
args
,
_
=
parser
.
parse_known_args
()
device_target
=
args
.
device_target
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
device_target
,
enable_sparse
=
True
)
def
fc_with_initialize
(
input_channels
,
out_channels
):
"""weight initial for fc layer"""
weight
=
weight_variable
()
bias
=
weight_variable
()
return
nn
.
Dense
(
input_channels
,
out_channels
,
weight
,
bias
)
def
weight_variable
():
"""weight initial"""
return
TruncatedNormal
(
0.02
)
class
LeNet5
(
nn
.
Cell
):
def
__init__
(
self
,
num_class
=
10
):
super
(
LeNet5
,
self
).
__init__
()
self
.
cast
=
P
.
Cast
()
self
.
flatten
=
nn
.
Flatten
()
self
.
embedding_table
=
Parameter
(
initializer
(
"normal"
,
(
16
,
4
),
mstype
.
float32
),
name
=
"embedding_table"
)
self
.
embedding
=
nn
.
EmbeddingLookup
()
self
.
relu
=
nn
.
ReLU
()
self
.
fc
=
fc_with_initialize
(
12
,
num_class
)
def
construct
(
self
,
x
):
x
=
self
.
cast
(
x
,
mstype
.
int32
)
x
=
self
.
embedding
(
self
.
embedding_table
,
x
)
x
=
self
.
flatten
(
x
)
x
=
self
.
fc
(
x
)
return
x
def
do_sparse_embedding
(
ps
=
False
):
epoch
=
10
net
=
LeNet5
(
10
)
if
ps
:
net
.
embedding_table
.
set_param_ps
()
optimizer
=
Adam
(
filter
(
lambda
x
:
x
.
requires_grad
,
net
.
get_parameters
()))
optimizer
.
sparse_opt
.
add_prim_attr
(
"primitive_target"
,
"CPU"
)
criterion
=
nn
.
SoftmaxCrossEntropyWithLogits
(
is_grad
=
False
,
sparse
=
True
,
reduction
=
"mean"
)
net_with_criterion
=
WithLossCell
(
net
,
criterion
)
train_network
=
TrainOneStepCell
(
net_with_criterion
,
optimizer
)
train_network
.
set_train
()
losses
=
[]
for
_
in
range
(
epoch
):
data
=
Tensor
(
np
.
random
.
randint
(
0
,
15
,
(
32
,
3
),
np
.
int32
))
label
=
Tensor
(
np
.
random
.
randint
(
0
,
9
,
(
32
),
np
.
int32
))
loss
=
train_network
(
data
,
label
).
asnumpy
()
losses
.
append
(
loss
)
print
(
losses
)
return
losses
envs
=
os
.
environ
if
__name__
==
"__main__"
:
np
.
random
.
seed
(
0
)
ps_loss
=
do_sparse_embedding
(
True
)
if
envs
.
get
(
"MS_ROLE"
)
==
"MS_WORKER"
:
envs
[
"MS_ROLE"
]
=
""
np
.
random
.
seed
(
0
)
no_ps_loss
=
do_sparse_embedding
()
envs
[
"MS_ROLE"
]
=
"MS_WORKER"
assert
np
.
allclose
(
ps_loss
,
no_ps_loss
,
rtol
=
1.0e-6
,
atol
=
1.0e-6
)
tests/st/ps/cmp_sparse_embedding/test_entry_cmp_sparse_embedding.py
0 → 100644
浏览文件 @
b10d4d6e
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import
os
import
pytest
@
pytest
.
mark
.
level0
@
pytest
.
mark
.
platform_arm_ascend_training
@
pytest
.
mark
.
platform_x86_ascend_training
@
pytest
.
mark
.
env_onecard
def
test_cmp_sparse_embedding
():
return_code
=
os
.
system
(
"bash shell_run_test.sh Ascend 1 1 127.0.0.1 8081"
)
assert
return_code
==
0
tests/st/ps/full_ps/shell_run_test.sh
0 → 100644
浏览文件 @
b10d4d6e
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
execute_path
=
$(
pwd
)
self_path
=
$(
dirname
"
${
script_self
}
"
)
export
MS_COMM_TYPE
=
zmq
export
MS_SCHED_NUM
=
1
DEVICE_TARGET
=
$1
DATASET_PATH
=
$2
export
MS_WORKER_NUM
=
$3
export
MS_SERVER_NUM
=
$4
export
MS_SCHED_HOST
=
$5
export
MS_SCHED_PORT
=
$6
export
MS_ROLE
=
MS_SCHED
for
((
i
=
0
;
i<1
;
i++
))
;
do
rm
-rf
${
execute_path
}
/sched_
$i
/
mkdir
${
execute_path
}
/sched_
$i
/
cd
${
execute_path
}
/sched_
$i
/
||
exit
python
${
self_path
}
/../test_full_ps_lenet.py
--device_target
=
$DEVICE_TARGET
--dataset_path
=
$DATASET_PATH
&
done
export
MS_ROLE
=
MS_PSERVER
for
((
i
=
0
;
i<
$MS_SERVER_NUM
;
i++
))
;
do
rm
-rf
${
execute_path
}
/server_
$i
/
mkdir
${
execute_path
}
/server_
$i
/
cd
${
execute_path
}
/server_
$i
/
||
exit
python
${
self_path
}
/../test_full_ps_lenet.py
--device_target
=
$DEVICE_TARGET
--dataset_path
=
$DATASET_PATH
&
done
export
MS_ROLE
=
MS_WORKER
for
((
i
=
0
;
i<
$MS_WORKER_NUM
;
i++
))
;
do
rm
-rf
${
execute_path
}
/worker_
$i
/
mkdir
${
execute_path
}
/worker_
$i
/
cd
${
execute_path
}
/worker_
$i
/
||
exit
python
${
self_path
}
/../test_full_ps_lenet.py
--device_target
=
$DEVICE_TARGET
--dataset_path
=
$DATASET_PATH
&
done
wait
$!
exit
$?
tests/st/ps/full_ps/test_entry_full_ps_lenet.py
0 → 100644
浏览文件 @
b10d4d6e
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import
os
import
pytest
@
pytest
.
mark
.
level0
@
pytest
.
mark
.
platform_arm_ascend_training
@
pytest
.
mark
.
platform_x86_ascend_training
@
pytest
.
mark
.
env_onecard
def
test_full_ps_ascend_lenet
():
return_code
=
os
.
system
(
"bash shell_run_test.sh Ascend /home/workspace/mindspore_dataset/mnist 1 1 127.0.0.1 8082"
)
assert
return_code
==
0
tests/st/ps/
full_ps/test_run
.py
→
tests/st/ps/
multi_worker_full_ps/entry
.py
浏览文件 @
b10d4d6e
...
...
@@ -17,14 +17,16 @@ import os
# @pytest.mark.level0
# @pytest.mark.platform_arm_ascend_training
# @pytest.mark.platform_x86_ascend_training
# @pytest.mark.env_
onecard
def
test_full_ps_ascend_lenet
():
return_code
=
os
.
system
(
"bash
run_full_ps_lenet.sh Ascend 1
1 127.0.0.1 8088"
)
# @pytest.mark.env_
single
def
test_
multi_worker_
full_ps_ascend_lenet
():
return_code
=
os
.
system
(
"bash
shell_run_test.sh Ascend 8
1 127.0.0.1 8088"
)
assert
return_code
==
0
# @pytest.mark.level0
# @pytest.mark.platform_x86_gpu_training
# @pytest.mark.platform_arm_ascend_training
# @pytest.mark.platform_x86_ascend_training
# @pytest.mark.env_onecard
def
test_full_ps_
gpu
_lenet
():
return_code
=
os
.
system
(
"bash
run_full_ps_lenet.sh GPU
1 1 127.0.0.1 8088"
)
def
test_full_ps_
ascend
_lenet
():
return_code
=
os
.
system
(
"bash
shell_run_test.sh Ascend
1 1 127.0.0.1 8088"
)
assert
return_code
==
0
tests/st/ps/
full_ps/run_full_ps_lene
t.sh
→
tests/st/ps/
multi_worker_full_ps/shell_run_tes
t.sh
浏览文件 @
b10d4d6e
...
...
@@ -32,7 +32,7 @@ do
cd
${
execute_path
}
/sched_
$i
/
||
exit
export
RANK_ID
=
$i
export
DEVICE_ID
=
$i
python
-s
${
self_path
}
/../test
_full_ps_lenet.py
--device_target
=
$DEVICE_TARGET
&
python
${
self_path
}
/../test_multi_worker
_full_ps_lenet.py
--device_target
=
$DEVICE_TARGET
&
done
export
MS_ROLE
=
MS_PSERVER
...
...
@@ -43,7 +43,7 @@ do
cd
${
execute_path
}
/server_
$i
/
||
exit
export
RANK_ID
=
$i
export
DEVICE_ID
=
$i
python
-s
${
self_path
}
/../test
_full_ps_lenet.py
--device_target
=
$DEVICE_TARGET
&
python
${
self_path
}
/../test_multi_worker
_full_ps_lenet.py
--device_target
=
$DEVICE_TARGET
&
done
export
MS_ROLE
=
MS_WORKER
...
...
@@ -54,7 +54,7 @@ do
cd
${
execute_path
}
/worker_
$i
/
||
exit
export
RANK_ID
=
$i
export
DEVICE_ID
=
$i
python
-s
${
self_path
}
/../test
_full_ps_lenet.py
--device_target
=
$DEVICE_TARGET
&
python
${
self_path
}
/../test_multi_worker
_full_ps_lenet.py
--device_target
=
$DEVICE_TARGET
&
done
wait
$!
...
...
tests/st/ps/multi_worker_full_ps/test_multi_worker_full_ps_lenet.py
0 → 100644
浏览文件 @
b10d4d6e
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import
argparse
import
numpy
as
np
import
mindspore.context
as
context
import
mindspore.nn
as
nn
from
mindspore.common.initializer
import
TruncatedNormal
from
mindspore
import
Tensor
from
mindspore.nn
import
TrainOneStepCell
,
WithLossCell
parser
=
argparse
.
ArgumentParser
(
description
=
"test_ps_lenet"
)
parser
.
add_argument
(
"--device_target"
,
type
=
str
,
default
=
"Ascend"
)
args
,
_
=
parser
.
parse_known_args
()
device_target
=
args
.
device_target
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
device_target
)
def
conv
(
in_channels
,
out_channels
,
kernel_size
,
stride
=
1
,
padding
=
0
):
"""weight initial for conv layer"""
weight
=
weight_variable
()
return
nn
.
Conv2d
(
in_channels
,
out_channels
,
kernel_size
=
kernel_size
,
stride
=
stride
,
padding
=
padding
,
weight_init
=
weight
,
has_bias
=
False
,
pad_mode
=
"valid"
,
)
def
fc_with_initialize
(
input_channels
,
out_channels
):
"""weight initial for fc layer"""
weight
=
weight_variable
()
bias
=
weight_variable
()
return
nn
.
Dense
(
input_channels
,
out_channels
,
weight
,
bias
)
def
weight_variable
():
"""weight initial"""
return
TruncatedNormal
(
0.02
)
class
LeNet5
(
nn
.
Cell
):
def
__init__
(
self
,
num_class
=
10
,
channel
=
3
):
super
(
LeNet5
,
self
).
__init__
()
self
.
num_class
=
num_class
self
.
conv1
=
conv
(
channel
,
6
,
5
)
self
.
conv2
=
conv
(
6
,
16
,
5
)
self
.
fc1
=
fc_with_initialize
(
16
*
5
*
5
,
120
)
self
.
fc2
=
fc_with_initialize
(
120
,
84
)
self
.
fc3
=
fc_with_initialize
(
84
,
self
.
num_class
)
self
.
relu
=
nn
.
ReLU
()
self
.
max_pool2d
=
nn
.
MaxPool2d
(
kernel_size
=
2
,
stride
=
2
)
self
.
flatten
=
nn
.
Flatten
()
def
construct
(
self
,
x
):
x
=
self
.
conv1
(
x
)
x
=
self
.
relu
(
x
)
x
=
self
.
max_pool2d
(
x
)
x
=
self
.
conv2
(
x
)
x
=
self
.
relu
(
x
)
x
=
self
.
max_pool2d
(
x
)
x
=
self
.
flatten
(
x
)
x
=
self
.
fc1
(
x
)
x
=
self
.
relu
(
x
)
x
=
self
.
fc2
(
x
)
x
=
self
.
relu
(
x
)
x
=
self
.
fc3
(
x
)
return
x
if
__name__
==
"__main__"
:
epoch
=
5
np
.
random
.
seed
(
0
)
network
=
LeNet5
(
10
)
network
.
set_param_ps
()
criterion
=
nn
.
SoftmaxCrossEntropyWithLogits
(
is_grad
=
False
,
sparse
=
True
,
reduction
=
"mean"
)
net_opt
=
nn
.
Momentum
(
network
.
trainable_params
(),
0.01
,
0.9
)
net_with_criterion
=
WithLossCell
(
network
,
criterion
)
train_network
=
TrainOneStepCell
(
net_with_criterion
,
net_opt
)
train_network
.
set_train
()
losses
=
[]
for
_
in
range
(
epoch
):
data
=
Tensor
(
np
.
random
.
rand
(
32
,
3
,
32
,
32
).
astype
(
np
.
float32
))
label
=
Tensor
(
np
.
random
.
randint
(
0
,
9
,
(
32
)).
astype
(
np
.
int32
))
loss
=
train_network
(
data
,
label
).
asnumpy
()
losses
.
append
(
loss
)
print
(
losses
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录