Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
6adcb5a8
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6adcb5a8
编写于
7月 31, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
7月 31, 2020
浏览文件
操作
浏览文件
下载
差异文件
!3719 resnet101 changes for GPU
Merge pull request !3719 from RobinGrosman/master
上级
59aaf56a
a75228d7
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
216 addition
and
41 deletion
+216
-41
model_zoo/official/cv/resnet/eval.py
model_zoo/official/cv/resnet/eval.py
+2
-5
model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh
...oo/official/cv/resnet/scripts/run_distribute_train_gpu.sh
+93
-0
model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh
...oo/official/cv/resnet/scripts/run_standalone_train_gpu.sh
+95
-0
model_zoo/official/cv/resnet/src/dataset.py
model_zoo/official/cv/resnet/src/dataset.py
+17
-27
model_zoo/official/cv/resnet/train.py
model_zoo/official/cv/resnet/train.py
+9
-9
未找到文件。
model_zoo/official/cv/resnet/eval.py
浏览文件 @
6adcb5a8
...
...
@@ -58,11 +58,8 @@ if __name__ == '__main__':
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
target
,
save_graphs
=
False
,
device_id
=
device_id
)
# create dataset
if
args_opt
.
net
==
"resnet50"
:
dataset
=
create_dataset
(
dataset_path
=
args_opt
.
dataset_path
,
do_train
=
False
,
batch_size
=
config
.
batch_size
,
target
=
target
)
else
:
dataset
=
create_dataset
(
dataset_path
=
args_opt
.
dataset_path
,
do_train
=
False
,
batch_size
=
config
.
batch_size
)
dataset
=
create_dataset
(
dataset_path
=
args_opt
.
dataset_path
,
do_train
=
False
,
batch_size
=
config
.
batch_size
,
target
=
target
)
step_size
=
dataset
.
get_dataset_size
()
# define net
...
...
model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh
0 → 100755
浏览文件 @
6adcb5a8
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if
[
$#
!=
3
]
&&
[
$#
!=
4
]
then
echo
"Usage: sh run_distribute_train_gpu.sh [resnet50|resnet101] [cifar10|imagenet2012] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
exit
1
fi
if
[
$1
!=
"resnet50"
]
&&
[
$1
!=
"resnet101"
]
then
echo
"error: the selected net is neither resnet50 nor resnet101"
exit
1
fi
if
[
$2
!=
"cifar10"
]
&&
[
$2
!=
"imagenet2012"
]
then
echo
"error: the selected dataset is neither cifar10 nor imagenet2012"
exit
1
fi
if
[
$1
==
"resnet101"
]
&&
[
$2
==
"cifar10"
]
then
echo
"error: training resnet101 with cifar10 dataset is unsupported now!"
exit
1
fi
get_real_path
(){
if
[
"
${
1
:0:1
}
"
==
"/"
]
;
then
echo
"
$1
"
else
echo
"
$(
realpath
-m
$PWD
/
$1
)
"
fi
}
PATH1
=
$(
get_real_path
$3
)
if
[
$#
==
4
]
then
PATH2
=
$(
get_real_path
$4
)
fi
if
[
!
-d
$PATH2
]
then
echo
"error: DATASET_PATH=
$PATH1
is not a directory"
exit
1
fi
if
[
$#
==
5
]
&&
[
!
-f
$PATH2
]
then
echo
"error: PRETRAINED_CKPT_PATH=
$PATH2
is not a file"
exit
1
fi
ulimit
-u
unlimited
export
DEVICE_NUM
=
8
export
RANK_SIZE
=
8
rm
-rf
./train_parallel
mkdir
./train_parallel
cp
../
*
.py ./train_parallel
cp
*
.sh ./train_parallel
cp
-r
../src ./train_parallel
cd
./train_parallel
||
exit
if
[
$#
==
3
]
then
mpirun
--allow-run-as-root
-n
$RANK_SIZE
\
python train.py
--net
=
$1
--dataset
=
$2
--run_distribute
=
True
\
--device_num
=
$DEVICE_NUM
--device_target
=
"GPU"
--dataset_path
=
$PATH1
&> log &
fi
if
[
$#
==
4
]
then
mpirun
--allow-run-as-root
-n
$RANK_SIZE
\
python train.py
--net
=
$1
--dataset
=
$2
--run_distribute
=
True
\
--device_num
=
$DEVICE_NUM
--device_target
=
"GPU"
--dataset_path
=
$PATH1
--pre_trained
=
$PATH2
&> log &
fi
model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh
0 → 100755
浏览文件 @
6adcb5a8
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if
[
$#
!=
3
]
&&
[
$#
!=
4
]
then
echo
"Usage: sh run_standalone_train.sh [resnet50|resnet101] [cifar10|imagenet2012] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
exit
1
fi
if
[
$1
!=
"resnet50"
]
&&
[
$1
!=
"resnet101"
]
then
echo
"error: the selected net is neither resnet50 nor resnet101"
exit
1
fi
if
[
$2
!=
"cifar10"
]
&&
[
$2
!=
"imagenet2012"
]
then
echo
"error: the selected dataset is neither cifar10 nor imagenet2012"
exit
1
fi
if
[
$1
==
"resnet101"
]
&&
[
$2
==
"cifar10"
]
then
echo
"error: training resnet101 with cifar10 dataset is unsupported now!"
exit
1
fi
get_real_path
(){
if
[
"
${
1
:0:1
}
"
==
"/"
]
;
then
echo
"
$1
"
else
echo
"
$(
realpath
-m
$PWD
/
$1
)
"
fi
}
PATH1
=
$(
get_real_path
$3
)
if
[
$#
==
4
]
then
PATH2
=
$(
get_real_path
$4
)
fi
if
[
!
-d
$PATH1
]
then
echo
"error: DATASET_PATH=
$PATH1
is not a directory"
exit
1
fi
if
[
$#
==
4
]
&&
[
!
-f
$PATH2
]
then
echo
"error: PRETRAINED_CKPT_PATH=
$PATH2
is not a file"
exit
1
fi
ulimit
-u
unlimited
export
DEVICE_NUM
=
1
export
DEVICE_ID
=
0
export
RANK_ID
=
0
export
RANK_SIZE
=
1
if
[
-d
"train"
]
;
then
rm
-rf
./train
fi
mkdir
./train
cp
../
*
.py ./train
cp
*
.sh ./train
cp
-r
../src ./train
cd
./train
||
exit
echo
"start training for device
$DEVICE_ID
"
env
>
env.log
if
[
$#
==
3
]
then
python train.py
--net
=
$1
--dataset
=
$2
--device_target
=
"GPU"
--dataset_path
=
$PATH1
&> log &
fi
if
[
$#
==
4
]
then
python train.py
--net
=
$1
--dataset
=
$2
--device_target
=
"GPU"
--dataset_path
=
$PATH1
--pre_trained
=
$PATH2
&> log &
fi
cd
..
model_zoo/official/cv/resnet/src/dataset.py
浏览文件 @
6adcb5a8
...
...
@@ -139,7 +139,7 @@ def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target=
return
ds
def
create_dataset3
(
dataset_path
,
do_train
,
repeat_num
=
1
,
batch_size
=
32
):
def
create_dataset3
(
dataset_path
,
do_train
,
repeat_num
=
1
,
batch_size
=
32
,
target
=
"Ascend"
):
"""
create a train or eval imagenet2012 dataset for resnet101
Args:
...
...
@@ -158,36 +158,26 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32):
else
:
ds
=
de
.
ImageFolderDatasetV2
(
dataset_path
,
num_parallel_workers
=
8
,
shuffle
=
True
,
num_shards
=
device_num
,
shard_id
=
rank_id
)
resize_height
=
224
rescale
=
1.0
/
255.0
s
hift
=
0.0
image_size
=
224
mean
=
[
0.475
*
255
,
0.451
*
255
,
0.392
*
255
]
s
td
=
[
0.275
*
255
,
0.267
*
255
,
0.278
*
255
]
# define map operations
decode_op
=
C
.
Decode
()
random_resize_crop_op
=
C
.
RandomResizedCrop
(
resize_height
,
(
0.08
,
1.0
),
(
0.75
,
1.33
),
max_attempts
=
100
)
horizontal_flip_op
=
C
.
RandomHorizontalFlip
(
rank_id
/
(
rank_id
+
1
))
resize_op_256
=
C
.
Resize
((
256
,
256
))
center_crop
=
C
.
CenterCrop
(
224
)
rescale_op
=
C
.
Rescale
(
rescale
,
shift
)
normalize_op
=
C
.
Normalize
((
0.475
,
0.451
,
0.392
),
(
0.275
,
0.267
,
0.278
))
changeswap_op
=
C
.
HWC2CHW
()
if
do_train
:
trans
=
[
decode_op
,
random_resize_crop_op
,
horizontal_flip_op
,
rescale_op
,
normalize_op
,
changeswap_op
]
trans
=
[
C
.
RandomCropDecodeResize
(
image_size
,
scale
=
(
0.08
,
1.0
),
ratio
=
(
0.75
,
1.333
)),
C
.
RandomHorizontalFlip
(
rank_id
/
(
rank_id
+
1
)),
C
.
Normalize
(
mean
=
mean
,
std
=
std
),
C
.
HWC2CHW
()
]
else
:
trans
=
[
decode_op
,
resize_op_256
,
center_crop
,
rescale_op
,
normalize_op
,
changeswap_op
]
trans
=
[
C
.
Decode
(),
C
.
Resize
(
256
),
C
.
CenterCrop
(
image_size
),
C
.
Normalize
(
mean
=
mean
,
std
=
std
),
C
.
HWC2CHW
()
]
type_cast_op
=
C2
.
TypeCast
(
mstype
.
int32
)
...
...
model_zoo/official/cv/resnet/train.py
浏览文件 @
6adcb5a8
...
...
@@ -86,12 +86,8 @@ if __name__ == '__main__':
ckpt_save_dir
=
config
.
save_checkpoint_path
+
"ckpt_"
+
str
(
get_rank
())
+
"/"
# create dataset
if
args_opt
.
net
==
"resnet50"
:
dataset
=
create_dataset
(
dataset_path
=
args_opt
.
dataset_path
,
do_train
=
True
,
repeat_num
=
1
,
batch_size
=
config
.
batch_size
,
target
=
target
)
else
:
dataset
=
create_dataset
(
dataset_path
=
args_opt
.
dataset_path
,
do_train
=
True
,
repeat_num
=
1
,
batch_size
=
config
.
batch_size
)
dataset
=
create_dataset
(
dataset_path
=
args_opt
.
dataset_path
,
do_train
=
True
,
repeat_num
=
1
,
batch_size
=
config
.
batch_size
,
target
=
target
)
step_size
=
dataset
.
get_dataset_size
()
# define net
...
...
@@ -122,7 +118,7 @@ if __name__ == '__main__':
lr
=
get_lr
(
lr_init
=
config
.
lr_init
,
lr_end
=
0.0
,
lr_max
=
config
.
lr_max
,
warmup_epochs
=
config
.
warmup_epochs
,
total_epochs
=
config
.
epoch_size
,
steps_per_epoch
=
step_size
,
lr_decay_mode
=
'cosine'
)
else
:
lr
=
warmup_cosine_annealing_lr
(
config
.
lr
,
step_size
,
config
.
warmup_epochs
,
120
,
lr
=
warmup_cosine_annealing_lr
(
config
.
lr
,
step_size
,
config
.
warmup_epochs
,
config
.
epoch_size
,
config
.
pretrain_epoch_size
*
step_size
)
lr
=
Tensor
(
lr
)
...
...
@@ -147,9 +143,13 @@ if __name__ == '__main__':
amp_level
=
"O2"
,
keep_batchnorm_fp32
=
False
)
else
:
# GPU target
loss
=
SoftmaxCrossEntropyWithLogits
(
sparse
=
True
,
is_grad
=
False
,
reduction
=
'mean'
)
opt
=
Momentum
(
filter
(
lambda
x
:
x
.
requires_grad
,
net
.
get_parameters
()),
lr
,
config
.
momentum
)
loss
=
SoftmaxCrossEntropyWithLogits
(
sparse
=
True
,
reduction
=
"mean"
,
is_grad
=
False
,
smooth_factor
=
config
.
label_smooth_factor
,
num_classes
=
config
.
class_num
)
opt
=
Momentum
(
filter
(
lambda
x
:
x
.
requires_grad
,
net
.
get_parameters
()),
lr
,
config
.
momentum
,
config
.
weight_decay
)
model
=
Model
(
net
,
loss_fn
=
loss
,
optimizer
=
opt
,
metrics
=
{
'acc'
})
##Mixed precision
#model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
# amp_level="O2", keep_batchnorm_fp32=True)
# define callbacks
time_cb
=
TimeMonitor
(
data_size
=
step_size
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录