From 495d0428f7de189929c98dc7a957bf1fd68743dc Mon Sep 17 00:00:00 2001 From: meixiaowei Date: Fri, 8 May 2020 16:52:45 +0800 Subject: [PATCH] modify ReadMe and add data parallel --- example/resnet101_imagenet2012/README.md | 5 ++-- example/resnet101_imagenet2012/dataset.py | 4 +-- .../run_distribute_train.sh | 26 ++++++++++++++----- example/resnet101_imagenet2012/run_infer.sh | 22 ++++++++++++---- .../run_standalone_train.sh | 16 +++++++++--- 5 files changed, 53 insertions(+), 20 deletions(-) diff --git a/example/resnet101_imagenet2012/README.md b/example/resnet101_imagenet2012/README.md index 9b73fd2ee..cfd6a152b 100644 --- a/example/resnet101_imagenet2012/README.md +++ b/example/resnet101_imagenet2012/README.md @@ -8,9 +8,9 @@ This is an example of training ResNet101 with ImageNet dataset in MindSpore. - Install [MindSpore](https://www.mindspore.cn/install/en). -- Download the dataset [ImageNet](http://image-net.org/download). +- Download the dataset ImageNet2012. -> Unzip the ImageNet dataset to any path you want, the folder should include train and eval dataset as follows: +> Unzip the ImageNet2012 dataset to any path you want, the folder should include train and eval dataset as follows: ``` . @@ -25,7 +25,6 @@ This is an example of training ResNet101 with ImageNet dataset in MindSpore. ```shell . ├── crossentropy.py # CrossEntropy loss function -├── var_init.py # weight initial ├── config.py # parameter configuration ├── dataset.py # data preprocessing ├── eval.py # eval net diff --git a/example/resnet101_imagenet2012/dataset.py b/example/resnet101_imagenet2012/dataset.py index 27d93dc08..31377cfc1 100755 --- a/example/resnet101_imagenet2012/dataset.py +++ b/example/resnet101_imagenet2012/dataset.py @@ -76,8 +76,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): type_cast_op = C2.TypeCast(mstype.int32) - ds = ds.map(input_columns="image", operations=trans) - ds = ds.map(input_columns="label", operations=type_cast_op) + ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8) + ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8) # apply shuffle operations ds = ds.shuffle(buffer_size=config.buffer_size) diff --git a/example/resnet101_imagenet2012/run_distribute_train.sh b/example/resnet101_imagenet2012/run_distribute_train.sh index f0b8f120c..ecdcd6685 100755 --- a/example/resnet101_imagenet2012/run_distribute_train.sh +++ b/example/resnet101_imagenet2012/run_distribute_train.sh @@ -20,23 +20,35 @@ then exit 1 fi -if [ ! -f $1 ] +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) +echo $PATH1 +echo $PATH2 + +if [ ! -f $PATH1 ] then - echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" + echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file" exit 1 fi -if [ ! -d $2 ] +if [ ! -d $PATH2 ] then - echo "error: DATASET_PATH=$2 is not a directory" + echo "error: DATASET_PATH=$PATH2 is not a directory" exit 1 fi ulimit -u unlimited export DEVICE_NUM=8 export RANK_SIZE=8 -export MINDSPORE_HCCL_CONFIG_PATH=$1 -export RANK_TABLE_FILE=$1 +export MINDSPORE_HCCL_CONFIG_PATH=$PATH1 +export RANK_TABLE_FILE=$PATH1 for((i=0; i<${DEVICE_NUM}; i++)) do @@ -49,6 +61,6 @@ do cd ./train_parallel$i || exit echo "start training for rank $RANK_ID, device $DEVICE_ID" env > env.log - python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$2 &> log & + python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log & cd .. done diff --git a/example/resnet101_imagenet2012/run_infer.sh b/example/resnet101_imagenet2012/run_infer.sh index 5df659275..b82427e15 100755 --- a/example/resnet101_imagenet2012/run_infer.sh +++ b/example/resnet101_imagenet2012/run_infer.sh @@ -20,15 +20,27 @@ then exit 1 fi -if [ ! -d $1 ] +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) +echo $PATH1 +echo $PATH2 + +if [ ! -d $PATH1 ] then - echo "error: DATASET_PATH=$1 is not a directory" + echo "error: DATASET_PATH=$PATH1 is not a directory" exit 1 fi -if [ ! -f $2 ] +if [ ! -f $PATH2 ] then - echo "error: CHECKPOINT_PATH=$2 is not a file" + echo "error: CHECKPOINT_PATH=$PATH2 is not a file" exit 1 fi @@ -48,5 +60,5 @@ cp *.sh ./infer cd ./infer || exit env > env.log echo "start infering for device $DEVICE_ID" -python eval.py --do_eval=True --dataset_path=$1 --checkpoint_path=$2 &> log & +python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log & cd .. diff --git a/example/resnet101_imagenet2012/run_standalone_train.sh b/example/resnet101_imagenet2012/run_standalone_train.sh index 9ba574251..dde018b8e 100755 --- a/example/resnet101_imagenet2012/run_standalone_train.sh +++ b/example/resnet101_imagenet2012/run_standalone_train.sh @@ -20,9 +20,19 @@ then exit 1 fi -if [ ! -d $1 ] +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +PATH1=$(get_real_path $1) +echo $PATH1 + +if [ ! -d $PATH1 ] then - echo "error: DATASET_PATH=$1 is not a directory" + echo "error: DATASET_PATH=$PATH1 is not a directory" exit 1 fi @@ -42,5 +52,5 @@ cp *.sh ./train cd ./train || exit echo "start training for device $DEVICE_ID" env > env.log -python train.py --do_train=True --dataset_path=$1 &> log & +python train.py --do_train=True --dataset_path=$PATH1 &> log & cd .. -- GitLab