Modify README, add parallel workers to dataset map operations, and resolve script paths

495d0428 · meixiaowei · 07ae9fc1 · 495d0428 · 495d0428 · 495d0428
5 changed files
--- a/example/resnet101_imagenet2012/README.md
+++ b/example/resnet101_imagenet2012/README.md
@@ -8,9 +8,9 @@ This is an example of training ResNet101 with ImageNet dataset in MindSpore.

 - Install [MindSpore](https://www.mindspore.cn/install/en).

- Download the dataset [ImageNet](http://image-net.org/download).
+- Download the dataset ImageNet2012.
 
-> Unzip the ImageNet dataset to any path you want, the folder should include train and eval dataset as follows:
+> Unzip the ImageNet2012 dataset to any path you want. The folder should include the train and eval datasets as follows:
 
 ```
 .
@@ -25,7 +25,6 @@ This is an example of training ResNet101 with ImageNet dataset in MindSpore.
 ```shell
 .
 ├── crossentropy.py                 # CrossEntropy loss function
-├── var_init.py                     # weight initial
 ├── config.py                       # parameter configuration
 ├── dataset.py                      # data preprocessing
 ├── eval.py                         # eval net

--- a/example/resnet101_imagenet2012/dataset.py
+++ b/example/resnet101_imagenet2012/dataset.py
@@ -76,8 +76,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):

    type_cast_op = C2.TypeCast(mstype.int32)

-    ds = ds.map(input_columns="image", operations=trans)
-    ds = ds.map(input_columns="label", operations=type_cast_op)
+    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
+    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=config.buffer_size)

--- a/example/resnet101_imagenet2012/run_distribute_train.sh
+++ b/example/resnet101_imagenet2012/run_distribute_train.sh
@@ -20,23 +20,35 @@ then
 exit 1
 fi

-if [ ! -f $1 ]
+get_real_path(){  # Print $1 unchanged if it is absolute, else normalized against $PWD.
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    realpath -m "$PWD/$1"  # -m: path components are not required to exist
+  fi
+}
+PATH1=$(get_real_path "$1")  # quoted so a path containing spaces stays one argument
+PATH2=$(get_real_path "$2")
+echo "$PATH1"
+echo "$PATH2"
+
+if [ ! -f $PATH1 ]
 then 
-    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
+    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file"
 exit 1
 fi 

-if [ ! -d $2 ]
+if [ ! -d $PATH2 ]
 then 
-    echo "error: DATASET_PATH=$2 is not a directory"
+    echo "error: DATASET_PATH=$PATH2 is not a directory"
 exit 1
 fi 

 ulimit -u unlimited
 export DEVICE_NUM=8
 export RANK_SIZE=8
-export MINDSPORE_HCCL_CONFIG_PATH=$1
-export RANK_TABLE_FILE=$1
+export MINDSPORE_HCCL_CONFIG_PATH=$PATH1
+export RANK_TABLE_FILE=$PATH1

 for((i=0; i<${DEVICE_NUM}; i++))
 do
@@ -49,6 +61,6 @@ do
    cd ./train_parallel$i || exit
    echo "start training for rank $RANK_ID, device $DEVICE_ID"
    env > env.log
-    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$2 &> log &
+    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log &
    cd ..
 done
--- a/example/resnet101_imagenet2012/run_infer.sh
+++ b/example/resnet101_imagenet2012/run_infer.sh
@@ -20,15 +20,27 @@ then
 exit 1
 fi

-if [ ! -d $1 ]
+get_real_path(){  # Print $1 unchanged if it is absolute, else normalized against $PWD.
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    realpath -m "$PWD/$1"  # -m: path components are not required to exist
+  fi
+}
+PATH1=$(get_real_path "$1")  # quoted so a path containing spaces stays one argument
+PATH2=$(get_real_path "$2")
+echo "$PATH1"
+echo "$PATH2"
+
+if [ ! -d $PATH1 ]
 then 
-    echo "error: DATASET_PATH=$1 is not a directory"
+    echo "error: DATASET_PATH=$PATH1 is not a directory"
 exit 1
 fi 

-if [ ! -f $2 ]
+if [ ! -f $PATH2 ]
 then 
-    echo "error: CHECKPOINT_PATH=$2 is not a file"
+    echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
 exit 1
 fi 

@@ -48,5 +60,5 @@ cp *.sh ./infer
 cd ./infer || exit
 env > env.log
 echo "start infering for device $DEVICE_ID"
-python eval.py --do_eval=True --dataset_path=$1 --checkpoint_path=$2 &> log &
+python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log &
 cd ..
--- a/example/resnet101_imagenet2012/run_standalone_train.sh
+++ b/example/resnet101_imagenet2012/run_standalone_train.sh
@@ -20,9 +20,19 @@ then
 exit 1
 fi

-if [ ! -d $1 ]
+get_real_path(){  # Print $1 unchanged if it is absolute, else normalized against $PWD.
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    realpath -m "$PWD/$1"  # -m: path components are not required to exist
+  fi
+}
+PATH1=$(get_real_path "$1")  # quoted so a path containing spaces stays one argument
+echo "$PATH1"
+
+if [ ! -d $PATH1 ]
 then 
-    echo "error: DATASET_PATH=$1 is not a directory"
+    echo "error: DATASET_PATH=$PATH1 is not a directory"
 exit 1
 fi 

@@ -42,5 +52,5 @@ cp *.sh ./train
 cd ./train || exit
 echo "start training for device $DEVICE_ID"
 env > env.log
-python train.py --do_train=True --dataset_path=$1 &> log &
+python train.py --do_train=True --dataset_path=$PATH1 &> log &
 cd ..