提交 495d0428 编写于 作者: M meixiaowei

modify ReadMe and add data parallel

上级 07ae9fc1
......@@ -8,9 +8,9 @@ This is an example of training ResNet101 with ImageNet dataset in MindSpore.
- Install [MindSpore](https://www.mindspore.cn/install/en).
- Download the dataset [ImageNet](http://image-net.org/download).
- Download the dataset ImageNet2012.
> Unzip the ImageNet dataset to any path you want; the folder should include the train and eval datasets as follows:
> Unzip the ImageNet2012 dataset to any path you want; the folder should include the train and eval datasets as follows:
```
.
......@@ -25,7 +25,6 @@ This is an example of training ResNet101 with ImageNet dataset in MindSpore.
```shell
.
├── crossentropy.py # CrossEntropy loss function
├── var_init.py # weight initialization
├── config.py # parameter configuration
├── dataset.py # data preprocessing
├── eval.py # eval net
......
......@@ -76,8 +76,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
type_cast_op = C2.TypeCast(mstype.int32)
ds = ds.map(input_columns="image", operations=trans)
ds = ds.map(input_columns="label", operations=type_cast_op)
ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
# apply shuffle operations
ds = ds.shuffle(buffer_size=config.buffer_size)
......
......@@ -20,23 +20,35 @@ then
exit 1
fi
if [ ! -f $1 ]
# Print the absolute form of the path passed as $1.
#   - An argument that already starts with "/" is echoed unchanged.
#   - Anything else is taken relative to $PWD and normalised with
#     `realpath -m`, which does not require the path to exist.
get_real_path(){
  if [ "${1:0:1}" == "/" ]; then
    echo "$1"
  else
    # Quote the operand so that a path containing spaces or glob characters
    # reaches realpath as a single, literal argument.
    echo "$(realpath -m "$PWD/$1")"
  fi
}
# Resolve the HCCL config file ($1) and the dataset directory ($2) to absolute
# paths and print them. The arguments are quoted so that a path containing
# spaces is passed through intact instead of being split into several words.
PATH1=$(get_real_path "$1")
PATH2=$(get_real_path "$2")
echo "$PATH1"
echo "$PATH2"
if [ ! -f $PATH1 ]
then
echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file"
exit 1
fi
if [ ! -d $2 ]
if [ ! -d $PATH2 ]
then
echo "error: DATASET_PATH=$2 is not a directory"
echo "error: DATASET_PATH=$PATH2 is not a directory"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=8
export RANK_SIZE=8
export MINDSPORE_HCCL_CONFIG_PATH=$1
export RANK_TABLE_FILE=$1
export MINDSPORE_HCCL_CONFIG_PATH=$PATH1
export RANK_TABLE_FILE=$PATH1
for((i=0; i<${DEVICE_NUM}; i++))
do
......@@ -49,6 +61,6 @@ do
cd ./train_parallel$i || exit
echo "start training for rank $RANK_ID, device $DEVICE_ID"
env > env.log
python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$2 &> log &
python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log &
cd ..
done
......@@ -20,15 +20,27 @@ then
exit 1
fi
if [ ! -d $1 ]
# Print the absolute form of the path passed as $1.
#   - An argument that already starts with "/" is echoed unchanged.
#   - Anything else is taken relative to $PWD and normalised with
#     `realpath -m`, which does not require the path to exist.
get_real_path(){
  if [ "${1:0:1}" == "/" ]; then
    echo "$1"
  else
    # Quote the operand so that a path containing spaces or glob characters
    # reaches realpath as a single, literal argument.
    echo "$(realpath -m "$PWD/$1")"
  fi
}
# Resolve the dataset directory ($1) and the checkpoint file ($2) to absolute
# paths and print them. The arguments are quoted so that a path containing
# spaces is passed through intact instead of being split into several words.
PATH1=$(get_real_path "$1")
PATH2=$(get_real_path "$2")
echo "$PATH1"
echo "$PATH2"
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$1 is not a directory"
echo "error: DATASET_PATH=$PATH1 is not a directory"
exit 1
fi
if [ ! -f $2 ]
if [ ! -f $PATH2 ]
then
echo "error: CHECKPOINT_PATH=$2 is not a file"
echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
exit 1
fi
......@@ -48,5 +60,5 @@ cp *.sh ./infer
cd ./infer || exit
env > env.log
echo "start infering for device $DEVICE_ID"
python eval.py --do_eval=True --dataset_path=$1 --checkpoint_path=$2 &> log &
python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log &
cd ..
......@@ -20,9 +20,19 @@ then
exit 1
fi
if [ ! -d $1 ]
# Print the absolute form of the path passed as $1.
#   - An argument that already starts with "/" is echoed unchanged.
#   - Anything else is taken relative to $PWD and normalised with
#     `realpath -m`, which does not require the path to exist.
get_real_path(){
  if [ "${1:0:1}" == "/" ]; then
    echo "$1"
  else
    # Quote the operand so that a path containing spaces or glob characters
    # reaches realpath as a single, literal argument.
    echo "$(realpath -m "$PWD/$1")"
  fi
}
# Resolve the dataset directory ($1) to an absolute path and print it. The
# argument is quoted so that a path containing spaces is passed through intact
# instead of being split into several words.
PATH1=$(get_real_path "$1")
echo "$PATH1"
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$1 is not a directory"
echo "error: DATASET_PATH=$PATH1 is not a directory"
exit 1
fi
......@@ -42,5 +52,5 @@ cp *.sh ./train
cd ./train || exit
echo "start training for device $DEVICE_ID"
env > env.log
python train.py --do_train=True --dataset_path=$1 &> log &
python train.py --do_train=True --dataset_path=$PATH1 &> log &
cd ..
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册