Commit 9056ed1d authored by zhouyaqiang

fix bug where creating cache files fails if not running as the root user

Parent 93a50ec4
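The core of the fix is in `test.py` below: the hard-coded `/cache` directory is replaced by a relative `../cache`, because creating a directory directly under `/` normally requires root privileges, so `os.mkdir('/cache')` raises `PermissionError` for ordinary users. A minimal standalone sketch of the idea; the `pick_cache_dir` helper is illustrative and not part of the repository:

```
import os


def pick_cache_dir(preferred='/cache', fallback='../cache'):
    """Return a cache directory the current user can actually create and write.

    Illustrative helper: creating a directory directly under '/' normally
    requires root, so fall back to a path relative to the working directory.
    """
    for candidate in (preferred, fallback):
        try:
            os.makedirs(candidate, exist_ok=True)
            if os.access(candidate, os.W_OK):
                return candidate
        except PermissionError:
            continue
    raise RuntimeError('no writable cache directory available')


if __name__ == '__main__':
    print(pick_cache_dir())
```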
......@@ -168,7 +168,7 @@ You can modify the training behaviour through the various flags in the `train.py
- running on Ascend
```
- python train.py --data_dir /PATH/TO/DATASET --is_distributed 0> train.log 2>&1 &
+ python train.py --data_dir /PATH/TO/DATASET --is_distributed 0 > train.log 2>&1 &
```
The python command above will run in the background. The log and model checkpoints will be generated in `output/202x-xx-xx_time_xx_xx_xx/`. The loss values will be reported as follows:
......
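The only change in the README hunk above is the space added before the redirection operator. Without it, the shell treats `0>` as a redirection of file descriptor 0, so `train.log` is opened on stdin and `--is_distributed` never receives its `0` value; with the space, `0` is passed to `train.py` and `>` redirects stdout to `train.log` as intended.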
......@@ -190,21 +190,21 @@ def test(cloud_args=None):
    args.logger.info('before results={}'.format(results))
    if args.is_distributed:
        model_md5 = model.replace('/', '')
-       tmp_dir = '/cache'
+       tmp_dir = '../cache'
        if not os.path.exists(tmp_dir):
            os.mkdir(tmp_dir)
-       top1_correct_npy = '/cache/top1_rank_{}_{}.npy'.format(args.rank, model_md5)
-       top5_correct_npy = '/cache/top5_rank_{}_{}.npy'.format(args.rank, model_md5)
-       img_tot_npy = '/cache/img_tot_rank_{}_{}.npy'.format(args.rank, model_md5)
+       top1_correct_npy = '{}/top1_rank_{}_{}.npy'.format(tmp_dir, args.rank, model_md5)
+       top5_correct_npy = '{}/top5_rank_{}_{}.npy'.format(tmp_dir, args.rank, model_md5)
+       img_tot_npy = '{}/img_tot_rank_{}_{}.npy'.format(tmp_dir, args.rank, model_md5)
        np.save(top1_correct_npy, top1_correct)
        np.save(top5_correct_npy, top5_correct)
        np.save(img_tot_npy, img_tot)
        while True:
            rank_ok = True
            for other_rank in range(args.group_size):
-               top1_correct_npy = '/cache/top1_rank_{}_{}.npy'.format(other_rank, model_md5)
-               top5_correct_npy = '/cache/top5_rank_{}_{}.npy'.format(other_rank, model_md5)
-               img_tot_npy = '/cache/img_tot_rank_{}_{}.npy'.format(other_rank, model_md5)
+               top1_correct_npy = '{}/top1_rank_{}_{}.npy'.format(tmp_dir, other_rank, model_md5)
+               top5_correct_npy = '{}/top5_rank_{}_{}.npy'.format(tmp_dir, other_rank, model_md5)
+               img_tot_npy = '{}/img_tot_rank_{}_{}.npy'.format(tmp_dir, other_rank, model_md5)
                if not os.path.exists(top1_correct_npy) or not os.path.exists(top5_correct_npy) \
                        or not os.path.exists(img_tot_npy):
                    rank_ok = False
......@@ -215,9 +215,9 @@ def test(cloud_args=None):
        top5_correct_all = 0
        img_tot_all = 0
        for other_rank in range(args.group_size):
-           top1_correct_npy = '/cache/top1_rank_{}_{}.npy'.format(other_rank, model_md5)
-           top5_correct_npy = '/cache/top5_rank_{}_{}.npy'.format(other_rank, model_md5)
-           img_tot_npy = '/cache/img_tot_rank_{}_{}.npy'.format(other_rank, model_md5)
+           top1_correct_npy = '{}/top1_rank_{}_{}.npy'.format(tmp_dir, other_rank, model_md5)
+           top5_correct_npy = '{}/top5_rank_{}_{}.npy'.format(tmp_dir, other_rank, model_md5)
+           img_tot_npy = '{}/img_tot_rank_{}_{}.npy'.format(tmp_dir, other_rank, model_md5)
            top1_correct_all += np.load(top1_correct_npy)
            top5_correct_all += np.load(top5_correct_npy)
            img_tot_all += np.load(img_tot_npy)
......
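For context, the distributed branch of `test()` shown above has each rank dump its partial counters (`top1_correct`, `top5_correct`, `img_tot`) as `.npy` files into the shared cache directory, spin until the files of every rank in `group_size` exist, and then sum them. A condensed sketch of that pattern under the new relative `tmp_dir`; the helpers `save_partial` and `reduce_all` are illustrative names, not functions from the repository:

```
import os
import time

import numpy as np


def save_partial(tmp_dir, rank, tag, top1, top5, total):
    """Illustrative helper: each rank writes its partial counters to tmp_dir."""
    os.makedirs(tmp_dir, exist_ok=True)
    np.save(os.path.join(tmp_dir, 'top1_rank_{}_{}.npy'.format(rank, tag)), top1)
    np.save(os.path.join(tmp_dir, 'top5_rank_{}_{}.npy'.format(rank, tag)), top5)
    np.save(os.path.join(tmp_dir, 'img_tot_rank_{}_{}.npy'.format(rank, tag)), total)


def reduce_all(tmp_dir, group_size, tag, poll_seconds=1):
    """Illustrative helper: wait until every rank's files exist, then sum them."""
    def paths(r):
        return [os.path.join(tmp_dir, '{}_rank_{}_{}.npy'.format(name, r, tag))
                for name in ('top1', 'top5', 'img_tot')]

    # Poll the shared directory until all ranks have written their results.
    while not all(os.path.exists(p) for r in range(group_size) for p in paths(r)):
        time.sleep(poll_seconds)

    top1 = top5 = total = 0
    for r in range(group_size):
        p1, p5, pt = paths(r)
        top1 += np.load(p1)
        top5 += np.load(p5)
        total += np.load(pt)
    return top1, top5, total
```

Because each rank only polls the filesystem, the cache directory has to be writable by the user running the evaluation, which is exactly what the switch from `/cache` to `../cache` ensures.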
......@@ -16,8 +16,8 @@
echo "=============================================================================================================="
echo "Please run the scipt as: "
echo "sh run_distribute_train.sh DEVICE_NUM RANK_TABLE_FILE DATASET"
echo "for example: sh run_distribute_train.sh 8 /data/hccl.json /path/to/dataset"
echo "sh scipts/run_distribute_train.sh DEVICE_NUM RANK_TABLE_FILE DATASET"
echo "for example: sh scipts/run_distribute_train.sh 8 /data/hccl.json /path/to/dataset"
echo "It is better to use absolute path."
echo "================================================================================================================="
......