提交 cf6e13cc 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!3563 fix a bug that causes failure when running multi-p from the original dataset, not from MR

Merge pull request !3563 from zhouyuanshen/master
...@@ -118,7 +118,7 @@ epoch: 12 step: 7393, rpn_loss: 0.00691, rcnn_loss: 0.10168, rpn_cls_loss: 0.005 ...@@ -118,7 +118,7 @@ epoch: 12 step: 7393, rpn_loss: 0.00691, rcnn_loss: 0.10168, rpn_cls_loss: 0.005
``` ```
# infer # infer
sh run_infer.sh [VALIDATION_DATASET_PATH] [CHECKPOINT_PATH] sh run_eval.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]
``` ```
> checkpoint can be produced in training process. > checkpoint can be produced in training process.
......
...@@ -108,7 +108,7 @@ if __name__ == '__main__': ...@@ -108,7 +108,7 @@ if __name__ == '__main__':
prefix = "FasterRcnn_eval.mindrecord" prefix = "FasterRcnn_eval.mindrecord"
mindrecord_dir = config.mindrecord_dir mindrecord_dir = config.mindrecord_dir
mindrecord_file = os.path.join(mindrecord_dir, prefix) mindrecord_file = os.path.join(mindrecord_dir, prefix)
if not os.path.exists(mindrecord_file): if args_opt.rank_id == 0 and not os.path.exists(mindrecord_file):
if not os.path.isdir(mindrecord_dir): if not os.path.isdir(mindrecord_dir):
os.makedirs(mindrecord_dir) os.makedirs(mindrecord_dir)
if args_opt.dataset == "coco": if args_opt.dataset == "coco":
...@@ -126,5 +126,8 @@ if __name__ == '__main__': ...@@ -126,5 +126,8 @@ if __name__ == '__main__':
else: else:
print("IMAGE_DIR or ANNO_PATH not exits.") print("IMAGE_DIR or ANNO_PATH not exits.")
while not os.path.exists(mindrecord_file + ".db"):
time.sleep(5)
print("Start Eval!") print("Start Eval!")
FasterRcnn_eval(mindrecord_file, args_opt.checkpoint_path, args_opt.ann_file) FasterRcnn_eval(mindrecord_file, args_opt.checkpoint_path, args_opt.ann_file)
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
if [ $# != 2 ] if [ $# != 2 ]
then then
echo "Usage: sh run_eval.sh [ANN_FILE] [CHECKPOINT_PATH]" echo "Usage: sh run_eval.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH]"
exit 1 exit 1
fi fi
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
"""train FasterRcnn and get checkpoint files.""" """train FasterRcnn and get checkpoint files."""
import os import os
import time
import argparse import argparse
import random import random
import numpy as np import numpy as np
...@@ -72,7 +73,7 @@ if __name__ == '__main__': ...@@ -72,7 +73,7 @@ if __name__ == '__main__':
prefix = "FasterRcnn.mindrecord" prefix = "FasterRcnn.mindrecord"
mindrecord_dir = config.mindrecord_dir mindrecord_dir = config.mindrecord_dir
mindrecord_file = os.path.join(mindrecord_dir, prefix + "0") mindrecord_file = os.path.join(mindrecord_dir, prefix + "0")
if not os.path.exists(mindrecord_file): if rank == 0 and not os.path.exists(mindrecord_file):
if not os.path.isdir(mindrecord_dir): if not os.path.isdir(mindrecord_dir):
os.makedirs(mindrecord_dir) os.makedirs(mindrecord_dir)
if args_opt.dataset == "coco": if args_opt.dataset == "coco":
...@@ -90,6 +91,9 @@ if __name__ == '__main__': ...@@ -90,6 +91,9 @@ if __name__ == '__main__':
else: else:
print("IMAGE_DIR or ANNO_PATH not exits.") print("IMAGE_DIR or ANNO_PATH not exits.")
while not os.path.exists(mindrecord_file + ".db"):
time.sleep(5)
if not args_opt.only_create_dataset: if not args_opt.only_create_dataset:
loss_scale = float(config.loss_scale) loss_scale = float(config.loss_scale)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册