From 6296090d5c4332b2c2b0936326ac4535b606896e Mon Sep 17 00:00:00 2001 From: Xingyuan Bu Date: Fri, 11 May 2018 14:15:31 +0800 Subject: [PATCH] Hotfix parallel nccl args (#903) * fix parallel * typo * fix data best_map parameter bug --- fluid/object_detection/README.md | 16 ++++++++-------- fluid/object_detection/eval_coco_map.py | 5 +++-- fluid/object_detection/train.py | 5 ++++- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/fluid/object_detection/README.md b/fluid/object_detection/README.md index 9ef05eae..dd4b70d6 100644 --- a/fluid/object_detection/README.md +++ b/fluid/object_detection/README.md @@ -52,22 +52,22 @@ Declaration: the MobileNet-v1 SSD model is converted by [TensorFlow model](https #### Train on PASCAL VOC - Train on one device (/GPU). ```python - env CUDA_VISIABLE_DEVICES=0 python -u train.py --parallel=False --dataset='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/' + env CUDA_VISIBLE_DEVICES=0 python -u train.py --parallel=False --dataset='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/' ``` - Train on multi devices (/GPUs). ```python - env CUDA_VISIABLE_DEVICES=0,1 python -u train.py --batch_size=64 --dataset='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/' + env CUDA_VISIBLE_DEVICES=0,1 python -u train.py --batch_size=64 --dataset='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/' ``` #### Train on MS-COCO - Train on one device (/GPU). ```python - env CUDA_VISIABLE_DEVICES=0 python -u train.py --parallel=False --dataset='coco2014' --pretrained_model='pretrained/mobilenet_imagenet/' + env CUDA_VISIBLE_DEVICES=0 python -u train.py --parallel=False --dataset='coco2014' --pretrained_model='pretrained/mobilenet_imagenet/' ``` - Train on multi devices (/GPUs). 
```python - env CUDA_VISIABLE_DEVICES=0,1 python -u train.py --batch_size=64 --dataset='coco2014' --pretrained_model='pretrained/mobilenet_imagenet/' + env CUDA_VISIBLE_DEVICES=0,1 python -u train.py --batch_size=64 --dataset='coco2014' --pretrained_model='pretrained/mobilenet_imagenet/' ``` TBD @@ -90,13 +90,13 @@ Note we set the defualt test list to the dataset's test/val list, you can use yo #### Evaluate on PASCAL VOC ```python -env CUDA_VISIABLE_DEVICES=0 python eval.py --dataset='pascalvoc' --model_dir='train_pascal_model/90' --data_dir='data/pascalvoc' --test_list='test.txt' --ap_version='11point' +env CUDA_VISIBLE_DEVICES=0 python eval.py --dataset='pascalvoc' --model_dir='train_pascal_model/90' --data_dir='data/pascalvoc' --test_list='test.txt' --ap_version='11point' ``` #### Evaluate on MS-COCO ```python -env CUDA_VISIABLE_DEVICES=0 python eval.py --dataset='coco2014' --nms_threshold=0.5 --model_dir='train_coco_model/40' --test_list='annotations/instances_minival2014.json' --ap_version='integral' -env CUDA_VISIABLE_DEVICES=0 python eval_coco_map.py --dataset='coco2017' --nms_threshold=0.5 --model_dir='train_coco_model/40' --test_list='annotations/instances_minival2017.json' +env CUDA_VISIBLE_DEVICES=0 python eval.py --dataset='coco2014' --nms_threshold=0.5 --model_dir='train_coco_model/40' --test_list='annotations/instances_minival2014.json' --ap_version='integral' +env CUDA_VISIBLE_DEVICES=0 python eval_coco_map.py --dataset='coco2017' --nms_threshold=0.5 --model_dir='train_coco_model/40' --test_list='annotations/instances_minival2017.json' ``` TBD @@ -104,7 +104,7 @@ TBD ### Infer and Visualize ```python -env CUDA_VISIABLE_DEVICES=0 python infer.py --model_dir='train_coco_model/20' --image_path='./data/coco/val2014/COCO_val2014_000000000139.jpg' +env CUDA_VISIBLE_DEVICES=0 python infer.py --model_dir='train_coco_model/20' --image_path='./data/coco/val2014/COCO_val2014_000000000139.jpg' ``` Below is the examples after running python infer.py to inference 
and visualize the model result.

diff --git a/fluid/object_detection/eval_coco_map.py b/fluid/object_detection/eval_coco_map.py index 741badf6..b9f03a63 100644 --- a/fluid/object_detection/eval_coco_map.py +++ b/fluid/object_detection/eval_coco_map.py @@ -69,7 +69,7 @@ def eval(args, data_args, test_list, batch_size, model_dir=None): place=place, feed_list=[image, gt_box, gt_label, gt_iscrowd, gt_image_info]) - def get_dt_res(nmsed_out_v): + def get_dt_res(nmsed_out_v, data): dts_res = [] lod = nmsed_out_v[0].lod()[0] nmsed_out_v = np.array(nmsed_out_v[0]) @@ -100,6 +100,7 @@ def eval(args, data_args, test_list, batch_size, model_dir=None): 'score': score } dts_res.append(dt_res) + return dts_res def test(): dts_res = [] @@ -111,7 +112,7 @@ def eval(args, data_args, test_list, batch_size, model_dir=None): return_numpy=False) if batch_id % 20 == 0: print("Batch {0}".format(batch_id)) - dts_res += get_dt_res(nmsed_out_v) + dts_res += get_dt_res(nmsed_out_v, data) with open("detection_result.json", 'w') as outfile: json.dump(dts_res, outfile) diff --git a/fluid/object_detection/train.py b/fluid/object_detection/train.py index 4d3f89b4..45cccbbb 100644 --- a/fluid/object_detection/train.py +++ b/fluid/object_detection/train.py @@ -18,6 +18,8 @@ add_arg('learning_rate', float, 0.001, "Learning rate.") add_arg('batch_size', int, 32, "Minibatch size.") add_arg('num_passes', int, 120, "Epoch number.") add_arg('use_gpu', bool, True, "Whether use GPU.") +add_arg('parallel', bool, True, "Parallel.") +add_arg('use_nccl', bool, True, "NCCL.") add_arg('dataset', str, 'pascalvoc', "coco2014, coco2017, and pascalvoc.") add_arg('model_save_dir', str, 'model', "The path to save model.") add_arg('pretrained_model', str, 'pretrained/ssd_mobilenet_v1_coco/', "The init model path.") @@ -274,6 +276,7 @@ def parallel_exe(args, best_map = test_map[0] save_model('best_model') print("Pass {0}, test map {1}".format(pass_id, test_map[0])) + return best_map for pass_id in range(num_passes): start_time = time.time() @@ -295,7 
+298,7 @@ def parallel_exe(args, if batch_id % 20 == 0: print("Pass {0}, batch {1}, loss {2}, time {3}".format( pass_id, batch_id, loss_v, start_time - prev_start_time)) - test(pass_id, best_map) + best_map = test(pass_id, best_map) if pass_id % 10 == 0 or pass_id == num_passes - 1: save_model(str(pass_id)) print("Best test map {0}".format(best_map)) -- GitLab