diff --git a/PaddleCV/object_detection/README.md b/PaddleCV/object_detection/README.md index 383f0d895253e94c9c8962b54cedca192e1d1f06..c65e692217f8835da780a46ccacd244fd669660a 100644 --- a/PaddleCV/object_detection/README.md +++ b/PaddleCV/object_detection/README.md @@ -20,7 +20,7 @@ Major features: All components are modular encapsulated, including the data transforms. It's easy to plug in and pull out any module. For example, users can switch backbone easily or add mixup data augmentation for models. - High Efficiency: - Based on the high efficient PaddlePaddle framework, less memory is required. For example, the batch size of Mask-RCNN based on ResNet50 can be 5 per Tesla V100 (16G). The training speed of Yolo v3 is faster than other frameworks. + Based on the highly efficient PaddlePaddle framework, less memory is required. For example, the batch size of Mask-RCNN based on ResNet50 can be 5 per Tesla V100 (16G) with multi-GPU training. The training speed of Yolo v3 is faster than other frameworks. 
The supported architectures are as follows: diff --git a/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml index ade4ecc6ff266ef7f9b725b519a6a1445e79862c..f08f048a9f52122a7d55b1865a72ced1b191ff49 100644 --- a/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml +++ b/PaddleCV/object_detection/configs/cascade_rcnn_r50_fpn_1x.yml @@ -111,7 +111,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 drop_last: false @@ -124,7 +124,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml index 1ba2f2f12d52dee2b281f82eb492b8e82026fd80..d8cc3b0319d80b35137373d7448eb4dd53025d5e 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_1x.yml @@ -95,7 +95,7 @@ FasterRCNNTrainFeed: # batch size per device batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 drop_last: false @@ -104,7 +104,7 @@ FasterRCNNTrainFeed: FasterRCNNEvalFeed: batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml index 8ff2eadb7a178e9323d311108b13a6b66c9a3a86..4233712c75c08ef2db3289e4950c5b0755a0c295 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml +++ 
b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_1x.yml @@ -112,7 +112,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco image_dir: train2017 annotation: annotations/instances_train2017.json num_workers: 2 @@ -124,7 +124,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml index ff0d8ca3f66640225665c78a2b673ca0f9f29baa..38f3df44e0327c75e11ab4ce8daa6f1bc6cabeee 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_fpn_2x.yml @@ -112,7 +112,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco image_dir: train2017 annotation: annotations/instances_train2017.json num_workers: 2 @@ -124,7 +124,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml index 9d207897a3d0526b9e15513d9617cc7d964c2bfa..3ee9d09422103eeede841c4e31ac20a292d80b27 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_1x.yml @@ -113,7 +113,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco image_dir: train2017 annotation: annotations/instances_train2017.json num_workers: 2 @@ -125,7 +125,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: 
data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml index 9bcc54c90eabe48d333cf31a7ade0601a1dec285..20763c9ace39cc7460d419ef61267e8c89458737 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_r101_vd_fpn_2x.yml @@ -113,7 +113,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco image_dir: train2017 annotation: annotations/instances_train2017.json num_workers: 2 @@ -125,7 +125,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml index b01443a4e05e876eda5fe15cbb4b1030b0701edd..b2dacdb13632009e35eecba66293741e674c4bb1 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_1x.yml @@ -95,7 +95,7 @@ FasterRCNNTrainFeed: # batch size per device batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 drop_last: false @@ -104,7 +104,7 @@ FasterRCNNTrainFeed: FasterRCNNEvalFeed: batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml index e5953616b5547a5a7580570d2cac47e64b7120a3..46d9ac0409bae3b298a7a083e150c795c30d4ae5 100644 --- 
a/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_2x.yml @@ -95,7 +95,7 @@ FasterRCNNTrainFeed: # batch size per device batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 drop_last: false @@ -104,7 +104,7 @@ FasterRCNNTrainFeed: FasterRCNNEvalFeed: batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml index 7aed12f8896074ad44ef238fcb4e786ddb9b64c5..1501cee1ef8bd79d71ec1cd8f5ec0e238fe9391e 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_1x.yml @@ -111,7 +111,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 drop_last: false @@ -124,7 +124,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml index bb9afac76d1d7045c9411a582e4f5b8cc1ce4af8..2be9f9f04950dde1ae527923e5e3af90e597fe72 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_fpn_2x.yml @@ -111,7 +111,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 drop_last: false @@ -124,7 +124,7 @@ FasterRCNNEvalFeed: - !PadBatch 
pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml index 03d075d90f93d56025b519302894d0fc27eb500a..3b4ab8d1be91e7cad3936376d5d78682970d583a 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_1x.yml @@ -97,7 +97,7 @@ FasterRCNNTrainFeed: # batch size per device batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 drop_last: false @@ -106,7 +106,7 @@ FasterRCNNTrainFeed: FasterRCNNEvalFeed: batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml index 312352c8d8a17059336a3f80dfa0bb57547736ac..3bf6b08e6aa0772d445c87893ebfa550722ac0c0 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_r50_vd_fpn_2x.yml @@ -113,7 +113,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco image_dir: train2017 annotation: annotations/instances_train2017.json num_workers: 2 @@ -125,7 +125,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml index 183b4bb707b4e268b9b09ad1c7d148a3a0830d98..c8b1d09ac6ac3450f5e75b79b72fcd6eeb4fffdb 100644 
--- a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_1x.yml @@ -104,7 +104,7 @@ FasterRCNNTrainFeed: # batch size per device batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 @@ -113,7 +113,7 @@ FasterRCNNTrainFeed: FasterRCNNEvalFeed: batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml index 065c0ea741237c59a53f3dcf107ae15a8fb0a06c..fb920a7a6fcd391ce93ba6039d9c21e7f1c207e2 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_1x.yml @@ -115,7 +115,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco image_dir: train2017 annotation: annotations/instances_train2017.json num_workers: 2 @@ -127,7 +127,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml index 36e849c9a5f075d13d7ac4076725ae0581a7ec20..4469e8cd656ed517a8e58b1f829f3cdc22e3aba1 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_se154_vd_fpn_s1x.yml @@ -115,7 +115,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco image_dir: train2017 annotation: annotations/instances_train2017.json 
num_workers: 2 @@ -127,7 +127,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml index bdbe03c426e971fb6980addc2428b9835945742a..68f745ce8889095ff5fd74783c141cd2ce4680e6 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_1x.yml @@ -114,7 +114,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco image_dir: train2017 annotation: annotations/instances_train2017.json num_workers: 2 @@ -126,7 +126,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml index d9012e0e627a653ce04c728d7b6b5975eb31a684..8fd5420a84e97c6b88a48cae4697967eda0f981a 100644 --- a/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml +++ b/PaddleCV/object_detection/configs/faster_rcnn_x101_64x4d_fpn_2x.yml @@ -114,7 +114,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco image_dir: train2017 annotation: annotations/instances_train2017.json num_workers: 2 @@ -126,7 +126,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml 
b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml index fd1e4bc6c8a16b7c78cf7cd9d838e3f88c01e6d3..98d3cc477e727df470436d4a7bfa104156108390 100644 --- a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml +++ b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_1x.yml @@ -120,7 +120,7 @@ MaskRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 num_workers: 2 @@ -132,7 +132,7 @@ MaskRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml index 6d9bcbbaef682b8d553a1482c73aa3e77c4f01cb..44b631df6960d211a75ff10b94377bdc8bcdd83d 100644 --- a/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml +++ b/PaddleCV/object_detection/configs/mask_rcnn_r101_fpn_2x.yml @@ -120,7 +120,7 @@ MaskRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 num_workers: 2 @@ -132,7 +132,7 @@ MaskRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml index e523d684cccc972a7f5889d5389226a04064c515..05968cc65765ad81fe408d8ba029dee938f5368d 100644 --- a/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml +++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_1x.yml @@ -107,7 +107,7 @@ OptimizerBuilder: MaskRCNNTrainFeed: batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: 
annotations/instances_train2017.json image_dir: train2017 num_workers: 2 @@ -116,7 +116,7 @@ MaskRCNNTrainFeed: MaskRCNNEvalFeed: batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 shuffle: false diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml index d0c8ced12f70bc998d350dddeffee7436c1289b7..a486b2f2bee30818d42288d2265cc3f2194c0cf7 100644 --- a/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml +++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_2x.yml @@ -109,7 +109,7 @@ OptimizerBuilder: MaskRCNNTrainFeed: batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 num_workers: 2 @@ -118,7 +118,7 @@ MaskRCNNTrainFeed: MaskRCNNEvalFeed: batch_size: 1 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 shuffle: false diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml index e9204d0a5b8261fca3b54214222d07609b1d7327..12bbd590dc3ee7dfc3a7706ba20dfde4b7548e06 100644 --- a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml +++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_1x.yml @@ -120,7 +120,7 @@ MaskRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 num_workers: 2 @@ -132,7 +132,7 @@ MaskRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml index 
249848912a55bf48b033e96bcf3ee8544a4a00f8..69e2279a2247453af99fbc2eeb6ce14168e899cb 100644 --- a/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml +++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_fpn_2x.yml @@ -120,7 +120,7 @@ MaskRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 num_workers: 2 @@ -132,7 +132,7 @@ MaskRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml b/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml index 90b817a0c3a8d556cfc48b0d78907d0b63777aee..a67938b20dc7b15a799a4a9223d45a02a14def7b 100644 --- a/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml +++ b/PaddleCV/object_detection/configs/mask_rcnn_r50_vd_fpn_2x.yml @@ -122,7 +122,7 @@ MaskRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco image_dir: train2017 annotation: annotations/instances_train2017.json num_workers: 2 @@ -135,7 +135,7 @@ MaskRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml b/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml index eebc248c95a1ae689efc80568538385388619438..c994a7e185144752d5ed9f696e521e686b0b6f63 100644 --- a/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml +++ b/PaddleCV/object_detection/configs/mask_rcnn_se154_vd_fpn_s1x.yml @@ -124,7 +124,7 @@ MaskRCNNTrainFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco image_dir: train2017 annotation: 
annotations/instances_train2017.json num_workers: 2 @@ -137,7 +137,7 @@ MaskRCNNEvalFeed: - !PadBatch pad_to_stride: 32 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml b/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml index 082837fd6beef3c2c6ff2448896f5965ee310d40..5477fe2eb93f08d495d57407394db9100db67bbb 100644 --- a/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml +++ b/PaddleCV/object_detection/configs/retinanet_r50_fpn_1x.yml @@ -80,7 +80,7 @@ FasterRCNNTrainFeed: - !PadBatch pad_to_stride: 128 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 num_workers: 2 @@ -92,7 +92,7 @@ FasterRCNNEvalFeed: - !PadBatch pad_to_stride: 128 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 num_workers: 2 diff --git a/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml b/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml index 4053a9908b2aa1506436bdfb25d91e51d2d701e6..252b14ce54b2830f6dd859e4b70855f8c0f41c18 100644 --- a/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml +++ b/PaddleCV/object_detection/configs/ssd_mobilenet_v1_voc.yml @@ -62,7 +62,7 @@ SSDTrainFeed: batch_size: 32 use_process: true dataset: - dataset_dir: data/voc + dataset_dir: dataset/voc annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt image_dir: VOCdevkit/VOC_all/JPEGImages use_default_label: true @@ -71,7 +71,7 @@ SSDEvalFeed: batch_size: 64 use_process: true dataset: - dataset_dir: data/voc + dataset_dir: dataset/voc annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt image_dir: VOCdevkit/VOC_all/JPEGImages use_default_label: true diff --git a/PaddleCV/object_detection/configs/yolov3_darknet.yml 
b/PaddleCV/object_detection/configs/yolov3_darknet.yml index 886d4e23b4dbd4cfaaebc9cccee8bdd1dfd2ff3d..b317ec18d45da7c7f87e8b89956dca1b186dac59 100644 --- a/PaddleCV/object_detection/configs/yolov3_darknet.yml +++ b/PaddleCV/object_detection/configs/yolov3_darknet.yml @@ -60,7 +60,7 @@ OptimizerBuilder: YoloTrainFeed: batch_size: 8 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 num_workers: 8 @@ -70,7 +70,7 @@ YoloTrainFeed: YoloEvalFeed: batch_size: 8 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 diff --git a/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml b/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml index d27449beedb7c8d7665112e0ea4aec533192b608..5f8232a749a9b31567142803fc833282f576beff 100644 --- a/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml +++ b/PaddleCV/object_detection/configs/yolov3_mobilenet_v1.yml @@ -61,7 +61,7 @@ OptimizerBuilder: YoloTrainFeed: batch_size: 8 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 num_workers: 8 @@ -71,7 +71,7 @@ YoloTrainFeed: YoloEvalFeed: batch_size: 8 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 diff --git a/PaddleCV/object_detection/configs/yolov3_r34.yml b/PaddleCV/object_detection/configs/yolov3_r34.yml index e782992ad9a252e12f265601e095d11febc63021..3f212bdad0031ed6f51014b7dee5d9fd8f963d37 100644 --- a/PaddleCV/object_detection/configs/yolov3_r34.yml +++ b/PaddleCV/object_detection/configs/yolov3_r34.yml @@ -63,7 +63,7 @@ OptimizerBuilder: YoloTrainFeed: batch_size: 8 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_train2017.json image_dir: train2017 num_workers: 8 @@ -73,7 +73,7 @@ YoloTrainFeed: 
YoloEvalFeed: batch_size: 8 dataset: - dataset_dir: data/coco + dataset_dir: dataset/coco annotation: annotations/instances_val2017.json image_dir: val2017 diff --git a/PaddleCV/object_detection/dataset/coco/download.sh b/PaddleCV/object_detection/dataset/coco/download.sh new file mode 100644 index 0000000000000000000000000000000000000000..6f262ccebb635e993b35349890a793430d9ad597 --- /dev/null +++ b/PaddleCV/object_detection/dataset/coco/download.sh @@ -0,0 +1,20 @@ +DIR="$( cd "$(dirname "$0")" ; pwd -P )" +cd "$DIR" + +# Download the data. +echo "Downloading..." +wget http://images.cocodataset.org/zips/train2014.zip +wget http://images.cocodataset.org/zips/val2014.zip +wget http://images.cocodataset.org/zips/train2017.zip +wget http://images.cocodataset.org/zips/val2017.zip +wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip +wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip +# Extract the data. +echo "Extracting..." +unzip train2014.zip +unzip val2014.zip +unzip train2017.zip +unzip val2017.zip +unzip annotations_trainval2014.zip +unzip annotations_trainval2017.zip + diff --git a/PaddleCV/object_detection/dataset/voc/download.sh b/PaddleCV/object_detection/dataset/voc/download.sh new file mode 100755 index 0000000000000000000000000000000000000000..2c7341a4114013733cb5d002e87d0260c90711b7 --- /dev/null +++ b/PaddleCV/object_detection/dataset/voc/download.sh @@ -0,0 +1,16 @@ +DIR="$( cd "$(dirname "$0")" ; pwd -P )" +cd "$DIR" + +# Download the data. +echo "Downloading..." +wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar +wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar +wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar +# Extract the data. +echo "Extracting..." +tar -xf VOCtrainval_11-May-2012.tar +tar -xf VOCtrainval_06-Nov-2007.tar +tar -xf VOCtest_06-Nov-2007.tar + +echo "Creating data lists..." 
+python -c 'from ppdet.utils.voc_utils import merge_and_create_list; merge_and_create_list("VOCdevkit", ["2007", "2012"], "VOCdevkit/VOC_all")' diff --git a/PaddleCV/object_detection/docs/DATA.md b/PaddleCV/object_detection/docs/DATA.md new file mode 100644 index 0000000000000000000000000000000000000000..b4161e13da89a2979bb4b0e1a0c18e23f6891038 --- /dev/null +++ b/PaddleCV/object_detection/docs/DATA.md @@ -0,0 +1,202 @@ +## Introduction +This is a Python module used to load and convert data into formats for detection model training, evaluation and inference. The converted sample schema is a tuple of np.ndarrays. For example, the schema of Faster R-CNN training data is: `[(im, im_info, im_id, gt_bbox, gt_class, is_crowd), (...)]`. + +### Implementation +This module consists of four sub-systems: data parsing, image pre-processing, data conversion and data feeding APIs. + +We use `dataset.Dataset` to abstract a set of data samples. For example, `COCO` data contains 3 sets of data for training, validation, and testing respectively. Original data stored in files could be loaded into memory using `dataset.source`; then make use of `dataset.transform` to process the data; finally, the batch data could be fetched by the API of `dataset.Reader`. + +Sub-systems introduction: +1. Data parsing +By data parsing, we can get a `dataset.Dataset` instance, whose implementation is located in `dataset.source`. This sub-system is used to parse different data formats, which makes it easy to add support for new data formats. Currently, only the following data sources are included: + +- COCO data source +This kind of source is used to load `COCO` data directly, e.g. `COCO2017`. It's composed of json files for labeling info and image files. And its directory structure is as follows: + + ``` + dataset/coco/ + ├── annotations + │ ├── instances_train2017.json + │ ├── instances_val2017.json + | ... + ├── train2017 + │ ├── 000000000009.jpg + │ ├── 000000580008.jpg + | ... 
+ ├── val2017 + │ ├── 000000000139.jpg + │ ├── 000000000285.jpg + | ... + ``` + +- Pascal VOC data source +This kind of source is used to load `VOC` data directly, e.g. `VOC2007`. It's composed of xml files for labeling info and image files. And its directory structure is as follows: + + + ``` + data/pascalvoc/ ├──Annotations │ ├── i000050.jpg │ ├── 003876.xml | ... ├── ImageSets │ ├──Main └── train.txt └── val.txt └── test.txt └── dog_train.txt └── dog_trainval.txt └── dog_val.txt └── dog_test.txt └── ... │ ├──Layout └──... │ ├── Segmentation └──... ├── JPEGImages │ ├── 000050.jpg │ ├── 003876.jpg | ... + ``` + + + +- Roidb data source +This kind of source is a normalized data format which only contains a pickle file. The pickle file only has a dictionary which only has a list named 'records' (maybe there is a mapping file for label name to label id named 'cname2id'). You can convert `COCO` or `VOC` data into this format. The pickle file's content is as follows: +```python +(records, catname2clsid) +'records' is list of dict whose structure is: +{ + 'im_file': im_fname, # image file name + 'im_id': im_id, # image id + 'h': im_h, # height of image + 'w': im_w, # width + 'is_crowd': is_crowd, + 'gt_class': gt_class, + 'gt_bbox': gt_bbox, + 'gt_poly': gt_poly, +} +'cname2id' is a dict to map category name to class id + +``` +We also provide the tool to generate the roidb data source in `./tools/`. You can use the following command to generate it. +```python +# --type: the type of original data (xml or json) +# --annotation: the path of file, which contains the name of annotation files +# --save-dir: the save path +# --samples: the number of samples (default is -1, which means all data in the dataset) +python ./tools/generate_data_for_training.py + --type=json \ + --annotation=./annotations/instances_val2017.json \ + --save-dir=./roidb \ + --samples=-1 +``` + + 2. 
 Image preprocessing + Image preprocessing subsystem includes operations such as image decoding, expanding, cropping, etc. We use `dataset.transform.operator` to unify the implementation, which is convenient for extension. In addition, multiple operators can be combined to form a complex processing pipeline, and used by data transformers in `dataset.transformer`, such as multi-threading to accelerate a complex image data processing. + + 3. Data transformer + The data transformer is used to convert a `dataset.Dataset` to a new `dataset.Dataset`, for example: convert a jpeg image dataset into a decoded and resized dataset. We use the decorator pattern to implement different transformers which are all subclasses of `dataset.Dataset`. For example, the `dataset.transform.parallel_map` transformer is for multi-process preprocessing, more transformers can be found in `dataset.transform.transformer`. + + 4. Data feeding APIs +To facilitate data pipeline building and data feeding for training, we combine multiple `dataset.Dataset` to form a `dataset.Reader` which can provide data for training, validation and testing respectively. The user only needs to call `Reader.[train|eval|infer]` to get the corresponding data stream. `Reader` supports yaml files to configure data address, preprocessing operators, acceleration mode, and so on. + + + +The main APIs are as follows: + + + +1. Data parsing + + - `source/coco_loader.py`: Used to parse the COCO dataset. [detail code](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/coco_loader.py) + - `source/voc_loader.py`: Used to parse the Pascal VOC dataset. 
[detail code](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/voc_loader.py) + [Note] When using VOC datasets, if you do not use the default label list, you need to generate `label_list.txt` using `tools/generate_data_for_training.py` (the usage method is the same as generating the roidb data source) or provide `label_list.txt` in `data/pascalvoc/ImageSets/Main` firstly. Also set the parameter `use_default_label` to `false` in the configuration file. + - `source/loader.py`: Use to parse the Roidb dataset. [detail code](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/loader.py) + +2. Operator + `transform/operators.py`: Contains a variety of data enhancement methods, including: + +``` python +RandomFlipImage: Horizontal flip. +RandomDistort: Distort brightness, contrast, saturation, and hue. +ResizeImage: Adjust the image size according to the specific interpolation method. +RandomInterpImage: Use a random interpolation method to resize the image. +CropImage: Crop image with respect to different scale, aspect ratio, and overlap. +ExpandImage: Put the original image into a larger expanded image which is initialized using image mean. +DecodeImage: Read images in RGB format. +Permute: Arrange the channels of the image and convert to the BGR format. +NormalizeImage: Normalize image pixel values. +NormalizeBox: Normalize the bounding box. +MixupImage: Mixup two images in proportion. +``` +[Note] The mixup operation can refer to the [paper](https://arxiv.org/pdf/1710.09412.pdf). + +`transform/arrange_sample.py`: Sort the data which needs to be input to the network. +3. Transformer +`transform/post_map.py`: A pre-processing operation for completing batch data, which mainly includes: + +``` python +Randomly adjust the image size of the batch data +Multi-scale adjustment of image size +Padding operation +``` +`transform/transformer.py`: Used to filter useless data and return batch data.
+`transform/parallel_map.py`: Used to achieve acceleration. +4. Reader +`reader.py`: Used to combine source and transformer operations, and return batch data according to `max_iter`. +`data_feed.py`: Configure default parameters for `reader.py`. + + + + + +### Usage + +#### Ordinary usage +The function of this module is completed by combining the configuration information in the yaml file. The use of yaml files can be found in the configuration file section. + + - Read data for training + +``` python +ccfg = load_cfg('./config.yml') +coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1) +``` +#### How to use customized dataset? +- Option 1: Convert the dataset to the VOC format or COCO format. +```python + # In ./tools/, the code named labelme2coco.py is provided to convert + # the dataset which is annotated by Labelme to a COCO dataset. + python ./tools/labelme2coco.py --json_input_dir ./labelme_annos/ + --image_input_dir ./labelme_imgs/ + --output_dir ./cocome/ + --train_proportion 0.8 + --val_proportion 0.2 + --test_proportion 0.0 + # --json_input_dir:The path of json files which are annotated by Labelme. + # --image_input_dir:The path of images. + # --output_dir:The path of converted COCO dataset. + # --train_proportion:The train proportion of annotation data. + # --val_proportion:The validation proportion of annotation data. + # --test_proportion: The inference proportion of annotation data. +``` +- Option 2: + +1. Following the `./source/coco_loader.py` and `./source/voc_loader.py`, add `./source/XX_loader.py` and implement the `load` function. +2. Add the entry for `./source/XX_loader.py` in the `load` function of `./source/loader.py`. +3. Modify `./source/__init__.py`: + + +```python +if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource']: + source_type = 'RoiDbSource' +# Replace the above code with the following code: +if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource', 'XXSource']: + source_type = 'RoiDbSource' +``` + +4.
In the configure file, define the `type` of `dataset` as `XXSource`。 + +#### How to add data pre-processing? +- If you want to add the enhanced preprocessing of a single image, you can refer to the code of each class in `transform/operators.py`, and create a new class to implement new data enhancement. Also add the name of this preprocessing to the configuration file. +- If you want to add image preprocessing for a single batch, you can refer to the code for each function in `build_post_map` of `transform/post_map.py`, and create a new internal function to implement new batch data preprocessing. Also add the name of this preprocessing to the configuration file. diff --git a/PaddleCV/object_detection/docs/DATA_cn.md b/PaddleCV/object_detection/docs/DATA_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..7c724d69af54203870aad362b7ee8b5444433260 --- /dev/null +++ b/PaddleCV/object_detection/docs/DATA_cn.md @@ -0,0 +1,201 @@ +## 介绍 +本模块是一个Python模块,用于加载数据并将其转换成适用于检测模型的训练、验证、测试所需要的格式——由多个np.ndarray组成的tuple数组,例如用于Faster R-CNN模型的训练数据格式为:`[(im, im_info, im_id, gt_bbox, gt_class, is_crowd), (...)]`。 + +### 实现 +该模块内部可分为4个子功能:数据解析、图片预处理、数据转换和数据获取接口。 + +我们采用`dataset.Dataset`表示一份数据,比如`COCO`数据包含3份数据,分别用于训练、验证和测试。原始数据存储与文件中,通过`dataset.source`加载到内存,然后使用`dataset.transform`对数据进行处理转换,最终通过`dataset.Reader`的接口可以获得用于训练、验证和测试的batch数据。 + +子功能介绍: + +1. 数据解析 + 数据解析得到的是`dataset.Dataset`,实现逻辑位于`dataset.source`中。通过它可以实现解析不同格式的数据集,已支持的数据源包括: +- COCO数据源 + 该数据集目前分为COCO2012和COCO2017,主要由json文件和image文件组成,其组织结构如下所示: + + ``` + data/coco/ + ├── annotations + │ ├── instances_train2014.json + │ ├── instances_train2017.json + │ ├── instances_val2014.json + │ ├── instances_val2017.json + | ... + ├── train2017 + │ ├── 000000000009.jpg + │ ├── 000000580008.jpg + | ... + ├── val2017 + │ ├── 000000000139.jpg + │ ├── 000000000285.jpg + | ... 
+ ``` + + +- Pascal VOC数据源 + 该数据集目前分为VOC2007和VOC2012,主要由xml文件和image文件组成,其组织结构如下所示: + + + ``` + data/pascalvoc/ + ├──Annotations + │ ├── i000050.jpg + │ ├── 003876.xml + | ... + ├── ImageSets + │ ├──Main + └── train.txt + └── val.txt + └── test.txt + └── dog_train.txt + └── dog_trainval.txt + └── dog_val.txt + └── dog_test.txt + └── ... + │ ├──Layout + └──... + │ ├── Segmentation + └──... + ├── JPEGImages + │ ├── 000050.jpg + │ ├── 003876.jpg + | ... + ``` + + + +- Roidb数据源 + 该数据集主要由COCO数据集和Pascal VOC数据集转换而成的pickle文件,包含一个dict,而dict中只包含一个命名为‘records’的list(可能还有一个命名为‘cname2cid’的字典),其内容如下所示: +```python +(records, catname2clsid) +'records'是一个list并且它的结构如下: +{ + 'im_file': im_fname, # 图像文件名 + 'im_id': im_id, # 图像id + 'h': im_h, # 图像高度 + 'w': im_w, # 图像宽度 + 'is_crowd': is_crowd, # 是否重叠 + 'gt_class': gt_class, # 真实框类别 + 'gt_bbox': gt_bbox, # 真实框坐标 + 'gt_poly': gt_poly, # 多边形坐标 +} +'cname2id'是一个dict,保存了类别名到id的映射 + +``` +我们在`./tools/`中提供了一个生成roidb数据集的代码,可以通过下面命令实现该功能。 +```python +# --type: 原始数据集的类别(只能是xml或者json) +# --annotation: 一个包含所需标注文件名的文件的路径 +# --save-dir: 保存路径 +# --samples: sample的个数(默认是-1,代表使用所有sample) +python ./tools/generate_data_for_training.py + --type=json \ + --annotation=./annotations/instances_val2017.json \ + --save-dir=./roidb \ + --samples=-1 +``` + 2. 图片预处理 + 图片预处理通过包括图片解码、缩放、裁剪等操作,我们采用`dataset.transform.operator`算子的方式来统一实现,这样能方便扩展。此外,多个算子还可以组合形成复杂的处理流程, 并被`dataset.transformer`中的转换器使用,比如多线程完成一个复杂的预处理流程。 + + 3. 数据转换器 + 数据转换器的功能是完成对某个`dataset.Dataset`进行转换处理,从而得到一个新的`dataset.Dataset`。我们采用装饰器模式实现各种不同的`dataset.transform.transformer`。比如用于多进程预处理的`dataset.transform.paralle_map`转换器。 + + 4. 数据获取接口 + 为方便训练时的数据获取,我们将多个`dataset.Dataset`组合在一起构成一个`dataset.Reader`为用户提供数据,用户只需要调用`Reader.[train|eval|infer]`即可获得对应的数据流。`Reader`支持yaml文件配置数据地址、预处理过程、加速方式等。 + +主要的APIs如下: + + + + +1. 
数据解析 + + - `source/coco_loader.py`:用于解析COCO数据集。[详见代码](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/coco_loader.py) + - `source/voc_loader.py`:用于解析Pascal VOC数据集。[详见代码](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/voc_loader.py) + [注意]在使用VOC数据集时,若不使用默认的label列表,则需要先使用`tools/generate_data_for_training.py`生成`label_list.txt`(使用方式与数据解析中的roidb数据集获取过程一致),或提供`label_list.txt`放置于`data/pascalvoc/ImageSets/Main`中;同时在配置文件中设置参数`use_default_label`为`true`。 + - `source/loader.py`:用于解析Roidb数据集。[详见代码](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/loader.py) + +2. 算子 + `transform/operators.py`:包含多种数据增强方式,主要包括: + +``` python +RandomFlipImage:水平翻转。 +RandomDistort:随机扰动图片亮度、对比度、饱和度和色相。 +ResizeImage:根据特定的插值方式调整图像大小。 +RandomInterpImage:使用随机的插值方式调整图像大小。 +CropImage:根据缩放比例、长宽比例两个参数生成若干候选框,再依据这些候选框和标注框的面积交并比(IoU)挑选出符合要求的裁剪结果。 +ExpandImage:将原始图片放进一张使用像素均值填充(随后会在减均值操作中减掉)的扩张图中,再对此图进行裁剪、缩放和翻转。 +DecodeImage:以RGB格式读取图像。 +Permute:对图像的通道进行排列并转为BGR格式。 +NormalizeImage:对图像像素值进行归一化。 +NormalizeBox:对bounding box进行归一化。 +MixupImage:按比例叠加两张图像。 +``` +[注意]:Mixup的操作可参考[论文](https://arxiv.org/pdf/1710.09412.pdf)。 + +`transform/arrange_sample.py`:实现对输入网络数据的排序。 +3. 转换 +`transform/post_map.py`:用于完成批数据的预处理操作,其主要包括: + +``` python +随机调整批数据的图像大小 +多尺度调整图像大小 +padding操作 +``` +`transform/transformer.py`:用于过滤无用的数据,并返回批数据。 +`transform/parallel_map.py`:用于实现加速。 +4. 读取 +`reader.py`:用于组合source和transformer操作,根据`max_iter`返回batch数据。 +`data_feed.py`: 用于配置 `reader.py`中所需的默认参数. + + + + +### 使用 +#### 常规使用 +结合yaml文件中的配置信息,完成本模块的功能。yaml文件的使用可以参见配置文件部分。 + + - 读取用于训练的数据 + +``` python +ccfg = load_cfg('./config.yml') +coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1) +``` +#### 如何使用自定义数据集? 
+ +- 选择1:将数据集转换为VOC格式或者COCO格式。 +```python + # 在./tools/中提供了labelme2coco.py用于将labelme标注的数据集转换为COCO数据集 + python ./tools/labelme2coco.py --json_input_dir ./labelme_annos/ + --image_input_dir ./labelme_imgs/ + --output_dir ./cocome/ + --train_proportion 0.8 + --val_proportion 0.2 + --test_proportion 0.0 + # --json_input_dir:使用labelme标注的json文件所在文件夹 + # --image_input_dir:图像文件所在文件夹 + # --output_dir:转换后的COCO格式数据集存放位置 + # --train_proportion:标注数据中用于train的比例 + # --val_proportion:标注数据中用于validation的比例 + # --test_proportion: 标注数据中用于infer的比例 +``` +- 选择2: + +1. 仿照`./source/coco_loader.py`和`./source/voc_loader.py`,添加`./source/XX_loader.py`并实现`load`函数。 +2. 在`./source/loader.py`的`load`函数中添加使用`./source/XX_loader.py`的入口。 +3. 修改`./source/__init__.py`: + + +```python +if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource']: + source_type = 'RoiDbSource' +# 将上述代码替换为如下代码: +if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource', 'XXSource']: + source_type = 'RoiDbSource' +``` + +4. 在配置文件中修改`dataset`下的`type`为`XXSource`。 + +#### 如何增加数据预处理? 
+- 若增加单张图像的增强预处理,可在`transform/operators.py`中参考每个类的代码,新建一个类来实现新的数据增强;同时在配置文件中增加该预处理。 +- 若增加单个batch的图像预处理,可在`transform/post_map.py`中参考`build_post_map`中每个函数的代码,新建一个内部函数来实现新的批数据预处理;同时在配置文件中增加该预处理。 diff --git a/PaddleCV/object_detection/docs/INSTALL.md b/PaddleCV/object_detection/docs/INSTALL.md index f31943988358c773e4b8389d3b90b608140e571a..b6d9215f499b48e8c986d228c718ef67487d99b2 100644 --- a/PaddleCV/object_detection/docs/INSTALL.md +++ b/PaddleCV/object_detection/docs/INSTALL.md @@ -35,7 +35,7 @@ python -c "import paddle; print(paddle.__version__)" - Python2 or Python3 - CUDA >= 8.0 -- cuDNN >= 7.0 +- cuDNN >= 5.0 - nccl >= 2.1.2 @@ -68,9 +68,9 @@ git clone https://github.com/PaddlePaddle/models cd models/PaddleCV/object_detection ``` -**Install python module requirements:** +**Install Python module requirements:** -Other python module requirements is set in [requirements.txt](./requirements.txt), you can install these requirements with folloing command: +Other python module requirements is set in [requirements.txt](../requirements.txt), you can install these requirements with folloing command: ``` pip install -r requirements.txt @@ -79,7 +79,7 @@ pip install -r requirements.txt **Check PaddleDetection architectures tests pass:** ``` -export PYTHONPATH=$PYTHONPATH:. 
+export PYTHONPATH=`pwd`:$PYTHONPATH python ppdet/modeling/tests/test_architectures.py ``` @@ -90,7 +90,7 @@ PaddleDetection support train/eval/infer models with dataset [MSCOCO](http://coc **Create symlinks for datasets:** -Dataset default path in PaddleDetection config files is `data/coco` and `data/voc`, you can set symlinks for your COCO/COCO-like or VOC/VOC-like datasets with following commands: +Dataset default path in PaddleDetection config files is `dataset/coco` and `dataset/voc`, you can set symlinks for your COCO/COCO-like or VOC/VOC-like datasets with following commands: ``` ln -sf $PaddleDetection/data/coco @@ -99,28 +99,18 @@ ln -sf $PaddleDetection/data/voc If you do not have datasets locally, you can download dataset as follows: -- MSCOCO-2017 +- MS-COCO ``` -# download -wget http://images.cocodataset.org/zips/train2017.zip -wget http://images.cocodataset.org/zips/val2017.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip - -# decompress -unzip train2017.zip -unzip val2017.zip -unzip annotations_trainval2017.zip +cd dataset/coco +./download.sh ``` -- VOC2012 +- PASCAL VOC ``` -# download -wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar - -# decompress -tar -xf VOCtrainval_11-May-2012.tar +cd dataset/voc +./download.sh ``` **Auto download datasets:** @@ -128,5 +118,4 @@ tar -xf VOCtrainval_11-May-2012.tar If you set up models while `data/coc` and `data/voc` is not found, PaddleDetection will automaticaly download them from [MSCOCO-2017](http://images.cocodataset.org) and [VOC2012](http://host.robots.ox.ac.uk/pascal/VOC), the decompressed datasets will be places in `~/.cache/paddle/dataset/` and can be discovered automaticaly in the next setting up time. 
-**NOTE:** For further informations on the datasets, please see [DATASET.md](../ppdet/data/README.md) - +**NOTE:** For further informations on the datasets, please see [DATASET.md](DATA.md) diff --git a/PaddleCV/object_detection/ppdet/data/README.md b/PaddleCV/object_detection/ppdet/data/README.md deleted file mode 100644 index 34e647b92855fa16fc92b9dec666da10ce981cc3..0000000000000000000000000000000000000000 --- a/PaddleCV/object_detection/ppdet/data/README.md +++ /dev/null @@ -1,202 +0,0 @@ -## Introduction -This is a Python module used to load and convert data into formats for detection model training, evaluation and inference. The converted sample schema is a tuple of np.ndarrays. For example, the schema of Faster R-CNN training data is: `[(im, im_info, im_id, gt_bbox, gt_class, is_crowd), (...)]`. - -### Implementation -This module is consists of four sub-systems: data parsing, image pre-processing, data conversion and data feeding apis. - -We use `dataset.Dataset` to abstract a set of data samples. For example, `COCO` data contains 3 sets of data for training, validation, and testing respectively. Original data stored in files could be loaded into memory using `dataset.source`; Then make use of `dataset.transform` to process the data; Finally, the batch data could be fetched by the api of `dataset.Reader`. - -Sub-systems introduction: -1. Data prasing -By data parsing, we can get a `dataset.Dataset` instance, whose implementation is located in `dataset.source`. This sub-system is used to parse different data formats, which is easy to add new data format supports. Currently, only following data sources are included: - -- COCO data source -This kind of source is used to load `COCO` data directly, eg: `COCO2017`. It's composed of json files for labeling info and image files. And it's directory structure is as follows: - - ``` - data/coco/ - ├── annotations - │ ├── instances_train2017.json - │ ├── instances_val2017.json - | ... 
- ├── train2017 - │ ├── 000000000009.jpg - │ ├── 000000580008.jpg - | ... - ├── val2017 - │ ├── 000000000139.jpg - │ ├── 000000000285.jpg - | ... - ``` - -- Pascal VOC data source -This kind of source is used to load `VOC` data directly, eg: `VOC2007`. It's composed of xml files for labeling info and image files. And it's directory structure is as follows: - - - ``` - data/pascalvoc/ - ├──Annotations - │ ├── i000050.jpg - │ ├── 003876.xml - | ... - ├── ImageSets - │ ├──Main - └── train.txt - └── val.txt - └── test.txt - └── dog_train.txt - └── dog_trainval.txt - └── dog_val.txt - └── dog_test.txt - └── ... - │ ├──Layout - └──... - │ ├── Segmentation - └──... - ├── JPEGImages - │ ├── 000050.jpg - │ ├── 003876.jpg - | ... - ``` - - - -- Roidb data source -This kind of source is a normalized data format which only contains a pickle file. The pickle file only has a dictionary which only has a list named 'records' (maybe there is a mapping file for label name to label id named 'canme2id'). You can convert `COCO` or `VOC` data into this format. The pickle file's content is as follows: -```python -(records, catname2clsid) -'records' is list of dict whose structure is: -{ - 'im_file': im_fname, # image file name - 'im_id': im_id, # image id - 'h': im_h, # height of image - 'w': im_w, # width - 'is_crowd': is_crowd, - 'gt_class': gt_class, - 'gt_bbox': gt_bbox, - 'gt_poly': gt_poly, -} -'cname2id' is a dict to map category name to class id - -``` -We also provide the tool to generate the roidb data source in `./tools/`. You can use the follow command to implement. 
-```python -# --type: the type of original data (xml or json) -# --annotation: the path of file, which contains the name of annotation files -# --save-dir: the save path -# --samples: the number of samples (default is -1, which mean all datas in dataset) -python ./tools/generate_data_for_training.py - --type=json \ - --annotation=./annotations/instances_val2017.json \ - --save-dir=./roidb \ - --samples=-1 -``` - - 2. Image preprocessing - Image preprocessing subsystem includes operations such as image decoding, expanding, cropping, etc. We use `dataset.transform.operator` to unify the implementation, which is convenient for extension. In addition, multiple operators can be combined to form a complex processing pipeline, and used by data transformers in `dataset.transformer`, such as multi-threading to acclerate a complex image data processing. - - 3. Data transformer - The function of the data transformer is used to convert a `dataset.Dataset` to a new `dataset.Dataset`, for example: convert a jpeg image dataset into a decoded and resized dataset. We use the decorator pattern to implement different transformers which are all subclass of `dataset.Dataset`. For example, the `dataset.transform.paralle_map` transformer is for multi-process preprocessing, more transformers can be found in `dataset.transform.transformer`. - - 4. Data feeding apis -To facilitate data pipeline building and data feeding for training, we combine multiple `dataset.Dataset` to form a `dataset.Reader` which can provide data for training, validation and testing respectively. The user only needs to call `Reader.[train|eval|infer]` to get the corresponding data stream. `Reader` supports yaml file to configure data address, preprocessing oprators, acceleration mode, and so on. - - - -The main APIs are as follows: - - - -1. Data parsing - - - `source/coco_loader.py`: Use to parse the COCO dataset. 
[detail code](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/coco_loader.py) - - `source/voc_loader.py`: Use to parse the Pascal VOC dataset. [detail code](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/voc_loader.py) - [Note] When using VOC datasets, if you do not use the default label list, you need to generate `label_list.txt` using `tools/generate_data_for_training.py` (the usage method is same as generating the roidb data source) or provide `label_list.txt` in `data/pascalvoc/ImageSets/Main` firstly. Also set the parameter `use_default_label` to `false` in the configuration file. - - `source/loader.py`: Use to parse the Roidb dataset. [detail code](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/loader.py) - -2. Operator - `transform/operators.py`: Contains a variety of data enhancement methods, including: - -``` python -RandomFlipImage: Horizontal flip. -RandomDistort: Distort brightness, contrast, saturation, and hue. -ResizeImage: Adjust the image size according to the specific interpolation method. -RandomInterpImage: Use a random interpolation method to resize the image. -CropImage: Crop image with respect to different scale, aspect ratio, and overlap. -ExpandImage: Put the original image into a larger expanded image which is initialized using image mean. -DecodeImage: Read images in RGB format. -Permute: Arrange the channels of the image and converted to the BGR format. -NormalizeImage: Normalize image pixel values. -NormalizeBox: Normalize the bounding box. -MixupImage: Mixup two images in proportion. -``` -[Note] The mixup operation can refer to[paper](https://arxiv.org/pdf/1710.09412.pdf)。 - -`transform/arrange_sample.py`: Sort the data which need to input the network. -3. 
Transformer -`transform/post_map.py`: A pre-processing operation for completing batch data, which mainly includes: - -``` python -Randomly adjust the image size of the batch data -Multi-scale adjustment of image size -Padding operation -``` -`transform/transformer.py`: Used to filter useless data and return batch data. -`transform/parallel_map.py`: Used to achieve acceleration. -4. Reader -`reader.py`: Used to combine source and transformer operations, and return batch data according to `max_iter`. -`data_feed.py`: Configure default parameters for `reader.py`. - - - - - -### Usage - -#### Ordinary usage -The function of this module is completed by combining the configuration information in the yaml file. The use of yaml files can be found in the configuration file section. - - - Read data for training - -``` python -ccfg = load_cfg('./config.yml') -coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1) -``` -#### How to use customized dataset? -- Option 1: Convert the dataset to the VOC format or COCO format. -```python - # In ./tools/, the code named labelme2coco.py is provided to convert - # the dataset which is annotatedby Labelme to a COCO dataset. - python ./tools/labelme2coco.py --json_input_dir ./labelme_annos/ - --image_input_dir ./labelme_imgs/ - --output_dir ./cocome/ - --train_proportion 0.8 - --val_proportion 0.2 - --test_proportion 0.0 - # --json_input_dir:The path of json files which are annotated by Labelme. - # --image_input_dir:The path of images. - # --output_dir:The path of coverted COCO dataset. - # --train_proportion:The train proportion of annatation data. - # --val_proportion:The validation proportion of annatation data. - # --test_proportion: The inference proportion of annatation data. -``` -- Option 2: - -1. Following the `./source/coco_loader.py` and `./source/voc_loader.py`, add `./source/XX_loader.py` and implement the `load` function. -2. Add the entry for `./source/XX_loader.py` in the `load` function of `./source/loader.py`. -3. 
Modify `./source/__init__.py`: - - -```python -if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource']: - source_type = 'RoiDbSource' -# Replace the above code with the following code: -if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource', 'XXSource']: - source_type = 'RoiDbSource' -``` - -4. In the configure file, define the `type` of `dataset` as `XXSource`。 - -#### How to add data pre-processing? -- If you want to add the enhanced preprocessing of a single image, you can refer to the code of each class in `transform/operators.py`, and create a new class to implement new data enhancement. Also add the name of this preprocessing to the configuration file. -- If you want to add image preprocessing for a single batch, you can refer to the code for each function in `build_post_map` of `transform/post_map.py`, and create a new internal function to implement new batch data preprocessing. Also add the name of this preprocessing to the configuration file. diff --git a/PaddleCV/object_detection/ppdet/data/README.md b/PaddleCV/object_detection/ppdet/data/README.md new file mode 120000 index 0000000000000000000000000000000000000000..238fc99bf487f0505c27541ecaa9a64b0bcd62f7 --- /dev/null +++ b/PaddleCV/object_detection/ppdet/data/README.md @@ -0,0 +1 @@ +docs/DATA.md \ No newline at end of file diff --git a/PaddleCV/object_detection/ppdet/data/README_cn.md b/PaddleCV/object_detection/ppdet/data/README_cn.md deleted file mode 100644 index 0dfce342bbb676a2c7accf8ff0d6a19b9e5d5f2f..0000000000000000000000000000000000000000 --- a/PaddleCV/object_detection/ppdet/data/README_cn.md +++ /dev/null @@ -1,201 +0,0 @@ -## 介绍 -本模块是一个Python模块,用于加载数据并将其转换成适用于检测模型的训练、验证、测试所需要的格式——由多个np.ndarray组成的tuple数组,例如用于Faster R-CNN模型的训练数据格式为:`[(im, im_info, im_id, gt_bbox, gt_class, is_crowd), (...)]`。 - -### 实现 -该模块内部可分为4个子功能:数据解析、图片预处理、数据转换和数据获取接口。 - 
-我们采用`dataset.Dataset`表示一份数据,比如`COCO`数据包含3份数据,分别用于训练、验证和测试。原始数据存储与文件中,通过`dataset.source`加载到内存,然后使用`dataset.transform`对数据进行处理转换,最终通过`dataset.Reader`的接口可以获得用于训练、验证和测试的batch数据。 - -子功能介绍: - -1. 数据解析 - 数据解析得到的是`dataset.Dataset`,实现逻辑位于`dataset.source`中。通过它可以实现解析不同格式的数据集,已支持的数据源包括: -- COCO数据源 - 该数据集目前分为COCO2012和COCO2017,主要由json文件和image文件组成,其组织结构如下所示: - - ``` - data/coco/ - ├── annotations - │ ├── instances_train2014.json - │ ├── instances_train2017.json - │ ├── instances_val2014.json - │ ├── instances_val2017.json - | ... - ├── train2017 - │ ├── 000000000009.jpg - │ ├── 000000580008.jpg - | ... - ├── val2017 - │ ├── 000000000139.jpg - │ ├── 000000000285.jpg - | ... - ``` - - -- Pascal VOC数据源 - 该数据集目前分为VOC2007和VOC2012,主要由xml文件和image文件组成,其组织结构如下所示: - - - ``` - data/pascalvoc/ - ├──Annotations - │ ├── i000050.jpg - │ ├── 003876.xml - | ... - ├── ImageSets - │ ├──Main - └── train.txt - └── val.txt - └── test.txt - └── dog_train.txt - └── dog_trainval.txt - └── dog_val.txt - └── dog_test.txt - └── ... - │ ├──Layout - └──... - │ ├── Segmentation - └──... - ├── JPEGImages - │ ├── 000050.jpg - │ ├── 003876.jpg - | ... - ``` - - - -- Roidb数据源 - 该数据集主要由COCO数据集和Pascal VOC数据集转换而成的pickle文件,包含一个dict,而dict中只包含一个命名为‘records’的list(可能还有一个命名为‘cname2cid’的字典),其内容如下所示: -```python -(records, catname2clsid) -'records'是一个list并且它的结构如下: -{ - 'im_file': im_fname, # 图像文件名 - 'im_id': im_id, # 图像id - 'h': im_h, # 图像高度 - 'w': im_w, # 图像宽度 - 'is_crowd': is_crowd, # 是否重叠 - 'gt_class': gt_class, # 真实框类别 - 'gt_bbox': gt_bbox, # 真实框坐标 - 'gt_poly': gt_poly, # 多边形坐标 -} -'cname2id'是一个dict,保存了类别名到id的映射 - -``` -我们在`./tools/`中提供了一个生成roidb数据集的代码,可以通过下面命令实现该功能。 -```python -# --type: 原始数据集的类别(只能是xml或者json) -# --annotation: 一个包含所需标注文件名的文件的路径 -# --save-dir: 保存路径 -# --samples: sample的个数(默认是-1,代表使用所有sample) -python ./tools/generate_data_for_training.py - --type=json \ - --annotation=./annotations/instances_val2017.json \ - --save-dir=./roidb \ - --samples=-1 -``` - 2. 
图片预处理 - 图片预处理通过包括图片解码、缩放、裁剪等操作,我们采用`dataset.transform.operator`算子的方式来统一实现,这样能方便扩展。此外,多个算子还可以组合形成复杂的处理流程, 并被`dataset.transformer`中的转换器使用,比如多线程完成一个复杂的预处理流程。 - - 3. 数据转换器 - 数据转换器的功能是完成对某个`dataset.Dataset`进行转换处理,从而得到一个新的`dataset.Dataset`。我们采用装饰器模式实现各种不同的`dataset.transform.transformer`。比如用于多进程预处理的`dataset.transform.paralle_map`转换器。 - - 4. 数据获取接口 - 为方便训练时的数据获取,我们将多个`dataset.Dataset`组合在一起构成一个`dataset.Reader`为用户提供数据,用户只需要调用`Reader.[train|eval|infer]`即可获得对应的数据流。`Reader`支持yaml文件配置数据地址、预处理过程、加速方式等。 - -主要的APIs如下: - - - - -1. 数据解析 - - - `source/coco_loader.py`:用于解析COCO数据集。[详见代码](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/coco_loader.py) - - `source/voc_loader.py`:用于解析Pascal VOC数据集。[详见代码](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/voc_loader.py) - [注意]在使用VOC数据集时,若不使用默认的label列表,则需要先使用`tools/generate_data_for_training.py`生成`label_list.txt`(使用方式与数据解析中的roidb数据集获取过程一致),或提供`label_list.txt`放置于`data/pascalvoc/ImageSets/Main`中;同时在配置文件中设置参数`use_default_label`为`true`。 - - `source/loader.py`:用于解析Roidb数据集。[详见代码](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/ppdet/data/source/loader.py) - -2. 算子 - `transform/operators.py`:包含多种数据增强方式,主要包括: - -``` python -RandomFlipImage:水平翻转。 -RandomDistort:随机扰动图片亮度、对比度、饱和度和色相。 -ResizeImage:根据特定的插值方式调整图像大小。 -RandomInterpImage:使用随机的插值方式调整图像大小。 -CropImage:根据缩放比例、长宽比例两个参数生成若干候选框,再依据这些候选框和标注框的面积交并比(IoU)挑选出符合要求的裁剪结果。 -ExpandImage:将原始图片放进一张使用像素均值填充(随后会在减均值操作中减掉)的扩张图中,再对此图进行裁剪、缩放和翻转。 -DecodeImage:以RGB格式读取图像。 -Permute:对图像的通道进行排列并转为BGR格式。 -NormalizeImage:对图像像素值进行归一化。 -NormalizeBox:对bounding box进行归一化。 -MixupImage:按比例叠加两张图像。 -``` -[注意]:Mixup的操作可参考[论文](https://arxiv.org/pdf/1710.09412.pdf)。 - -`transform/arrange_sample.py`:实现对输入网络数据的排序。 -3. 转换 -`transform/post_map.py`:用于完成批数据的预处理操作,其主要包括: - -``` python -随机调整批数据的图像大小 -多尺度调整图像大小 -padding操作 -``` -`transform/transformer.py`:用于过滤无用的数据,并返回批数据。 -`transform/parallel_map.py`:用于实现加速。 -4. 
读取 -`reader.py`:用于组合source和transformer操作,根据`max_iter`返回batch数据。 -`data_feed.py`: 用于配置 `reader.py`中所需的默认参数. - - - - -### 使用 -#### 常规使用 -结合yaml文件中的配置信息,完成本模块的功能。yaml文件的使用可以参见配置文件部分。 - - - 读取用于训练的数据 - -``` python -ccfg = load_cfg('./config.yml') -coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1) -``` -#### 如何使用自定义数据集? - -- 选择1:将数据集转换为VOC格式或者COCO格式。 -```python - # 在./tools/中提供了labelme2coco.py用于将labelme标注的数据集转换为COCO数据集 - python ./tools/labelme2coco.py --json_input_dir ./labelme_annos/ - --image_input_dir ./labelme_imgs/ - --output_dir ./cocome/ - --train_proportion 0.8 - --val_proportion 0.2 - --test_proportion 0.0 - # --json_input_dir:使用labelme标注的json文件所在文件夹 - # --image_input_dir:图像文件所在文件夹 - # --output_dir:转换后的COCO格式数据集存放位置 - # --train_proportion:标注数据中用于train的比例 - # --val_proportion:标注数据中用于validation的比例 - # --test_proportion: 标注数据中用于infer的比例 -``` -- 选择2: - -1. 仿照`./source/coco_loader.py`和`./source/voc_loader.py`,添加`./source/XX_loader.py`并实现`load`函数。 -2. 在`./source/loader.py`的`load`函数中添加使用`./source/XX_loader.py`的入口。 -3. 修改`./source/__init__.py`: - - -```python -if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource']: - source_type = 'RoiDbSource' -# 将上述代码替换为如下代码: -if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource', 'XXSource']: - source_type = 'RoiDbSource' -``` - -4. 在配置文件中修改`dataset`下的`type`为`XXSource`。 - -#### 如何增加数据预处理? 
-- 若增加单张图像的增强预处理,可在`transform/operators.py`中参考每个类的代码,新建一个类来实现新的数据增强;同时在配置文件中增加该预处理。 -- 若增加单个batch的图像预处理,可在`transform/post_map.py`中参考`build_post_map`中每个函数的代码,新建一个内部函数来实现新的批数据预处理;同时在配置文件中增加该预处理。 diff --git a/PaddleCV/object_detection/ppdet/data/README_cn.md b/PaddleCV/object_detection/ppdet/data/README_cn.md new file mode 120000 index 0000000000000000000000000000000000000000..c8e59f3054954c6abe6732b01998a87d6d3074c4 --- /dev/null +++ b/PaddleCV/object_detection/ppdet/data/README_cn.md @@ -0,0 +1 @@ +docs/DATA_cn.md \ No newline at end of file diff --git a/PaddleCV/object_detection/ppdet/utils/voc_utils.py b/PaddleCV/object_detection/ppdet/utils/voc_utils.py index 213e16934d9fcac285dfe4b07a23f43ff2d1264e..2d7fc4a3618e047275d3c8f5366fdf03545be6de 100644 --- a/PaddleCV/object_detection/ppdet/utils/voc_utils.py +++ b/PaddleCV/object_detection/ppdet/utils/voc_utils.py @@ -48,7 +48,7 @@ def merge_and_create_list(devkit_dir, years, output_dir): with open(osp.join(main_dir, 'train.txt'), 'w') as ftrainval: for item in trainval_list: ftrainval.write(item + '\n') - + with open(osp.join(main_dir, 'val.txt'), 'w') as fval: with open(osp.join(main_dir, 'test.txt'), 'w') as ftest: ct = 0 @@ -86,13 +86,14 @@ def _walk_voc_dir(devkit_dir, year, output_dir): if name_prefix in added: continue added.add(name_prefix) - ann_path = osp.join(annotation_dir, name_prefix + '.xml') + ann_path = osp.join(annotation_dir, name_prefix + '.xml') img_path = osp.join(img_dir, name_prefix + '.jpg') - new_ann_path = osp.join(output_dir, 'Annotations/', name_prefix + '.xml') - new_img_path = osp.join(output_dir, 'JPEGImages/', name_prefix + '.jpg') + new_ann_path = osp.join(output_dir, 'Annotations/', + name_prefix + '.xml') + new_img_path = osp.join(output_dir, 'JPEGImages/', + name_prefix + '.jpg') shutil.copy(ann_path, new_ann_path) shutil.copy(img_path, new_img_path) img_ann_list.append(name_prefix) return trainval_list, test_list - diff --git a/PaddleCV/object_detection/tools/eval.py 
b/PaddleCV/object_detection/tools/eval.py index 2f1fa525c7e46ee90968a285ac373b8b25f98e14..a8feac1fd616ec49381999de8e9b493d0982f10a 100644 --- a/PaddleCV/object_detection/tools/eval.py +++ b/PaddleCV/object_detection/tools/eval.py @@ -49,8 +49,8 @@ def main(): if cfg.use_gpu: devices_num = fluid.core.get_cuda_device_count() else: - devices_num = int(os.environ.get('CPU_NUM', - multiprocessing.cpu_count())) + devices_num = int( + os.environ.get('CPU_NUM', multiprocessing.cpu_count())) if 'eval_feed' not in cfg: eval_feed = create(main_arch + 'EvalFeed') @@ -61,8 +61,7 @@ def main(): place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) - # 2. build program - # get detector and losses + # build program model = create(main_arch) startup_prog = fluid.Program() eval_prog = fluid.Program() @@ -75,7 +74,7 @@ def main(): reader = create_reader(eval_feed) pyreader.decorate_sample_list_generator(reader, place) - # 3. Compile program for multi-devices + # compile program for multi-devices if devices_num <= 1: compile_program = fluid.compiler.CompiledProgram(eval_prog) else: @@ -85,7 +84,7 @@ def main(): compile_program = fluid.compiler.CompiledProgram( eval_prog).with_data_parallel(build_strategy=build_strategy) - # 5. Load model + # load model exe.run(startup_prog) if 'weights' in cfg: checkpoint.load_pretrain(exe, eval_prog, cfg.weights) @@ -96,9 +95,8 @@ def main(): keys, values, cls = parse_fetches(fetches, eval_prog, extra_keys) - # 6. Run results = eval_run(exe, compile_program, pyreader, keys, values, cls) - # Evaluation + # evaluation resolution = None if 'mask' in results[0]: resolution = model.mask_head.resolution @@ -112,7 +110,6 @@ if __name__ == '__main__': "--output_file", default=None, type=str, - help="Evaluation file name, default to bbox.json and mask.json." 
- ) + help="Evaluation file name, default to bbox.json and mask.json.") FLAGS = parser.parse_args() main() diff --git a/PaddleCV/object_detection/tools/infer.py b/PaddleCV/object_detection/tools/infer.py index c4ee32d3e62b1a5c37061d1795126b1913b4870a..4ee1b5dd94538e8a1e1dfa60b5fe411b68107386 100644 --- a/PaddleCV/object_detection/tools/infer.py +++ b/PaddleCV/object_detection/tools/infer.py @@ -127,7 +127,7 @@ def main(): extra_keys = ['im_id'] keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys) - # 6. Parse dataset category + # parse dataset category if cfg.metric == 'COCO': from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info if cfg.metric == "VOC": @@ -155,8 +155,7 @@ def main(): mask_results = None is_bbox_normalized = True if cfg.metric == 'VOC' else False if 'bbox' in res: - bbox_results = bbox2out([res], clsid2catid, - is_bbox_normalized) + bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized) if 'mask' in res: mask_results = mask2out([res], clsid2catid, model.mask_head.resolution) @@ -166,8 +165,9 @@ def main(): for im_id in im_ids: image_path = imid2path[int(im_id)] image = Image.open(image_path).convert('RGB') - image = visualize_results(image, int(im_id), catid2name, 0.5, - bbox_results, mask_results, is_bbox_normalized) + image = visualize_results(image, + int(im_id), catid2name, 0.5, bbox_results, + mask_results, is_bbox_normalized) save_name = get_save_image_name(FLAGS.output_dir, image_path) logger.info("Detection bbox results save in {}".format(save_name)) image.save(save_name) diff --git a/PaddleCV/object_detection/tools/train.py b/PaddleCV/object_detection/tools/train.py index 55639e7911d98f56c0c62e3030efa4da2b8935fe..2428469863c14736c5dc000c47b1eef9553f7240 100644 --- a/PaddleCV/object_detection/tools/train.py +++ b/PaddleCV/object_detection/tools/train.py @@ -19,9 +19,22 @@ from __future__ import print_function import os import time import multiprocessing - import numpy as np + +def 
set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. +set_paddle_flags( + FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory +) + from paddle import fluid from ppdet.core.workspace import load_config, merge_config, create @@ -52,8 +65,8 @@ def main(): if cfg.use_gpu: devices_num = fluid.core.get_cuda_device_count() else: - devices_num = int(os.environ.get('CPU_NUM', - multiprocessing.cpu_count())) + devices_num = int( + os.environ.get('CPU_NUM', multiprocessing.cpu_count())) if 'train_feed' not in cfg: train_feed = create(main_arch + 'TrainFeed') @@ -73,6 +86,7 @@ def main(): lr_builder = create('LearningRate') optim_builder = create('OptimizerBuilder') + # build program startup_prog = fluid.Program() train_prog = fluid.Program() with fluid.program_guard(train_prog, startup_prog): @@ -107,10 +121,10 @@ def main(): eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog, extra_keys) - # 3. 
Compile program for multi-devices + # compile program for multi-devices build_strategy = fluid.BuildStrategy() build_strategy.memory_optimize = False - build_strategy.enable_inplace = False + build_strategy.enable_inplace = True sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn' build_strategy.sync_batch_norm = sync_bn train_compile_program = fluid.compiler.CompiledProgram( @@ -151,15 +165,14 @@ def main(): checkpoint.save(exe, train_prog, os.path.join(save_dir, str(it))) if FLAGS.eval: - # Run evaluation + # evaluation results = eval_run(exe, eval_compile_program, eval_pyreader, eval_keys, eval_values, eval_cls) - # Evaluation resolution = None if 'mask' in results[0]: resolution = model.mask_head.resolution - eval_results(results, eval_feed, cfg.metric, - resolution, FLAGS.output_file) + eval_results(results, eval_feed, cfg.metric, resolution, + FLAGS.output_file) checkpoint.save(exe, train_prog, os.path.join(save_dir, "model_final")) train_pyreader.reset() @@ -183,7 +196,6 @@ if __name__ == '__main__': "--output_file", default=None, type=str, - help="Evaluation file name, default to bbox.json and mask.json." - ) + help="Evaluation file name, default to bbox.json and mask.json.") FLAGS = parser.parse_args() main()